Open1
レコードデータのCSVから、レコードごとのHTML/テキストファイルと一覧ページ(index.html)を出力する

csv_to_html.rb
#!/usr/bin/env ruby
# frozen_string_literal: true
require "csv"
require "cgi"
require "fileutils"
# Command-line arguments, both optional: the input CSV path, then the output
# directory. HTML pages and plain-text dumps go into separate subdirectories.
csv_arg, dir_arg = ARGV
csv_path = csv_arg || "records.csv"
out_dir = dir_arg || "out"
html_dir, text_dir = %w[html text].map { |sub| File.join(out_dir, sub) }
# mkdir_p creates missing parents and is a no-op for directories that exist.
FileUtils.mkdir_p([html_dir, text_dir])
# Converts any value into a conservative file-name stem.
#
# Every run of characters other than ASCII letters, digits, ".", "_" and "-"
# collapses into a single "_", and the result is capped at 200 characters.
# The mapping is lossy: different inputs can produce the same stem.
#
# @param s [#to_s] value to sanitise (nil becomes "")
# @return [String] the sanitised stem, at most 200 characters long
def make_filename(s)
  sanitized = s.to_s.gsub(/[^A-Za-z0-9._-]+/, "_")
  sanitized.slice(0, 200)
end
# Read the whole file as raw bytes, then auto-detect the encoding first and
# the column separator second.
raw = File.binread(csv_path)
# Candidate encodings, tried in priority order.
enc_list = ["bom|utf-8", "UTF-8", "CP932", "Shift_JIS", "ISO-8859-1"]
col_seps = [",", "\t", ";", "|"]
rows = nil
chosen_enc = nil
chosen_sep = nil
utf8_bom = "\xEF\xBB\xBF".b

enc_list.each do |enc|
  bytes = raw
  enc_name = enc
  if enc.start_with?("bom|")
    # "bom|<name>" is IO open-mode syntax, not an Encoding name that
    # String#force_encoding accepts. Treat it as "<name>, but only when the
    # data starts with a UTF-8 BOM", and drop the BOM before decoding.
    next unless raw.start_with?(utf8_bom)

    bytes = raw.byteslice(utf8_bom.bytesize, raw.bytesize)
    enc_name = enc.sub(/\Abom\|/, "")
  end

  begin
    decoded = bytes.dup.force_encoding(enc_name)
    # Reject the candidate outright when the bytes are not valid in it.
    # Replacing or dropping invalid bytes here would let UTF-8 "succeed" on
    # CP932 data and silently delete every non-ASCII character.
    next unless decoded.valid_encoding?

    str = decoded.encode("UTF-8")
  rescue ArgumentError, EncodingError
    # Unknown encoding name, or a character that has no UTF-8 mapping.
    next
  end

  # Guess the separator from the header line: the candidate that occurs most
  # often wins, and "," is chosen when none occurs at all.
  first = (str.each_line.first || "").sub(/\A\uFEFF/, "") # strip a leading BOM
  sep = col_seps.max_by { |s| first.count(s) } || ","

  begin
    tmp_rows = CSV.new(str, headers: true, col_sep: sep).read
  rescue StandardError
    # Not parseable with this encoding/separator pair; try the next candidate.
    next
  end

  # Accept the candidate only if all required columns are present. Header
  # names are compared after removing a BOM, trimming and lower-casing.
  headers = (tmp_rows.headers || []).map { |h| h.to_s.sub(/\A\uFEFF/, "").strip.downcase }
  next unless ["id", "title", "contents"].all? { |k| headers.include?(k) }

  rows = tmp_rows
  chosen_enc = enc
  chosen_sep = sep
  break
end

unless rows
  # Give a hint about what was actually seen. Work on a scrubbed UTF-8 copy:
  # String#strip raises ArgumentError on invalid byte sequences, which would
  # replace the diagnostic below with a stack trace. `raw` is left untouched.
  printable = raw.dup.force_encoding("UTF-8").scrub
  first_line = (printable.each_line.first || "").strip
  seen = begin
    CSV.new(printable, headers: true).read.headers.map(&:to_s).join(", ")
  rescue StandardError
    nil
  end
  abort "CSVヘッダーに id,title,contents が見つかりません(enc自動判定失敗)。" \
        " 想定外の区切り/ヘッダー名の可能性。確認用: encoding候補=#{enc_list.join('/')}, " \
        "区切り候補=#{col_seps.join(' ')}, 先頭行=#{first_line.inspect}, 検出ヘッダー=#{seen.inspect}"
end
# Map each normalised header name (BOM removed, trimmed, lower-cased) to the
# header exactly as it appears in the file, so rows can be looked up by the
# real key. When two headers normalise to the same name, the later one wins.
norm_map = rows.headers.each_with_object({}) do |header, map|
  map[header.to_s.sub(/\A\uFEFF/, "").strip.downcase] = header
end
id_key, title_key, contents_key = norm_map.values_at("id", "title", "contents")
# Write one HTML page and one plain-text file per record, collecting
# [id, title, relative link] triples for the index page.
index_rows = []
# File-name stems already written. Keys are lower-cased so that names
# differing only by case also count as taken on case-insensitive file systems.
used_names = {}
rows.each do |row|
  rid = row[id_key].to_s.strip
  next if rid.empty? # rows without an id are skipped

  title = row[title_key].to_s.strip
  contents = row[contents_key].to_s # kept verbatim; may contain newlines

  # make_filename is lossy (any run of other characters becomes "_"), so
  # distinct ids, or a repeated id, can yield the same stem. Append a numeric
  # suffix instead of silently overwriting a file written earlier.
  base_name = make_filename(rid)
  id_fname = base_name
  suffix = 1
  while used_names.key?(id_fname.downcase)
    suffix += 1
    id_fname = "#{base_name}_#{suffix}"
  end
  used_names[id_fname.downcase] = true

  # All record values are HTML-escaped before being placed in the page.
  html_body = <<~HTML
    <!doctype html>
    <meta charset="utf-8">
    <title>#{CGI.escapeHTML(title)}</title>
    <style>
    body{font-family: system-ui, -apple-system, "Segoe UI", Roboto, "Noto Sans JP", sans-serif; line-height:1.7; padding:24px; max-width:960px; margin:auto;}
    h1{font-size:20px; margin:0 0 12px;}
    .meta{color:#666; font-size:12px; margin-bottom:16px;}
    .contents{white-space: pre-wrap; font-size:14px;}
    </style>
    <h1>#{CGI.escapeHTML(title)}</h1>
    <div class="meta">id: #{CGI.escapeHTML(rid)}</div>
    <div class="contents">#{CGI.escapeHTML(contents)}</div>
  HTML
  File.write(File.join(html_dir, "#{id_fname}.html"), html_body, mode: "w", encoding: "UTF-8")
  File.write(File.join(text_dir, "#{id_fname}.txt"), contents, mode: "w", encoding: "UTF-8")
  index_rows << [rid, title, "html/#{id_fname}.html"]
end
# Build the index page: one table row per exported record plus a search box
# whose script hides rows that do not contain the typed text.
table_rows = index_rows.map do |rid, title, link|
  "<tr><td>#{CGI.escapeHTML(rid)}</td><td>#{CGI.escapeHTML(title)}</td><td><a href=\"#{link}\">open</a></td></tr>"
end.join("\n")

# Every heredoc line shares the same indent so that <<~ strips it completely
# and the generated page has no leading whitespace.
index_html = <<~HTML
  <!doctype html>
  <meta charset="utf-8">
  <title>index</title>
  <style>
  body{font-family: system-ui, -apple-system, "Segoe UI", Roboto, "Noto Sans JP", sans-serif; line-height:1.6; padding:24px;}
  table{border-collapse: collapse; width:100%;}
  th,td{border:1px solid #ccc; padding:6px 8px; font-size:14px;}
  #q{width:100%; padding:8px; margin-bottom:12px; font-size:14px;}
  </style>
  <h1>Records</h1>
  <input id="q" type="search" placeholder="フィルタ(id / title)">
  <table>
  <thead><tr><th>id</th><th>title</th><th>link</th></tr></thead>
  <tbody id="tbody">
  #{table_rows}
  </tbody>
  </table>
  <script>
  const q = document.getElementById('q');
  const tb = document.getElementById('tbody');
  q.addEventListener('input', () => {
  const v = q.value.toLowerCase();
  for (const tr of tb.querySelectorAll('tr')) {
  const text = tr.textContent.toLowerCase();
  tr.style.display = text.includes(v) ? '' : 'none';
  }
  });
  </script>
HTML

index_path = File.join(out_dir, "index.html")
File.write(index_path, index_html, mode: "w", encoding: "UTF-8")
# Report the detected encoding and separator along with the index location.
puts "OK: enc=#{chosen_enc}, sep=#{chosen_sep.inspect} → #{index_path}"