Open1

レコードデータの CSV(id, title, contents 列)を読み込み、レコードごとの HTML / テキストファイルと一覧用の index.html を出力するスクリプト

1AR1M1AR1M
csv_to_html.rb
#!/usr/bin/env ruby
# frozen_string_literal: true

require "csv"
require "cgi"
require "fileutils"

# Positional CLI arguments, both optional: input CSV path, then output root.
csv_path, out_dir = ARGV.values_at(0, 1)
csv_path ||= "records.csv"
out_dir  ||= "out"

# Per-record pages go under <out_dir>/html, raw contents under <out_dir>/text.
html_dir, text_dir = %w[html text].map { |leaf| File.join(out_dir, leaf) }

# Create both output directories (and any missing parents) up front.
[html_dir, text_dir].each { |dir| FileUtils.mkdir_p(dir) }

# Builds a filesystem-safe file name stem from an arbitrary value.
#
# Every run of characters other than ASCII letters, digits, ".", "_" and "-"
# collapses into a single "_"; the result is capped at 200 characters.
#
# @param s [#to_s] value to sanitize (nil becomes "")
# @return [String] sanitized name, at most 200 characters long
def make_filename(s)
  sanitized = s.to_s.gsub(/[^A-Za-z0-9._-]+/) { "_" }
  sanitized.slice(0, 200)
end

# Decodes +raw+ as +enc+ and parses it as a CSV with a header row.
#
# The column separator is whichever candidate occurs most often in the first
# line (the first candidate wins ties, including the all-zero case).
#
# @param raw [String] file bytes as returned by File.binread
# @param enc [String] encoding name to interpret the bytes as
# @param col_seps [Array<String>] candidate single-character separators
# @param strict [Boolean] when true, reject the candidate if the bytes are
#   not valid in +enc+ instead of silently dropping the offending bytes
# @return [Array(CSV::Table, String), nil] parsed table and chosen separator,
#   or nil when decoding/parsing fails or id/title/contents are not all present
def try_parse_records_csv(raw, enc, col_seps, strict:)
  decoded = raw.dup.force_encoding(enc)
  return nil if strict && !decoded.valid_encoding?

  text = decoded.encode("UTF-8", invalid: :replace, undef: :replace, replace: "")
  # Strip the BOM before parsing: left in place, a quoted first header
  # (BOM immediately followed by a quote) is rejected as malformed CSV.
  text = text.sub(/\A\uFEFF/, "")

  first_line = text.each_line.first || ""
  sep = col_seps.max_by { |s| first_line.count(s) } || ","

  table = CSV.new(text, headers: true, col_sep: sep).read
  headers = (table.headers || []).map { |h| h.to_s.strip.downcase }
  return nil unless %w[id title contents].all? { |k| headers.include?(k) }

  [table, sep]
rescue ArgumentError, EncodingError, CSV::MalformedCSVError
  # Unknown encoding name, failed conversion or unparsable CSV:
  # this candidate does not fit, let the caller try the next one.
  nil
end

# Read the whole file as bytes, then auto-detect encoding and column separator.
raw = File.binread(csv_path)

# Encodings to try, highest priority first. BOM handling is done by stripping
# U+FEFF after decoding; the "bom|utf-8" spelling is an IO open-mode form that
# String#force_encoding rejects, so it is not listed here.
enc_list = ["UTF-8", "CP932", "Shift_JIS", "ISO-8859-1"]
col_seps = [",", "\t", ";", "|"]

rows = nil
chosen_enc = nil
chosen_sep = nil

# Pass 1 (strict): only accept an encoding in which every byte is valid.
# Without this check the first candidate always "succeeds" because invalid
# bytes are replaced with "", which silently deletes all non-ASCII text from
# e.g. a CP932 file whose header row is plain ASCII.
# Pass 2 (lenient): fall back to dropping undecodable bytes so input that no
# candidate decodes cleanly is still processed on a best-effort basis.
[true, false].each do |strict|
  enc_list.each do |enc|
    rows, chosen_sep = try_parse_records_csv(raw, enc, col_seps, strict: strict)
    next unless rows

    chosen_enc = enc
    break
  end
  break if rows
end

unless rows
  # Build a hint about what was actually seen in the file.
  # Work on a scrubbed UTF-8 copy: `raw` itself is left untouched, and bytes
  # that are invalid in UTF-8 can no longer make String#strip raise while the
  # error message is being assembled (which would hide the message entirely).
  preview = raw.dup.force_encoding("UTF-8").scrub
  seen =
    begin
      CSV.new(preview, headers: true).read.headers.map(&:to_s).join(", ")
    rescue StandardError
      # Diagnostics are best-effort; an unparsable file just yields no header list.
      nil
    end
  first_line = (preview.each_line.first || "").strip
  abort "CSVヘッダーに id,title,contents が見つかりません(enc自動判定失敗)。" \
        " 想定外の区切り/ヘッダー名の可能性。確認用: encoding候補=#{enc_list.join('/')}, " \
        "区切り候補=#{col_seps.join(' ')}, 先頭行=#{first_line.inspect}, 検出ヘッダー=#{seen.inspect}"
end

# Map each normalized header (leading BOM removed, trimmed, lower-cased) to
# the header exactly as it appears in the CSV, so cells can be looked up by
# the real key. When two headers normalize identically, the later one wins.
norm_map = rows.headers.each_with_object({}) do |header, lookup|
  lookup[header.to_s.sub(/\A\uFEFF/, "").strip.downcase] = header
end
id_key, title_key, contents_key = norm_map.values_at("id", "title", "contents")

# One [id, title, relative link] triple per written record, for the index page.
index_rows = []
# Output name stems already handed out, keyed lower-cased so that names stay
# distinct on case-insensitive filesystems as well.
taken_names = {}

# Write one HTML page and one plain-text file per record that has an id.
rows.each do |row|
  rid = row[id_key].to_s.strip
  next if rid.empty? # rows without an id are skipped entirely

  title = row[title_key].to_s.strip
  contents = row[contents_key].to_s # embedded newlines are kept as-is

  # Different ids can sanitize to the same stem (duplicate ids, "a/b" vs
  # "a b", ids made only of non-ASCII characters). Append -2, -3, ... so a
  # later record never overwrites an earlier record's files.
  base_name = make_filename(rid)
  id_fname = base_name
  suffix = 1
  while taken_names.key?(id_fname.downcase)
    suffix += 1
    id_fname = "#{base_name}-#{suffix}"
  end
  taken_names[id_fname.downcase] = true

  html_body = <<~HTML
    <!doctype html>
    <meta charset="utf-8">
    <title>#{CGI.escapeHTML(title)}</title>
    <style>
      body{font-family: system-ui, -apple-system, "Segoe UI", Roboto, "Noto Sans JP", sans-serif; line-height:1.7; padding:24px; max-width:960px; margin:auto;}
      h1{font-size:20px; margin:0 0 12px;}
      .meta{color:#666; font-size:12px; margin-bottom:16px;}
      .contents{white-space: pre-wrap; font-size:14px;}
    </style>
    <h1>#{CGI.escapeHTML(title)}</h1>
    <div class="meta">id: #{CGI.escapeHTML(rid)}</div>
    <div class="contents">#{CGI.escapeHTML(contents)}</div>
  HTML

  File.write(File.join(html_dir, "#{id_fname}.html"), html_body, mode: "w", encoding: "UTF-8")
  File.write(File.join(text_dir, "#{id_fname}.txt"), contents, mode: "w", encoding: "UTF-8")

  # The link is relative to index.html, which is written into out_dir.
  index_rows << [rid, title, "html/#{id_fname}.html"]
end

# Render one table row per record: escaped id, escaped title, link to its page.
table_rows = index_rows.map do |rid, title, link|
  cells = "<td>#{CGI.escapeHTML(rid)}</td><td>#{CGI.escapeHTML(title)}</td>"
  "<tr>#{cells}<td><a href=\"#{link}\">open</a></td></tr>"
end.join("\n")

# Index page: a table of all records plus a client-side filter box that hides
# rows whose text does not contain the typed string (case-insensitive).
index_html = <<~HTML
  <!doctype html>
  <meta charset="utf-8">
  <title>index</title>
  <style>
    body{font-family: system-ui, -apple-system, "Segoe UI", Roboto, "Noto Sans JP", sans-serif; line-height:1.6; padding:24px;}
    table{border-collapse: collapse; width:100%;}
    th,td{border:1px solid #ccc; padding:6px 8px; font-size:14px;}
    #q{width:100%; padding:8px; margin-bottom:12px; font-size:14px;}
  </style>
  <h1>Records</h1>
  <input id="q" type="search" placeholder="フィルタ(id / title)">
  <table>
    <thead><tr><th>id</th><th>title</th><th>link</th></tr></thead>
    <tbody id="tbody">
      #{table_rows}
    </tbody>
  </table>
  <script>
    const q = document.getElementById('q');
    const tb = document.getElementById('tbody');
    q.addEventListener('input', () => {
      const v = q.value.toLowerCase();
      for (const tr of tb.querySelectorAll('tr')) {
        const text = tr.textContent.toLowerCase();
        tr.style.display = text.includes(v) ? '' : 'none';
      }
    });
  </script>
HTML

# Write the index page and report which encoding/separator were detected.
index_path = File.join(out_dir, "index.html")
File.write(index_path, index_html, mode: "w", encoding: "UTF-8")
# " -> " keeps the inspected separator and the output path from running
# together into one unreadable token in the status line.
puts "OK: enc=#{chosen_enc}, sep=#{chosen_sep.inspect} -> #{index_path}"