⌨️

『プログラミング文体練習』の演習問題「他言語で実装せよ」をRubyでやる

2023/06/24に公開

歴史的スタイル

1. 古き良き時代 - アセンブリ言語

限られたメモリしかなく、メモリはアドレスでのみ指定する

# Term-frequency counter written as if only a tiny, address-indexed memory
# were available: every working value lives in a numbered cell of `data`,
# and the word counts are kept in a scratch file of fixed-width records.

# Load the stop words; at most the first 1024 bytes of the file are read.
f = File.open("stop_words.txt", "r")
stop_words = f.read(1024).split(",")
f.close

# Working memory for the counting phase.
data = []
data << nil    # data[0]: never read in this script
data << ""     # data[1]: current input line
data << nil    # data[2]: index where the current word starts (nil = not inside a word)
data << 0      # data[3]: index of the character being examined
data << false  # data[4]: true when the word was found in the frequency file
data << ""     # data[5]: the word just extracted (downcased)
data << ""     # data[6]: record / word read back from the frequency file
data << 0      # data[7]: count read back from the frequency file
data << ""     # data[8]: the character being examined

# Start from an empty frequency file on every run.
if File.exist?("_word_freqs.txt")
  File.delete("_word_freqs.txt")
end
word_freqs = File.open("_word_freqs.txt", "wb+")
f = File.open("pride-and-prejudice.txt")
# f = File.open("input.txt")
while true
  # readline raises at end of file; the rescue modifier turns that into "",
  # which is the loop's termination signal.
  data[1] = f.readline rescue ""
  if data[1] == ""
    break
  end
  # Guarantee a trailing newline so a word at the very end of the line is
  # still terminated by a non-letter.
  if data[1][data[1].length - 1] != "\n"
    data[1] += "\n"
  end
  data[2] = nil
  data[3] = 0
  for data[8] in data[1].chars
    if data[2] == nil
      # Not inside a word: a letter starts one.
      if data[8].match?(/\p{Alpha}/)
        data[2] = data[3]
      end
    else
      # Inside a word: a non-letter ends it.
      if data[8].match?(/\P{Alpha}/)
        data[4] = false
        data[5] = data[1][data[2]...data[3]].downcase
        # Only words of two or more letters that are not stop words are counted.
        if data[5].length >= 2 && !stop_words.include?(data[5])
          # Linear scan of the frequency file from its current position
          # (reset to 0 after every counted word) looking for this word.
          while true
            data[6] = word_freqs.readline.strip rescue ""
            if data[6] == ""
              break
            end
            data[7] = data[6].split(",")[1].to_i
            data[6] = data[6].split(",")[0].strip
            if data[5] == data[6]
              data[7] += 1
              data[4] = true
              break
            end
          end
          if !data[4]
            # Not found: the scan stopped at end of file, so this appends a new record.
            word_freqs.printf("%20s,%04d\n", data[5], 1)
          else
            # Found: step back over the record just read and overwrite it.
            # 26 = 20 (word) + 1 (comma) + 4 (count) + 1 (newline).
            # NOTE(review): this only holds while words are <= 20 characters
            # and counts are <= 9999; longer values widen the record.
            word_freqs.pos -= 26
            word_freqs.printf("%20s,%04d\n", data[5], data[7])
          end
          # Rewind so the next lookup starts from the first record.
          word_freqs.pos = 0
        end
        data[2] = nil
      end
    end
    data[3] += 1
  end
end
f.close
word_freqs.flush

# Second phase: reuse `data` for the top-25 table.
data.slice!(0..-1)

# data[0..24]: [word, count] pairs in descending count order; [] marks an empty slot.
data = data + [[]] * (25 - data.length)
data << ""  # data[25]: record / word read from the frequency file
data << 0   # data[26]: count of that word
data << 0   # data[27]: slot index being compared

while true
  data[25] = word_freqs.readline.strip rescue ""
  if data[25] == ""
    break
  end
  data[26] = data[25].split(",")[1].to_i
  data[25] = data[25].split(",")[0].strip
  data[27] = 0
  # Insert the pair before the first empty or smaller slot.
  while data[27] < 25
    if data[data[27]] == [] or data[data[27]][1] < data[26]
      data.insert(data[27], [data[25], data[26]])
      # insert grew the array by one; pop drops its last element to restore the length.
      data.pop
      break
    end
    data[27] += 1
  end
end

# Print the filled slots; data[25] is reused as the loop index.
data[25] = 0
while true
  if data[25] >= 25
    break
  end
  # An empty slot ([]) means fewer than 25 words were collected.
  if data[data[25]].length != 2
    break
  end
  puts "#{data[data[25]][0]} - #{data[data[25]][1]}"
  data[25] += 1
end

word_freqs.close

現代の言語がどれだけありがたいかわかる

2. Forthで行こう - スタックマシン

変数の領域はスタックと小さなヒープ領域のみ

# The data stack: operations take their operands from here and leave results here.
$stack = []
# The heap: a small keyed store for values that temporarily need a name.
$heap = {}

# Pops a file path off the stack and pushes that file's contents.
def read_file
  $stack << IO.read($stack.pop)
end

# Replaces the text on top of the stack with a lower-cased copy in which
# every run of non-word characters or underscores is a single space.
def filter_chars
  $stack.push(/[\W_]+/)
  pattern = $stack.pop
  text = $stack.pop
  $stack.push(text.gsub(pattern, " ").downcase)
end

# Pops the text on top of the stack and pushes each whitespace-separated
# word as its own stack entry.
#
# Uses Array#concat rather than push(*words): splatting a whole book's
# worth of words into an argument list can exhaust the VM stack on older
# Ruby versions.
def scan
  $stack.concat($stack.pop.split)
end

# Consumes every word on the stack and pushes back those that are neither
# listed in stop_words.txt nor a single letter a-z.
#
# The surviving words end up in reverse order because they are popped off
# one by one. The stop-word list and the intermediate result live on the
# heap only for the duration of the call.
def remove_stop_words
  $stack.push(IO.read("stop_words.txt").scan(/\w+/))
  $stack.last.concat([*"a".."z"])
  $heap[:stop_words] = $stack.pop

  $heap[:words] = []
  until $stack.empty?
    if $heap[:stop_words].include?($stack.last)
      $stack.pop
    else
      $heap[:words].push($stack.pop)
    end
  end

  # concat instead of push(*words): a huge splat can exhaust the VM stack
  # on older Ruby versions.
  $stack.concat($heap[:words])

  $heap.delete(:stop_words)
  $heap.delete(:words)
end

# Consumes every word on the stack and pushes a single Hash mapping each
# word to its number of occurrences.
def frequencies
  $heap[:word_freqs] = {}
  until $stack.empty?
    seen = $heap[:word_freqs][$stack.last]
    if seen
      # Known word: compute seen + 1 on the stack.
      $stack.push(seen, 1)
      $stack.push($stack.pop + $stack.pop)
    else
      # First occurrence.
      $stack.push(1)
    end
    # The stack now ends with [..., word, count].
    word, total = $stack.pop(2)
    $heap[:word_freqs][word] = total
  end
  $stack.push($heap.delete(:word_freqs))
end

# Replaces the frequency Hash on top of the stack with its [word, count]
# pairs in ascending count order, so the most frequent pair is popped first.
def sort
  ascending = $stack.pop.sort_by { |_word, count| count }
  $stack.concat(ascending)
end

# Run the pipeline; afterwards the stack holds [word, count] pairs with
# the most frequent pair on top.
$stack.push("pride-and-prejudice.txt")
read_file
filter_chars
scan
remove_stop_words
frequencies
sort

# The loop counter lives on top of the stack, above the pairs.
$stack.push(0)
until $stack.last >= 25 || $stack.size <= 1
  $heap[:i] = $stack.pop
  word, freq = $stack.pop
  puts "#{word} - #{freq}"

  # Equivalent of i += 1, carried out on the stack.
  $stack.push($heap[:i], 1)
  $stack.push($stack.pop + $stack.pop)
end
  • 演算はスタック上でのみ行われる。i += 1 などと書いてはいけない
  • スタックの状態を常に把握していないと(扱うのは)難しい

3. 配列プログラミング - ベクトル演算

繰り返すのではなく配列に適用する

# Each step applies one operation to a whole array; the trailing `# =>`
# comments show the value for the sample string below.
# data = IO.read("pride-and-prejudice.txt")
data = "Hello  World!"
# Pad with a space on both sides so the first and last words are delimited too.
characters = " #{data} ".chars                                  # => [" ", "H", "e", "l", "l", "o", " ", " ", "W", "o", "r", "l", "d", "!", " "]
characters = characters.map(&:downcase)                         # => [" ", "h", "e", "l", "l", "o", " ", " ", "w", "o", "r", "l", "d", "!", " "]
# Turn every non-letter into a space.
characters = characters.map { |e| e.sub(/\P{Alpha}/, " ") }     # => [" ", "h", "e", "l", "l", "o", " ", " ", "w", "o", "r", "l", "d", " ", " "]
# Indices of all spaces.
sp = characters.map.with_index { |e, i| e == " " && i || nil }  # => [0, nil, nil, nil, nil, nil, 6, 7, nil, nil, nil, nil, nil, 13, 14]
sp = sp.compact                                                 # => [0, 6, 7, 13, 14]
# Consecutive space positions delimit candidate words.
w_ranges = sp.each_cons(2).to_a                                 # => [[0, 6], [6, 7], [7, 13], [13, 14]]
# Keep ranges with at least two characters between the spaces.
w_ranges = w_ranges.find_all { |a, b| (b - a) > 2 }             # => [[0, 6], [7, 13]]
words = w_ranges.map { |a, b| characters[a..b] }                # => [[" ", "h", "e", "l", "l", "o", " "], [" ", "w", "o", "r", "l", "d", " "]]
swords = words.map(&:join).map(&:strip)                         # => ["hello", "world"]
stop_words = IO.read("stop_words.txt").scan(/\w+/).to_set       # => #<Set: {"a", "able", "about", ...}>
ns_words = swords.reject(&stop_words.method(:include?))         # => ["hello", "world"]
uniq = ns_words.tally                                           # => {"hello"=>1, "world"=>1}
sorted = uniq.sort_by { -_2 }                                   # => [["hello", 1], ["world", 1]]
took = sorted.take(25)                                          # => [["hello", 1], ["world", 1]]
puts took.map { |e| e * " - " }

基本スタイル

4. 一枚岩 - モノリス

サブルーチンという考え方はない

# Term-frequency counter written as one block with no subroutines.
# word_freqs is an array of [word, count] pairs that the loop keeps
# ordered by moving a pair towards the front whenever its count grows.
word_freqs = []
# NOTE(review): split(",") keeps any trailing newline on the last entry;
# confirm stop_words.txt does not end with one.
stop_words = File.read("stop_words.txt").split(",")
# Single letters are treated as stop words as well.
stop_words.concat([*"a".."z"])

File.open("pride-and-prejudice.txt") do |f|
  f.each_line do |line|
    # Index where the current word started; nil while between words.
    start_char = nil
    line.each_char.with_index do |c, i|
      if !start_char
        # Between words: a letter starts a new word.
        if c.match?(/\p{Alpha}/)
          start_char = i
        end
      else
        # Inside a word: a non-letter ends it.
        if c.match?(/\P{Alpha}/)
          found = false
          word = line[start_char...i].downcase
          unless stop_words.include?(word)
            # Linear search for an existing pair; pair_index ends up at its position.
            pair_index = 0
            word_freqs.each do |pair|
              if word == pair[0]
                pair[1] += 1
                found = true
                break
              end
              pair_index += 1
            end
            if !found
              word_freqs << [word, 1]
            elsif !word_freqs.empty?
              # Walk towards the front, swapping the updated pair past every
              # pair with a smaller count.
              pair_index.pred.downto(0) do |n|
                if word_freqs[pair_index][1] > word_freqs[n][1]
                  word_freqs[n], word_freqs[pair_index] = word_freqs[pair_index], word_freqs[n]
                  pair_index = n
                end
              end
            end
          end
          start_char = nil
        end
      end
    end
  end
end

# Print the first 25 pairs.
word_freqs.take(25).each do |a, b|
  puts "#{a} - #{b}"
end
  • なるべく変数のスコープを広くする
  • バブルソートを自力で実装してなるべく実行速度を遅くする
  • JavaScript を書いているとこんな感じになっていくのはなぜだろう

5. クックブック - 構造化プログラミング

料理をするかのように食材に対して順に変更を加えていく

# Shared state that the procedures below read and modify in turn.
$data = []        # characters of the input text
$words = []       # words extracted from $data
$word_freqs = []  # word frequencies (replaced by a Hash, then by sorted pairs)

# Appends every character of pride-and-prejudice.txt to $data.
def read_file
  text = IO.read("pride-and-prejudice.txt")
  $data = $data + text.chars
end

# Rewrites $data in place: letters are lower-cased, everything else
# becomes a space.
def filter_chars_and_normalize
  $data.map! { |ch| ch.match?(/\p{Alpha}/) ? ch.downcase : " " }
end

# Joins $data back into text and appends its whitespace-separated words to $words.
def scan
  tokens = $data.join.split
  $words = $words + tokens
end

# Removes from $words every word listed in stop_words.txt and every
# single letter a-z.
def remove_stop_words
  stop_words = IO.read("stop_words.txt").scan(/\w+/)
  single_letters = [*"a".."z"]
  $words = ($words - stop_words) - single_letters
end

# Stores in $word_freqs a Hash of word => number of occurrences in $words.
def frequencies
  counts = $words.tally
  $word_freqs = counts
end

# Replaces $word_freqs with its [word, count] pairs, highest count first.
def sort
  $word_freqs = $word_freqs.sort_by { -_2 }
end

# Apply each step, in order, to the shared data.
read_file
filter_chars_and_normalize
scan
remove_stop_words
frequencies
sort

# Print the 25 most frequent words.
$word_freqs.first(25).each { |word, count| puts "#{word} - #{count}" }
  • 共有データのスコープがとても広い
  • 同じメソッド(手続き)を連続で呼ぶと壊れたりする

6. パイプライン - 関数型プログラミング

クックブックの対極にある感じ

# Returns the whole contents of the file at path_to_file as a String.
def read_file(path_to_file) = IO.read(path_to_file)

# Returns a lower-cased copy of str_data in which every run of non-word
# characters or underscores is a single space.
def filter_chars_and_normalize(str_data)
  lowered = str_data.downcase
  lowered.gsub(/[\W_]+/, " ")
end

# Splits str_data on whitespace and returns the list of words.
def scan(str_data) = str_data.split

# Returns word_list without the words listed in stop_words.txt and
# without single letters a-z.
def remove_stop_words(word_list)
  stop_words = IO.read("stop_words.txt").scan(/\w+/)
  word_list - stop_words - ("a".."z").to_a
end

# Returns a Hash of word => number of occurrences in word_list.
def frequencies(word_list) = word_list.tally

# Returns the [word, count] pairs of word_freqs, highest count first.
def sort(word_freqs)
  word_freqs.sort_by { |_word, count| -count }
end

# Recursively prints each [word, count] pair as "word - count", one per line.
def print_all(word_freqs)
  return if word_freqs.empty?

  word, freq = word_freqs.first
  puts "#{word} - #{freq}"
  print_all(word_freqs.drop(1))
end

print_all(sort(frequencies(remove_stop_words(scan(filter_chars_and_normalize(read_file("pride-and-prejudice.txt")))))).take(25))

実行順序が右から左になってちょっと見にくい

7. コードゴルフ - ワンライナー

できるだけ少ない行数で実装する

# Whole program: read both files, drop stop words, count, sort, print the top 25.
stop_words = IO.read("stop_words.txt").scan(/\w+/)
words = IO.read("pride-and-prejudice.txt").downcase.scan(/[a-z]{2,}/)
puts((words - stop_words).tally.sort_by { |_, n| -n }.first(25).map { _1.join(" - ") })

関数合成

8. 合わせ鏡 - 再帰

繰り返しは再帰で行う

# Defines `count(words, stop_words, word_freqs)` by compiling its source
# with tail-call optimization enabled and evaluating it.
# count increments word_freqs[word] for the first element of words unless
# it is a stop word, then calls itself on the remaining words; it stops
# when words is empty.
# NOTE(review): the body is a runtime string, so it is left untouched here.
RubyVM::InstructionSequence.compile(<<~CODE, __FILE__, __dir__, __LINE__, tailcall_optimization: true).eval
  def count(words, stop_words, word_freqs)
    if words.empty?
      return
    end
    word = words.first
    unless stop_words.include?(word)
      word_freqs[word] += 1
    end
    count(words.drop(1), stop_words, word_freqs)
  end
CODE

# Recursively prints each [word, count] pair as "word - count", one per line.
def print_all(word_freqs)
  return if word_freqs.empty?

  word, freq = word_freqs.first
  puts "#{word} - #{freq}"
  print_all(word_freqs.drop(1))
end

# Driver: count the words recursively, then print the 25 most frequent.
stop_words = File.read("stop_words.txt").scan(/\w+/).to_set
# Alternative input, presumably a smaller file for trying things out.
# It was previously read and immediately overwritten, which made the
# script fail whenever input.txt did not exist.
# words = File.read("input.txt").downcase.scan(/[a-z]{2,}/)
words = File.read("pride-and-prejudice.txt").downcase.scan(/[a-z]{2,}/)
# Default of 0 lets count use `+= 1` without initialising keys.
word_freqs = Hash.new(0)
count(words, stop_words, word_freqs)
print_all word_freqs.sort_by { -_2 }.take(25)

デフォルトでは末尾再帰最適化が効いていないので普通に実行するとスタックが死ぬ。その場合 tailcall_optimization: true で該当コードをコンパイルするか words.each_slice(1000) などとして count を分割して呼ぶ。

9. 継続 - 参照渡し

次に実行するメソッドを渡す

# Reads the file at path and hands its contents to func, together with
# normalize as the step to run after func.
def read_file(path, func)
  contents = IO.read(path)
  func.call(contents, method(:normalize))
end

# Replaces runs of non-word characters or underscores with a space and
# passes the result to func, with scan as the following step.
def filter_chars(str, func)
  cleaned = str.gsub(/[\W_]+/, " ")
  func.call(cleaned, method(:scan))
end

# Lower-cases str and passes it to func, with remove_stop_words as the
# following step.
def normalize(str, func)
  lowered = str.downcase
  func.call(lowered, method(:remove_stop_words))
end

# Splits str into words and passes them to func, with frequencies as the
# following step.
def scan(str, func)
  words = str.split
  func.call(words, method(:frequencies))
end

# Drops stop words (stop_words.txt plus single letters a-z) from words and
# passes the rest to func, with sort as the following step.
def remove_stop_words(words, func)
  listed = File.read("stop_words.txt").scan(/\w+/)
  kept = words - (listed + ("a".."z").to_a)
  func.call(kept, method(:sort))
end

# Counts the occurrences of each word and passes the Hash to func, with
# print_text as the following step.
def frequencies(words, func)
  counts = words.tally
  func.call(counts, method(:print_text))
end

# Orders the [word, count] pairs by descending count and passes them to
# func, with no_op as the following step.
def sort(freqs, func)
  ordered = freqs.sort_by { |_word, count| -count }
  func.call(ordered, method(:no_op))
end

# Prints the first 25 pairs as "word - count" and then calls func with
# no_op as its only argument.
def print_text(freqs, func)
  lines = freqs.first(25).map { |pair| pair * " - " }
  puts lines
  func.call(method(:no_op))
end

# End of the chain: accepts a continuation and does nothing with it.
def no_op(func) = nil

read_file("pride-and-prejudice.txt", method(:filter_chars))

頭がこんがらがる

10. 単子 - モナド

右から左に実行するイメージになってしまうパイプラインの問題を解決する

# Wraps a value and lets functions be applied to it one after another.
class TFTheOne
  # value: the initial wrapped value.
  def initialize(value)
    @value = value
  end

  # Replaces the wrapped value with func[value] and returns self so that
  # calls can be chained.
  def bind(func)
    tap { @value = func[@value] }
  end

  # String form of the wrapped value.
  def to_s = @value.to_s
end

# Returns the whole contents of the file at path.
def read_file(path) = IO.read(path)

# Returns str with every run of non-word characters or underscores
# replaced by a single space.
def filter_chars(str) = str.gsub(/[\W_]+/, " ")

# Returns a lower-cased copy of str.
def normalize(str) = str.downcase

# Splits str on whitespace and returns the words.
def scan(str) = str.split

# Returns words without the entries of stop_words.txt and without single
# letters a-z.
def remove_stop_words(words)
  listed = IO.read("stop_words.txt").scan(/\w+/)
  words - (listed + ("a".."z").to_a)
end

# Returns a Hash of word => number of occurrences.
def frequencies(words) = words.tally

# Returns the [word, count] pairs of freqs, highest count first.
def sort(freqs)
  freqs.sort_by { |_word, count| -count }
end

# Formats the first 25 pairs as "word - count" lines joined by newlines.
def top25_freqs(freqs)
  freqs.first(25).map { |pair| pair.join(" - ") }.join("\n")
end

TFTheOne.new("pride-and-prejudice.txt")
  .bind(method(:read_file))
  .bind(method(:filter_chars))
  .bind(method(:normalize))
  .bind(method(:scan))
  .bind(method(:remove_stop_words))
  .bind(method(:frequencies))
  .bind(method(:sort))
  .bind(method(:top25_freqs))
  .display
  • 適用するメソッドを順に書ける
    • パイプラインでは右から左だったが左から右(または上から下)になっている
  • 値が順に変化していく様子はカプセル化されたクックブックスタイルにも見える

オブジェクトとオブジェクトの相互作用

11. モノのプログラム - オブジェクト

データに直接アクセスさせない

# The words of a text file; they are only reachable through #each.
class Document
  def initialize(path_to_file)
    @path_to_file = path_to_file
  end

  # Iterates over the words, forwarding all arguments and the block to Array#each.
  def each(...)
    words.each(...)
  end

  private

  # Lower-cased words of two or more letters a-z; read on first use and cached.
  def words
    return @words if @words

    @words = IO.read(@path_to_file).downcase.scan(/[a-z]{2,}/)
  end
end

# Membership test against the words in stop_words.txt.
class StopWordList
  # True when word is listed in stop_words.txt.
  def include?(word) = set.include?(word)

  private

  # The stop words as a Set; read on first use and cached.
  def set
    return @set if @set

    @set = IO.read("stop_words.txt").scan(/\w+/).to_set
  end
end

# Keeps a count per word.
class Frequency
  def initialize
    # Missing words count as 0.
    @freqs = Hash.new(0)
  end

  # Adds one to word's count and returns the new count.
  def increment(word)
    @freqs[word] = @freqs[word] + 1
  end

  # [word, count] pairs, highest count first.
  def sorted
    @freqs.sort_by { |_word, count| -count }
  end
end

# Wires a Document, a StopWordList and a Frequency together.
class Controller
  def initialize(path_to_file)
    @document = Document.new(path_to_file)
    @stop_word_list = StopWordList.new
    @frequency = Frequency.new
  end

  # Counts every non-stop word and prints the 25 most frequent as "word - count".
  def run
    @document.each do |word|
      next if @stop_word_list.include?(word)

      @frequency.increment(word)
    end
    puts @frequency.sorted.first(25).map { |pair| pair * " - " }
  end
end

Controller.new("pride-and-prejudice.txt").run

12. レターボックス - メッセージパッシング

やりとりは dispatch のみ

# Holds a text; only reachable through #dispatch.
class Document
  # Forwards a message (method name plus arguments) to a private method.
  def dispatch(...)
    __send__(...)
  end

  private

  # Reads the file into memory.
  def setup(path_to_file)
    @data = IO.read(path_to_file)
  end

  # Lower-cased words of two or more letters a-z.
  def words
    lowered = @data.downcase
    lowered.scan(/[a-z]{2,}/)
  end
end

# Stop-word lookup; only reachable through #dispatch.
class StopWordList
  # Forwards a message (method name plus arguments) to a private method.
  def dispatch(...)
    __send__(...)
  end

  private

  # Loads stop_words.txt into a Set.
  def setup
    listed = File.read("stop_words.txt").scan(/\w+/)
    @stop_words = listed.to_set
  end

  # True when word is a stop word.
  def include?(word) = @stop_words.include?(word)
end

# Per-word counter; only reachable through #dispatch.
class Frequency
  def initialize
    # Missing words count as 0.
    @freqs = Hash.new(0)
  end

  # Forwards a message (method name plus arguments) to a private method.
  def dispatch(...)
    __send__(...)
  end

  private

  # Adds one to word's count and returns the new count.
  def increment(word)
    @freqs[word] = @freqs[word] + 1
  end

  # [word, count] pairs, highest count first.
  def sorted
    @freqs.sort_by { |_word, count| -count }
  end
end

# Coordinates the other objects purely by sending them messages.
class Controller
  # Forwards a message (method name plus arguments) to a private method.
  def dispatch(...)
    __send__(...)
  end

  private

  # Creates the collaborators and initialises the document and stop-word list.
  def setup(path_to_file)
    @document = Document.new
    @stop_word_list = StopWordList.new
    @frequency = Frequency.new

    @document.dispatch(:setup, path_to_file)
    @stop_word_list.dispatch(:setup)
  end

  # Counts every non-stop word and prints the 25 most frequent as "word - count".
  def run
    @document.dispatch(:words).each do |word|
      next if @stop_word_list.dispatch(:include?, word)

      @frequency.dispatch(:increment, word)
    end
    puts @frequency.dispatch(:sorted).first(25).map { |pair| pair * " - " }
  end
end

controller = Controller.new
controller.dispatch(:setup, "pride-and-prejudice.txt")
controller.dispatch(:run)

13. 閉写像 - プロトタイプ

クラスを持っていない言語がハッシュをどうにかしてクラスのように扱う

# "Object" built from a Hash: a data slot plus lambdas that act on it.
data_storage_obj = {
  data: [],
  # Reads the file and stores its lower-cased words of two or more letters.
  init: ->(path_to_file) { data_storage_obj[:data] = IO.read(path_to_file).downcase.scan(/[a-z]{2,}/) },
  # Returns the stored words.
  words: -> { data_storage_obj[:data] },
}

# "Object" holding the stop words and a membership test.
stop_words_obj = {
  stop_words: [],
  # Loads the list from stop_words.txt.
  init: -> { stop_words_obj[:stop_words] = IO.read("stop_words.txt").scan(/\w+/) },
  # True when word is a stop word.
  "include?": ->(word) { stop_words_obj[:stop_words].include?(word) },
}

# "Object" holding the word counts and the operations on them.
word_freqs_obj = {
  # Missing words count as 0.
  freqs: Hash.new(0),
  # Adds one to word's count.
  increment: ->(word) { word_freqs_obj[:freqs][word] += 1 },
  # [word, count] pairs, highest count first.
  sorted: -> { word_freqs_obj[:freqs].sort_by { |_word, count| -count } },
  # The first 25 of the sorted pairs.
  top25: -> { word_freqs_obj[:sorted].call.take(25) },
}

# Initialise the "objects", count every non-stop word, print the top 25.
data_storage_obj[:init].("pride-and-prejudice.txt")
stop_words_obj[:init].()

data_storage_obj[:words].().each do |word|
  next if stop_words_obj[:include?].(word)

  word_freqs_obj[:increment].(word)
end

puts word_freqs_obj[:top25].().map { |pair| pair.join(" - ") }

14. 抽象的なモノ - 抽象データ型

Java の interface のようなもの

# Abstract document: subclasses must provide #each.
class IDocument
  # Raises NotImplementedError unless overridden.
  def each(...)
    message = "#{__method__} is not implemented"
    raise NotImplementedError, message
  end
end

# Abstract stop-word list: subclasses must provide #include?.
class IStopWordList
  # Raises NotImplementedError unless overridden.
  def include?(word)
    message = "#{__method__} is not implemented"
    raise NotImplementedError, message
  end
end

# Abstract frequency counter: subclasses must provide #increment and #sorted.
class IFrequency
  # Raises NotImplementedError unless overridden.
  def increment(word)
    message = "#{__method__} is not implemented"
    raise NotImplementedError, message
  end

  # Raises NotImplementedError unless overridden.
  def sorted
    message = "#{__method__} is not implemented"
    raise NotImplementedError, message
  end
end

# IDocument backed by a text file.
class Document < IDocument
  def initialize(path_to_file)
    @path_to_file = path_to_file
  end

  # Iterates over the words, forwarding all arguments and the block to Array#each.
  def each(...)
    words.each(...)
  end

  private

  # Lower-cased words of two or more letters a-z; read on first use and cached.
  def words
    return @words if @words

    @words = IO.read(@path_to_file).downcase.scan(/[a-z]{2,}/)
  end
end

# IStopWordList backed by stop_words.txt.
class StopWordList < IStopWordList
  # True when word is listed in stop_words.txt.
  def include?(word) = set.include?(word)

  private

  # The stop words as a Set; read on first use and cached.
  def set
    return @set if @set

    @set = IO.read("stop_words.txt").scan(/\w+/).to_set
  end
end

# IFrequency backed by a Hash.
class Frequency < IFrequency
  def initialize
    # Missing words count as 0.
    @freqs = Hash.new(0)
  end

  # Adds one to word's count and returns the new count.
  def increment(word)
    @freqs[word] = @freqs[word] + 1
  end

  # [word, count] pairs, highest count first.
  def sorted
    @freqs.sort_by { |_word, count| -count }
  end
end

# Wires the concrete Document, StopWordList and Frequency together.
class Controller
  def initialize(path_to_file)
    @document = Document.new(path_to_file)
    @stop_word_list = StopWordList.new
    @frequency = Frequency.new
  end

  # Counts every non-stop word and prints the 25 most frequent as "word - count".
  def run
    @document.each do |word|
      next if @stop_word_list.include?(word)

      @frequency.increment(word)
    end
    puts @frequency.sorted.first(25).map { |pair| pair * " - " }
  end
end

Controller.new("pride-and-prejudice.txt").run

ダックタイピングな言語だとそんなに利点はない

15. ハリウッド - 制御の反転

必要となったとき呼ばれる

# Minimal framework: components register handlers, #run calls them back.
class Framework
  def initialize
    @load_event_handlers = []
    @dowork_event_handlers = []
    @end_event_handlers = []
  end

  # Registers a handler called with the input path at the start of #run.
  def register_for_load_event(handler)
    @load_event_handlers.push(handler)
  end

  # Registers a handler called without arguments after all load handlers.
  def register_for_dowork_event(handler)
    @dowork_event_handlers.push(handler)
  end

  # Registers a handler called without arguments after all work handlers.
  def register_for_end_event(handler)
    @end_event_handlers.push(handler)
  end

  # Fires load, dowork and end handlers in that order, each group in
  # registration order.
  def run(path_to_file)
    @load_event_handlers.each { |handler| handler.call(path_to_file) }
    @dowork_event_handlers.each { |handler| handler.call }
    @end_event_handlers.each { |handler| handler.call }
  end
end

# Loads the text on the framework's load event and emits each non-stop
# word to its own handlers on the dowork event.
class Document
  # Handlers called with every non-stop word.
  attr_reader :word_event_handlers

  def initialize(app, stop_word_list)
    @word_event_handlers = []
    app.register_for_load_event(method(:load))
    app.register_for_dowork_event(method(:produce_words))
    @stop_word_list = stop_word_list
  end

  private

  # Load-event handler: reads the file into memory.
  def load(path_to_file)
    @data = IO.read(path_to_file)
  end

  # Dowork-event handler: scans lower-cased words of two or more letters
  # and notifies the handlers of each one that is not a stop word.
  def produce_words
    @data.downcase.scan(/[a-z]{2,}/) do |word|
      next if @stop_word_list.include?(word)

      @word_event_handlers.each { |handler| handler.call(word) }
    end
  end
end

# Loads stop_words.txt on the framework's load event.
class StopWordList
  def initialize(app)
    app.register_for_load_event(method(:load))
  end

  # True when word is a stop word (valid only after the load event).
  def include?(word) = @stop_words.include?(word)

  private

  # Load-event handler; any arguments it receives are ignored.
  def load(...)
    listed = IO.read("stop_words.txt").scan(/\w+/)
    @stop_words = listed.to_set
  end
end

# Counts the words emitted by the document and prints the result on the
# framework's end event.
class Frequency
  def initialize(app, document)
    # Missing words count as 0.
    @freqs = Hash.new(0)
    document.word_event_handlers.push(method(:increment))
    app.register_for_end_event(method(:display))
  end

  private

  # Word handler: adds one to word's count.
  def increment(word)
    @freqs[word] = @freqs[word] + 1
  end

  # End-event handler: prints the 25 most frequent words as "word - count".
  def display
    top = @freqs.sort_by { |_word, count| -count }.first(25)
    puts top.map { |pair| pair * " - " }
  end
end

app = Framework.new
stop_word_list = StopWordList.new(app)
document = Document.new(app, stop_word_list)
frequency = Frequency.new(app, document)
app.run("pride-and-prejudice.txt")

実行順序が把握しづらい

16. 掲示板 - pub/sub

やりとりは中央にある掲示板を通してのみ

# Central bulletin board: handlers subscribe to event types and are
# called whenever an event of that type is published.
class EventHub
  def initialize
    # Each event type gets its own handler list on first subscription.
    @subscriptions = Hash.new { |hash, type| hash[type] = [] }
  end

  # Registers handler (anything responding to #call) for events of type.
  def subscribe(type, handler)
    @subscriptions[type] << handler
  end

  # Calls every handler subscribed to type with args, in subscription order.
  # Publishing a type nobody subscribed to does nothing. fetch is used so
  # the lookup bypasses the default proc and does not leave an empty entry
  # behind for that type.
  def publish(type, *args)
    @subscriptions.fetch(type, []).each { |handler| handler.call(*args) }
  end
end

# Reads the text on :load and publishes one :word event per word on
# :start, followed by a single :eof event.
class Document
  def initialize(event_hub)
    @event_hub = event_hub
    @event_hub.subscribe(:load, method(:load))
    @event_hub.subscribe(:start, method(:produce_words))
  end

  private

  # :load handler: reads the file into memory.
  def load(path_to_file)
    @data = IO.read(path_to_file)
  end

  # :start handler: publishes lower-cased words of two or more letters.
  def produce_words
    @data.downcase.scan(/[a-z]{2,}/) { |word| @event_hub.publish(:word, word) }
    @event_hub.publish(:eof)
  end
end

# Loads the stop words on :load and republishes every :word that is not
# a stop word as :valid_word.
class StopWordList
  def initialize(event_hub)
    @stop_words = []
    @event_hub = event_hub
    @event_hub.subscribe(:load, method(:load))
    @event_hub.subscribe(:word, method(:include?))
  end

  # :load handler; its arguments are ignored.
  def load(*)
    @stop_words = IO.read("stop_words.txt").scan(/\w+/)
  end

  # :word handler: publishes :valid_word unless word is a stop word.
  def include?(word)
    return if @stop_words.include?(word)

    @event_hub.publish(:valid_word, word)
  end
end

# Counts :valid_word events and prints the result on :print.
class Frequency
  def initialize(event_hub)
    # Missing words count as 0.
    @word_freqs = Hash.new(0)
    @event_hub = event_hub
    @event_hub.subscribe(:valid_word, method(:increment))
    @event_hub.subscribe(:print, method(:display))
  end

  # :valid_word handler: adds one to word's count.
  def increment(word)
    @word_freqs[word] = @word_freqs[word] + 1
  end

  # :print handler: prints the 25 most frequent words as "word - count".
  def display
    top = @word_freqs.sort_by { |_word, count| -count }.first(25)
    puts top.map { |pair| pair * " - " }
  end
end

# Translates :run into :load + :start, and :eof into :print.
class Application
  def initialize(event_hub)
    @event_hub = event_hub
    @event_hub.subscribe(:run, method(:run))
    @event_hub.subscribe(:eof, method(:stop))
  end

  # :run handler: asks everyone to load, then to start working.
  def run(path_to_file)
    [[:load, path_to_file], [:start]].each { |event| @event_hub.publish(*event) }
  end

  # :eof handler: asks for the result to be printed.
  def stop = @event_hub.publish(:print)
end

event_hub = EventHub.new
Document.new(event_hub)
StopWordList.new(event_hub)
Frequency.new(event_hub)
Application.new(event_hub)
event_hub.publish(:run, "pride-and-prejudice.txt")

ハリウッドスタイル以上に実行順序の把握が難しい

リフレクションとメタプログラミング

17. 内省性 - イントロスペクション

自分自身の情報にアクセスする

# Returns the words listed in stop_words.txt, but only when the calling
# method is named extract_words; for any other caller it returns nil.
#
# The caller is identified with Location#base_label, which is the bare
# method name. Location#label is decorated with the owner class on newer
# Ruby versions (e.g. "Object#extract_words"), which would make a
# comparison against "extract_words" fail there.
def read_stop_words
  return unless caller_locations(1..1).first.base_label == "extract_words"

  IO.read("stop_words.txt").scan(/\w+/)
end

# Returns the lower-cased words (two or more letters a-z) of the file,
# minus the stop words. The path is fetched through the method's own
# binding to show introspection.
def extract_words(path_to_file)
  path = binding.local_variable_get(:path_to_file)
  candidates = IO.read(path).downcase.scan(/[a-z]{2,}/)
  candidates - read_stop_words
end

# Returns a Hash of word => occurrences; the argument is fetched through
# the method's own binding to show introspection.
def frequencies(words)
  list = binding.local_variable_get(:words)
  list.tally
end

freqs = frequencies(extract_words("pride-and-prejudice.txt"))
puts freqs.sort_by { -_2 }.take(25).collect { |e| e * " - " }

read_stop_words は extract_words から呼ばれているときだけ動く

18. 自己反映性 - リフレクション

実行時にコードを自分で作る

# The three processing functions are kept as source strings and turned
# into lambdas with eval at run time.
stops = IO.read("stop_words.txt").scan(/\w+/)

# The condition is hard-coded, so only the first branch ever runs; the
# else branch holds stand-in definitions.
if true
  # The extract_words source refers to `stops`, which resolves to the local
  # above because eval runs in this scope.
  extract_words_func = %(-> path_to_file { File.read(path_to_file).downcase.scan(/[a-z]{2,}/) - stops })
  frequencies_func = %(-> words { words.tally })
  sort_func = %(-> freqs { freqs.sort_by { -_2 } })
  path_to_name = "pride-and-prejudice.txt"
else
  extract_words_func = %(-> path_to_file { [] })
  frequencies_func = %(-> words { [] })
  sort_func = %(-> freqs { {} })
  path_to_name = __FILE__
end

# Turn the source strings into callable lambdas.
extract_words = eval(extract_words_func)
frequencies = eval(frequencies_func)
sort = eval(sort_func)

# Proc#[] calls the lambda, so this is sort(frequencies(extract_words(path))).
freqs = sort[frequencies[extract_words[path_to_name]]]
puts freqs.take(25).collect { |e| e * " - " }

19. 横断的関心 - アスペクト指向

元のコードはそのままで機能を追加する

require "active_support/core_ext/benchmark"

# Returns the lower-cased words (two or more letters a-z) of the file,
# minus the words listed in stop_words.txt.
def extract_words(path_to_file)
  stop_words = IO.read("stop_words.txt").scan(/\w+/)
  IO.read(path_to_file).downcase.scan(/[a-z]{2,}/) - stop_words
end

# Returns a Hash of word => number of occurrences.
def frequencies(words) = words.tally

# Returns the [word, count] pairs of freqs, highest count first.
def sort(freqs)
  freqs.sort_by { |_word, count| -count }
end

# Wraps each named method so that every call prints "<name>: <elapsed>ms"
# after it returns. The wrapped method's return value is passed through.
#
# names - Symbols naming already-defined methods.
#
# Positional, keyword and block arguments are all forwarded; previously
# keyword arguments were collected into a positional Hash and broke
# methods that declare keywords. Timing uses the monotonic process clock
# from the standard library instead of ActiveSupport's Benchmark.ms, so
# the wrapper itself needs no extra require.
def profile(*names)
  names.each do |name|
    original = method(name)
    define_method(name) do |*args, **kwargs, &block|
      started = Process.clock_gettime(Process::CLOCK_MONOTONIC, :float_millisecond)
      result = original.call(*args, **kwargs, &block)
      elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC, :float_millisecond) - started
      puts "#{original.name}: #{elapsed.round(2)}ms"
      result
    end
  end
end

profile :extract_words, :frequencies, :sort

freqs = sort(frequencies(extract_words("pride-and-prejudice.txt")))
puts freqs.take(25).collect { |e| e * " - " }

profile :extract_words で extract_words メソッドにベンチマーク機能がつく

20. プラグイン - 依存性注入

メインプログラムはそのままで実装を切り替える

config.yml
plugins:
  extract_words: extract_words1.rb
  frequencies: frequencies1.rb
extract_words1.rb
# Plugin: returns the lower-cased words (two or more letters a-z) of the
# file, minus the words listed in stop_words.txt.
def extract_words(path_to_file)
  stop_words = IO.read("stop_words.txt").scan(/\w+/)
  IO.read(path_to_file).downcase.scan(/[a-z]{2,}/) - stop_words
end
frequencies1.rb
# Plugin: returns the 25 most frequent [word, count] pairs, highest first.
def frequencies(words)
  words.tally.sort_by { |_word, count| -count }.first(25)
end
main.rb
# Main program: which implementation files to use is decided by config.yml.
require "yaml"

config = YAML.load_file("config.yml")
# NOTE(review): eval runs whatever code the configured files contain, so
# config.yml and the plugin files must be trusted. Evaluating them here
# defines extract_words and frequencies.
eval IO.read(config.dig("plugins", "extract_words"))
eval IO.read(config.dig("plugins", "frequencies"))

word_freqs = frequencies(extract_words("pride-and-prejudice.txt"))
puts word_freqs.collect { |e| e * " - " }

異常事態

21. 構成主義 - 防御的プログラミング

エラーはなかったことにする

# Returns the lower-cased words (two or more letters a-z) of the file.
# Never raises: a non-String or empty path yields [], and a read error is
# printed and also yields [].
#
# The path may come from the command line, so the file is read with
# File.read: IO.read would treat a path starting with "|" as a command
# to execute.
def extract_words(path_to_file)
  return [] unless path_to_file.kind_of?(String) && !path_to_file.empty?

  begin
    data = File.read(path_to_file)
  rescue => error
    puts error
    return []
  end

  data.downcase.scan(/[a-z]{2,}/)
end

# Returns words without the entries of stop_words.txt. Never raises: a
# non-Array argument yields [], and if the stop-word file cannot be read
# the error is printed and words is returned unchanged.
def remove_stop_words(words)
  return [] unless words.kind_of?(Array)

  stop_words =
    begin
      IO.read("stop_words.txt").scan(/\w+/)
    rescue => error
      puts error
      return words
    end

  words - stop_words
end

# Returns a Hash of word => occurrences; anything that is not a non-empty
# Array yields {}.
def frequencies(words)
  return {} if !words.kind_of?(Array) || words.empty?

  words.tally
end

# Returns the [word, count] pairs, highest count first; anything that is
# not a non-empty Hash yields [].
def sort(freqs)
  return [] if !freqs.kind_of?(Hash) || freqs.empty?

  freqs.sort_by { |_word, count| -count }
end

filename = ARGV.first || "pride-and-prejudice.txt"
freqs = sort(frequencies(remove_stop_words(extract_words(filename))))
puts freqs.take(25).collect { |e| e * " - " }
  • HTML や CSS が壊れていてもブラウザが止まらないのはこのスタイルだから
  • Ruby で "foo"[100] が nil を返したり "".to_i が 0 を返すのもこのスタイル

22. 癇癪持ち - 契約による設計

エラーが起きたらすぐに抗議する

# Returns the lower-cased words (two or more letters a-z) of the file.
# Raises TypeError for a non-String path and ArgumentError for an empty
# one; a read error is printed and then re-raised.
def extract_words(path_to_file)
  raise TypeError, "I need a string!" unless path_to_file.kind_of?(String)
  raise ArgumentError, "I need a non-empty string!" if path_to_file.empty?

  begin
    text = IO.read(path_to_file)
  rescue => error
    puts error.detailed_message
    raise
  end

  text.downcase.scan(/[a-z]{2,}/)
end

# Returns words without the entries of stop_words.txt. Raises TypeError
# for a non-Array argument; a read error is printed and then re-raised.
def remove_stop_words(words)
  raise TypeError, "I need a list!" unless words.kind_of?(Array)

  begin
    text = IO.read("stop_words.txt")
  rescue => error
    puts error.detailed_message
    raise
  end

  words - text.scan(/\w+/)
end

# Returns a Hash of word => occurrences. Raises TypeError for a non-Array
# and ArgumentError for an empty Array.
def frequencies(words)
  raise TypeError, "I need a list!" unless words.kind_of?(Array)
  raise ArgumentError, "I need a non-empty list!" if words.empty?

  words.tally
end

# Returns the [word, count] pairs, highest count first. Raises TypeError
# for a non-Hash and ArgumentError for an empty Hash.
def sort(freqs)
  raise TypeError, "I need a dictionary!" unless freqs.kind_of?(Hash)
  raise ArgumentError, "I need a non-empty dictionary!" if freqs.empty?

  freqs.sort_by { |_word, count| -count }
end

# Driver: any failure is printed and then re-raised.
begin
  freqs = sort(frequencies(remove_stop_words(extract_words("pride-and-prejudice.txt"))))
  raise TypeError, "OMG! This is not a list!" unless freqs.kind_of?(Array)
  raise "SRSLY? Less than 25 words!" if freqs.length < 25

  puts freqs.first(25).map { |pair| pair * " - " }
rescue => error
  puts error.detailed_message
  raise
end

あまりに多い場合は専用の assert メソッドなどを用意した方がいいかもしれない

# Returns expr when it is truthy; otherwise raises with message (or
# "Assertion failed" when no message is given).
def assert(expr, message = nil)
  return expr if expr

  raise message || "Assertion failed"
end

assert を使わない場合は こうあるべき or raise 形式で書けば assert っぽく読める

1 + 2 == 3 or raise "Assertion failed"

23. 受動的攻撃 - 例外

エラーはあとで抗議する

# Returns the lower-cased words (two or more letters a-z) of the file.
# Raises TypeError for a non-String path and ArgumentError for an empty
# one; read errors propagate to the caller.
def extract_words(path_to_file)
  raise TypeError, "I need a string!" unless path_to_file.kind_of?(String)
  raise ArgumentError, "I need a non-empty string!" if path_to_file.empty?

  text = IO.read(path_to_file)
  text.downcase.scan(/[a-z]{2,}/)
end

# Returns words without the entries of stop_words.txt. Raises TypeError
# for a non-Array argument; read errors propagate to the caller.
def remove_stop_words(words)
  raise TypeError, "I need a list!" unless words.kind_of?(Array)

  words - IO.read("stop_words.txt").scan(/\w+/)
end

# Returns a Hash of word => occurrences. Raises TypeError for a non-Array
# and ArgumentError for an empty Array.
def frequencies(words)
  raise TypeError, "I need a list!" unless words.kind_of?(Array)
  raise ArgumentError, "I need a non-empty list!" if words.empty?

  words.tally
end

# Returns the [word, count] pairs, highest count first. Raises TypeError
# for a non-Hash and ArgumentError for an empty Hash.
def sort(freqs)
  raise TypeError, "I need a dictionary!" unless freqs.kind_of?(Hash)
  raise ArgumentError, "I need a non-empty dictionary!" if freqs.empty?

  freqs.sort_by { |_word, count| -count }
end

# Driver: errors raised anywhere in the pipeline are caught here, at the
# outermost level, and reported.
begin
  freqs = sort(frequencies(remove_stop_words(extract_words("pride-and-prejudice.txt"))))
  raise TypeError, "OMG! This is not a list!" unless freqs.kind_of?(Array)
  raise "SRSLY? Less than 25 words!" if freqs.length < 25

  puts freqs.first(25).map { |pair| pair * " - " }
rescue => error
  puts error.full_message
end
  • なるべく外側で拾うという点が「癇癪持ち」スタイルとは異なる
  • 一般的に良いとされている方法

24. 意図の宣言 - 型注釈

ダックタイピングの敵

# typed: strict
require "sorbet-runtime"
extend T::Sig

# Returns the lower-cased words (two or more letters a-z) of the file.
sig { params(path_to_file: String).returns(T::Array[T.any(T::Array[String], String)]) }
def extract_words(path_to_file)
  text = IO.read(path_to_file)
  text.downcase.scan(/[a-z]{2,}/)
end

# Returns words without the entries of stop_words.txt.
sig { params(words: T::Array[T.untyped]).returns(T::Array[String]) }
def remove_stop_words(words)
  words - IO.read("stop_words.txt").scan(/\w+/)
end

# Returns a Hash of word => number of occurrences.
sig { params(words: T::Array[T.untyped]).returns(T::Hash[String, Integer]) }
def frequencies(words)
  counts = words.tally
  counts
end

# Returns the [word, count] pairs of freqs, highest count first.
# The declared return type is an array of [String, Integer] tuples: each
# element pairs a word with its count, not an array of strings.
sig { params(freqs: T::Hash[T.untyped, T.untyped]).returns(T::Array[[String, Integer]]) }
def sort(freqs)
  freqs.sort_by { -_2 }
end

freqs = sort(frequencies(remove_stop_words(extract_words("pride-and-prejudice.txt"))))
puts freqs.take(25).collect { |e| e * " - " }

25. 検疫 - 純粋関数と不純関数

表示を伴なうような不純関数を遅延評価させて純粋関数化する

# Collects functions and runs them only when #execute is called, so that
# any I/O they wrap in lambdas is deferred until then.
class Quarantine
  def initialize
    @funcs = []
  end

  # Queues func and returns self so that calls can be chained.
  def bind(func)
    @funcs.push(func)
    self
  end

  # Runs the queued functions in order, feeding each one the (unwrapped)
  # result of the previous one, then prints the final unwrapped value.
  def execute
    result = @funcs.inject(-> {}) { |previous, func| func[guard_callable(previous)] }
    puts guard_callable(result)
  end

  private

  # Calls value if it is callable, otherwise returns it unchanged.
  def guard_callable(value)
    value.respond_to?(:call) ? value.call : value
  end
end

# Returns a lambda that yields the first command-line argument, or the
# default file name when none was given. Arguments are ignored.
def get_input(*)
  lambda { ARGV.first || "pride-and-prejudice.txt" }
end

# Returns a lambda that, when called, reads the file and returns its
# lower-cased words of two or more letters a-z.
#
# The path may come from the command line, so the file is read with
# File.read: IO.read would treat a path starting with "|" as a command
# to execute.
def extract_words(path_to_file)
  -> { File.read(path_to_file).downcase.scan(/[a-z]{2,}/) }
end

# Returns a lambda that, when called, reads stop_words.txt and returns
# words without its entries.
def remove_stop_words(words)
  lambda do
    stop_words = IO.read("stop_words.txt").scan(/\w+/)
    words - stop_words
  end
end

# Returns a Hash of word => number of occurrences.
def frequencies(words) = words.tally

# Returns the [word, count] pairs of freqs, highest count first.
def sort(freqs)
  freqs.sort_by { |_word, count| -count }
end

# Formats the first 25 pairs as "word - count" lines joined by newlines
# and returns the text without printing it.
#
# Printing is left to Quarantine#execute, which puts the final value.
# Previously this method printed the lines itself and returned nil, so
# execute then printed an extra blank line.
def top25_freqs(freqs)
  freqs.take(25).map { |pair| pair * " - " } * "\n"
end

Quarantine.new
  .bind(method(:get_input))
  .bind(method(:extract_words))
  .bind(method(:remove_stop_words))
  .bind(method(:frequencies))
  .bind(method(:sort))
  .bind(method(:top25_freqs))
  .execute

データ中心

26. データベース - SQL

とりあえず全部DBに入れておく

require "active_record"
require "active_support/core_ext/object/with_options"

# Connect to the SQLite database file _tf.db.
ActiveRecord::Base.establish_connection(adapter: "sqlite3", database: "_tf.db")
# Do not print migration progress.
ActiveRecord::Migration.verbose = false
# Tables: documents -> words -> characters.
ActiveRecord::Schema.define do
  # if_not_exists: true is applied to every create_table below, so the
  # schema block can be run again against an existing database.
  with_options if_not_exists: true do
    create_table :documents do |t|
      t.string :name
    end

    create_table :words do |t|
      t.belongs_to :document
      t.string :value
    end

    create_table :characters do |t|
      t.belongs_to :word
      t.string :value
    end
  end
end

# A text file; creating the record stores one Word row per non-stop word.
class Document < ActiveRecord::Base
  has_many :words, dependent: :destroy

  # Populate the words association from the file named by `name`.
  after_create do
    rows = extract_words.map { |word| { value: word } }
    words.create!(rows)
  end

  private

  # Lower-cased words (two or more letters a-z) of the file, minus the
  # entries of stop_words.txt.
  def extract_words
    stop_words = IO.read("stop_words.txt").scan(/\w+/)
    IO.read(name).downcase.scan(/[a-z]{2,}/) - stop_words
  end
end

# One occurrence of a word in a document; creating the record stores one
# Character row per character of the word.
class Word < ActiveRecord::Base
  belongs_to :document
  has_many :characters, dependent: :destroy

  # Groups by word value, ordered by count_all in reverse order.
  scope :frequency, -> { group(:value).order(:count_all).reverse_order }

  after_create do
    rows = value.chars.map { |char| { value: char } }
    characters.insert_all!(rows)
  end
end

# A single character of a Word.
class Character < ActiveRecord::Base
  belongs_to :word
end

document = Document.find_or_create_by!(name: "pride-and-prejudice.txt")
Document.count   # => 1
Word.count       # => 56615
Character.count  # => 354865
puts document.words.frequency.limit(25).count.collect { |e| e * " - " }

富豪的で好き。初回だけめっちゃ時間かかる。

27. スプレッドシート - リアクティブプログラミング

表計算をイメージする

# A spreadsheet column: either plain data assigned through +values=+,
# or a formula (the block given to +new+) that recomputes the values on +update+.
class Column
  attr_accessor :values

  # @param formula [Proc, nil] optional block returning the column's new values
  def initialize(&formula)
    @values = []
    @formula = formula
  end

  # Recomputes +values+ from the formula; data columns (no formula) are left untouched.
  #
  # @return [Object, nil] the recomputed values, or nil when there is no formula
  def update
    return unless @formula

    @values = @formula.call
  end
end

# Data columns: filled in by hand below.
all_words      = Column.new
stop_words     = Column.new
# Formula columns: each one is derived from the columns defined before it.
non_stop_words = Column.new { all_words.values.collect { |e| stop_words.values.include?(e) ? nil : e } }
unique_words   = Column.new { non_stop_words.values.compact.uniq }
# Tally the word column once instead of rescanning it for every unique word.
counts         = Column.new do
  occurrences = non_stop_words.values.tally
  unique_words.values.collect { |e| occurrences.fetch(e, 0) }
end
sorted_data    = Column.new { unique_words.values.zip(counts.values).sort_by { -_2 } }

# Hash order is the dependency order, so a single pass recomputes everything correctly.
all_columns = { all_words:, stop_words:, non_stop_words:, unique_words:, counts:, sorted_data: }

update = -> { all_columns.values.each(&:update) }

all_words.values = IO.read("pride-and-prejudice.txt").downcase.scan(/[a-z]{2,}/)
stop_words.values = IO.read("stop_words.txt").scan(/\w+/)
update.call
puts sorted_data.values.take(25).collect { |e| e * " - " }

実際に視覚化するとわかりやすい

require "table_format"
# Small data set so the whole sheet fits on screen.
all_words.values = %w(a b c bar b c foo c)
stop_words.values = %w(foo bar)
update.call
# One row per index of all_words; shorter columns yield nil cells.
rows = all_words.values.each_index.map do |i|
  all_columns.transform_values { |column| column.values[i] }
end
tp rows
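| all_words | stop_words | non_stop_words | unique_words | counts | sorted_data |
|-----------|------------|----------------|--------------|--------|-------------|
| a         | foo        | a              | a            | 1      | ["c", 3]    |
| b         | bar        | b              | b            | 2      | ["b", 2]    |
| c         |            | c              | c            | 3      | ["a", 1]    |
| bar       |            |                |              |        |             |
| b         |            | b              |              |        |             |
| c         |            | c              |              |        |             |
| foo       |            |                |              |        |             |
| c         |            | c              |              |        |             |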

28. データストリーム - ジェネレータ

必要な分だけ少しずつ処理する

# Streams a file one line at a time, yielding each line lowercased.
#
# @param path_to_file [String]
# @yieldparam line [String] lowercased line, including its newline
# @return [File] the file object, already closed
def lines(path_to_file)
  File.open(path_to_file) do |io|
    while (raw = io.gets)
      yield raw.downcase
    end
    io
  end
end

# Streams every word (two or more letters a-z) of the file, one at a time.
#
# @param path_to_file [String]
# @yieldparam word [String]
def all_words(path_to_file)
  lines(path_to_file) do |text|
    text.scan(/[a-z]{2,}/).each { |token| yield token }
  end
end

# Streams the words of the file that are not listed in stop_words.txt.
#
# @param path_to_file [String]
# @yieldparam word [String]
def non_stop_words(path_to_file)
  excluded = IO.read("stop_words.txt").scan(/\w+/).to_set
  all_words(path_to_file) do |candidate|
    next if excluded.include?(candidate)

    yield candidate
  end
end

# Counts non-stop words as they stream in and yields progress snapshots.
#
# A snapshot (pairs sorted by descending count) is yielded after every 5000 counted
# words, and once more when the stream ends.
#
# @param path_to_file [String]
# @yieldparam freqs [Array<Array(String, Integer)>] current counts, most frequent first
def count_and_sort(path_to_file)
  freqs = Hash.new(0)
  seen = 0
  non_stop_words(path_to_file) do |word|
    freqs[word] += 1
    # Count first, then test: otherwise a snapshot fires after the very first word.
    seen += 1
    yield freqs.sort_by { |_word, count| -count } if (seen % 5000).zero?
  end
  yield freqs.sort_by { |_word, count| -count }
end

# Print the current top 25 every time a snapshot arrives.
count_and_sort("pride-and-prejudice.txt") do |snapshot|
  puts "-----------------------------"
  puts snapshot.first(25).map { |pair| pair.join(" - ") }
end

終わりがないデータやまとめて処理するメモリが足りないときに使う

並行性

29. アクター - スレッド

スレッド間のやりとりはスレッド毎のキューへのプッシュのみ

require "active_support/core_ext/module/delegation"

# Base class for actors: each instance owns a message queue and a worker thread that
# takes messages off the queue and dispatches them to (private) methods.
class ActiveObject
  # actor << [:message, *args] enqueues a message.
  delegate :<<, to: :@queue
  delegate :join, to: :@thread
  # kill is private, so it is only reached through a queued [:kill] message (dispatch uses send).
  private delegate :kill, to: :@thread

  def initialize
    @queue = Queue.new
    @thread = Thread.start do
      loop do
        # Blocks until a message arrives, then splats it into (method_name, *args).
        dispatch(*@queue.shift)
      end
    end
  end

  private

  # Change the condition to true to log every dispatched message as "Class#message".
  if false
    def dispatch(type, *args)
      puts "#{self.class.name}##{type}"
      send(type, *args)
    end
  else
    def dispatch(...)
      send(...)
    end
  end
end

# Actor that holds the text and feeds its words to the stop-word filter.
class Document < ActiveObject
  private

  # Message [:setup, path, stop_word_list]: remember the next actor and load the file.
  def setup(path_to_file, stop_word_list)
    @stop_word_list = stop_word_list
    @data = IO.read(path_to_file)
  end

  # Message [:process_words, recipient]: send every word downstream, then ask for the
  # sorted result to be delivered to +recipient+.
  def process_words(recipient)
    words = @data.downcase.scan(/[a-z]{2,}/)
    words.each { |word| @stop_word_list << [:filter, word] }
    @stop_word_list << [:sorted, recipient]
  end

  # Message [:kill]: pass the shutdown downstream before stopping this actor's thread.
  def kill
    @stop_word_list << [:kill]
    super
  end
end

# Actor that drops stop words and forwards everything else to the frequency counter.
class StopWordList < ActiveObject
  private

  # Message [:setup, frequency]: remember the next actor and load stop_words.txt.
  def setup(frequency)
    @frequency = frequency
    @stop_words = IO.read("stop_words.txt").scan(/\w+/).to_set
  end

  # Message [:filter, word]: forward the word unless it is a stop word.
  def filter(word)
    return if @stop_words.include?(word)

    @frequency << [:increment, word]
  end

  # Message [:sorted, recipient]: relayed unchanged to the frequency counter.
  def sorted(recipient)
    @frequency << [:sorted, recipient]
  end

  # Message [:kill]: pass the shutdown downstream before stopping this actor's thread.
  def kill
    @frequency << [:kill]
    super
  end
end

# Actor that counts words and reports the sorted counts on request.
class Frequency < ActiveObject
  private

  # Message [:increment, word]: count one occurrence.
  def increment(word)
    freqs[word] += 1
  end

  # Message [:sorted, recipient]: send [:top25, pairs sorted by descending count] to +recipient+.
  def sorted(recipient)
    ranking = freqs.sort_by { |_word, count| -count }
    recipient << [:top25, ranking]
  end

  # Counter hash, created on first use.
  def freqs
    @freqs ||= Hash.new(0)
  end
end

# Actor that starts the run, prints the result and then shuts the actors down.
class Controller < ActiveObject
  private

  # Message [:run, document]: ask the document to process its words and report back here.
  def run(document)
    @document = document
    @document << [:process_words, self]
  end

  # Message [:top25, sorted]: print the first 25 pairs, then queue this actor's own shutdown.
  def top25(sorted)
    puts sorted.first(25).map { |pair| pair.join(" - ") }
    self << [:kill]
  end

  # Message [:kill]: pass the shutdown to the document before stopping this actor's thread.
  def kill
    @document << [:kill]
    super
  end
end

# Wire the actors from the end of the pipeline backwards, so each actor's :setup
# message is queued before any message that needs it.
frequency = Frequency.new
stop_word_list = StopWordList.new
stop_word_list << [:setup, frequency]
document = Document.new
document << [:setup, "pride-and-prejudice.txt", stop_word_list]
controller = Controller.new
controller << [:run, document]

# Wait until every actor thread has been killed by the [:kill] chain.
[frequency, stop_word_list, document, controller].each(&:join)

スレッドを綺麗に終了させるのが難しいので単に (Thread.list - [Thread.main]).each(&:kill) でもいいかもしれない

30. データ空間 - 並列処理

スレッド間のやりとりはスレッドとは独立した2つのキューのみ

require "timeout"

# The two shared spaces: words waiting to be counted, and finished partial counts.
word_space = Queue.new
freq_space = Queue.new

stop_words = IO.read("stop_words.txt").scan(/\w+/).to_set

# Fill the word space up front; the workers below only consume from it.
IO.read("pride-and-prejudice.txt").downcase.scan(/[a-z]{2,}/).each { |word| word_space << word }

workers = Array.new(5) do
  Thread.start do
    partial = Hash.new(0)
    loop do
      word = nil
      begin
        Timeout.timeout(1) { word = word_space.shift }
      rescue Timeout::Error
        break # nothing arrived for one second: stop this worker
      end
      Thread.pass               # hand over the CPU so the work spreads across the workers
      partial[word] += 1 unless stop_words.include?(word)
    end
    freq_space << partial
  end
end
workers.each(&:join)

# Merge the partial counts, adding the counts of words seen by several workers.
freqs = {}
freqs.merge!(freq_space.shift) { |_word, left, right| left + right } until freq_space.empty?
puts freqs.sort_by { |_word, count| -count }.first(25).map { |pair| pair.join(" - ") }

1秒間暇だったらスレッドたちは自動的に終了する

31. マップリデュース - MapReduce

単語抽出を並列処理する

# Splits +text+ into chunks of +nlines+ lines and maps each chunk through the block.
#
# @param text [String]
# @param nlines [Integer] lines per chunk
# @yieldparam chunk [String] up to +nlines+ consecutive lines, joined
# @return [Array] the block's result for each chunk, in order
def partition(text, nlines)
  text.each_line.each_slice(nlines).map { |chunk| yield chunk.join }
end

# Map step: counts the non-stop words of one chunk of text.
#
# @param text [String]
# @return [Hash{String => Integer}] partial counts for this chunk
def split_words(text)
  candidates = text.downcase.scan(/[a-z]{2,}/)
  excluded = IO.read("stop_words.txt").scan(/\w+/)
  kept = candidates - excluded
  kept.tally # partial counting, as suggested by exercise 31-2
end

text = IO.read("pride-and-prejudice.txt")
# Map: one thread per 200-line chunk; Thread#value waits for and returns each partial count.
workers = partition(text, 200) { |chunk| Thread.start { split_words(chunk) } }
splits = workers.map(&:value)
# Reduce: add the partial counts together.
freqs = splits.each_with_object({}) { |part, total| total.merge!(part) { |_word, a, b| a + b } }
puts freqs.sort_by { |_word, count| -count }.first(25).map { |pair| pair.join(" - ") }

途中で [[word1, 1], [word2, 1]] の形式にする利点がわからなかったので単に [word1, word2] とした。

map { |e| e } は map { |e| Thread.start { e } }.map(&:value) の形に置き換えることができる。

[3, 4].map { |e| e.next }                                # => [4, 5]
[3, 4].map { |e| Thread.start { e.next } }.map(&:value)  # => [4, 5]

なので並行性(演習問題31-3)を適用しない場合は次のように元に戻しても結果は変わらない。

-  splits = partition(text, 200) { |e| Thread.start { split_words(e) } }.map(&:value)
+  splits = partition(text, 200) { |e| split_words(e) }

32. 二重マップリデュース - Hadoop

単語抽出を並列処理したあと再編成して頻度集計も並列処理する

# Splits +text+ into chunks of +nlines+ lines and collects the block's result for each chunk.
#
# @param text [String]
# @param nlines [Integer] lines per chunk
# @yieldparam chunk [String] up to +nlines+ consecutive lines, joined
# @return [Array] the block's results, in chunk order
def partition(text, nlines)
  results = []
  text.lines.each_slice(nlines) { |group| results << yield(group.join) }
  results
end

# Map step: extracts the non-stop words of one chunk of text (duplicates kept).
#
# @param text [String]
# @return [Array<String>] words in order of appearance
def split_words(text)
  candidates = text.downcase.scan(/[a-z]{2,}/)
  excluded = IO.read("stop_words.txt").scan(/\w+/)
  candidates - excluded
end

text = IO.read("pride-and-prejudice.txt")
# First map: extract words from each 200-line chunk in its own thread.
extractors = partition(text, 200) { |chunk| Thread.start { split_words(chunk) } }
splits = extractors.map(&:value)
# Regroup: word => every occurrence of that word across all chunks.
splits_per_word = splits.each_with_object({}) do |words, grouped|
  grouped.merge!(words.group_by(&:itself)) { |_word, left, right| left + right }
end
# Second map: count each word's occurrences in its own thread.
counters = splits_per_word.map { |word, occurrences| Thread.start { [word, occurrences.size] } }
freqs = counters.map(&:value)
puts freqs.sort_by { |_word, count| -count }.first(25).map { |pair| pair.join(" - ") }

対話性

33. 三位一体 - MVC

データと表示と制御に分ける

# Model: the word frequencies of one file.
class WordFrequenciesModel
  attr_accessor :freqs

  # @param path_to_file [String] file to count immediately
  def initialize(path_to_file)
    @freqs = {}
    update(path_to_file)
  end

  # Replaces +freqs+ with the counts of the non-stop words of +path_to_file+.
  #
  # @param path_to_file [String]
  # @return [Hash{String => Integer}] the new counts
  def update(path_to_file)
    tokens = IO.read(path_to_file).downcase.scan(/[a-z]{2,}/)
    kept = tokens - stop_words
    @freqs = kept.tally
  end

  private

  # Stop words from stop_words.txt, read once per model.
  def stop_words
    return @stop_words if @stop_words

    @stop_words = IO.read("stop_words.txt").scan(/\w+/)
  end
end

# View: prints the 25 most frequent words of a model.
class WordFrequenciesView
  # @param model [#freqs] object exposing a word => count hash
  def initialize(model)
    @model = model
  end

  # Prints "word - count" lines, most frequent first.
  def render
    ranking = @model.freqs.sort_by { |_word, count| -count }
    puts ranking.first(25).map { |pair| pair.join(" - ") }
  end
end

# Controller: connects a model and a view and triggers rendering.
class WordFrequencyController
  # @param model [Object] kept for later use; not read by +show+
  # @param view [#render]
  def initialize(model, view)
    @model = model
    @view = view
  end

  # Asks the view to render and returns whatever it returns.
  def show = @view.render
end

# Wire model, view and controller together and show the result.
model = WordFrequenciesModel.new("pride-and-prejudice.txt")
view = WordFrequenciesView.new(model)
WordFrequencyController.new(model, view).show
  • 分離する基準が人によって異なる
  • 上のコードの場合、並び替えの責務が定まらない

34. レストフル - ステートレス

セッションの状態はクライアントが持つ

# Server side of the stateless dialogue. Every response is a pair
# [representation, links]; the links tell the client which requests may follow, so the
# position in the dialogue (file name, word index) travels with the client.
class Server
  def initialize
    # filename => [[word, count], ...] sorted by descending count. This is only a cache
    # of derived data: any entry can be rebuilt from the file name alone.
    @data = {}
  end

  # Routes a request to the private handler named handler_<verb>_<uri> (lowercased).
  #
  # @param verb [String] "GET" or "POST"
  # @param uri [String] resource name, e.g. "default", "file", "word"
  # @param args [Array] handler arguments
  # @return [Array] [representation, links]
  # @raise [NoMethodError] when no such handler exists
  def handle_request(verb, uri, *args)
    send("handler_#{verb}_#{uri}".downcase, *args)
  end

  private

  def handler_get_default
    rep = []
    rep << "What would you like to do?"
    rep << "1 - Quit"
    rep << "2 - Upload file"
    links = {
      "1" => ["POST", "quit"],
      "2" => ["GET", "file_form"],
    }
    [rep, links]
  end

  def handler_post_quit
    puts "Goodbye cruel world..."
    exit
  end

  def handler_get_file_form
    ["Name of file to upload?", ["POST", "file"]]
  end

  def handler_post_file(filename)
    create_data(filename)
    handler_get_word(filename, 0)
  end

  def handler_get_word(filename, index)
    word, count = freq_at_index(filename, index)
    rep = []
    rep << "##{index.next}: #{word} - #{count}"
    rep << "What would you like to do next?"
    rep << "1 - Quit"
    rep << "2 - Upload file"
    rep << "3 - See next most-frequently occurring word"
    links = {
      "1" => ["POST", "quit"],
      "2" => ["GET", "file_form"],
      "3" => ["GET", "word", filename, index.next],
    }
    [rep, links]
  end

  # Looks up the pair at +index+, loading the file on demand so that a GET for a word
  # works even if this instance never received the corresponding POST.
  #
  # @return [Array(String, Integer)] the pair, or ["no more words", 0] past the end
  def freq_at_index(filename, index)
    create_data(filename)[index] || ["no more words", 0]
  end

  # Computes (once per file name) the word/count pairs sorted by descending count.
  # File.read is used instead of IO.read because the name is typed by the user and
  # IO.read treats a leading "|" as a command to execute.
  #
  # @raise [Errno::ENOENT] when the file does not exist
  def create_data(filename)
    @data[filename] ||= begin
      words = File.read(filename).downcase.scan(/[a-z]{2,}/)
      (words - stop_words).tally.sort_by { |_word, count| -count }
    end
  end

  def stop_words
    @stop_words ||= File.read("stop_words.txt").scan(/\w+/)
  end
end

# Interactive client: shows each representation, reads the user's choice and turns the
# server-provided links into the next request.
class Client
  def initialize
    @server = Server.new
  end

  # Runs the dialogue until the server exits the process.
  def run
    next_request = ["GET", "default"]
    loop do
      representation, links = @server.handle_request(*next_request)
      next_request = render_and_get_input(representation, links)
    end
  end

  private

  # Prints the representation and derives the next request from +links+:
  # a Hash is a menu (user input selects the entry), a ["POST", ...] link takes the
  # user input as its extra argument, and any other link is followed as is.
  #
  # @return [Array] the next request
  # @raise [KeyError] when the input is not a key of the menu
  def render_and_get_input(state_representation, links)
    puts state_representation
    if links.kind_of?(Hash)
      links.fetch(input)
    elsif links.first == "POST"
      [*links, input]
    else
      links
    end
  end

  # Prompts and returns one line of user input without surrounding whitespace.
  def input
    print "> "
    answer = gets
    answer.strip
  end
end

# Direct calls against the server without the interactive client; the trailing
# annotations show each response. Change the condition to false to skip them.
if true
  server = Server.new

  server.handle_request("GET", "default")                             # => [["What would you like to do?", "1 - Quit", "2 - Upload file"], {"1"=>["POST", "quit"], "2"=>["GET", "file_form"]}]
  server.handle_request("POST", "file", "input.txt")                  # => [["#1: live - 2", "What would you like to do next?", "1 - Quit", "2 - Upload file", "3 - See next most-frequently occurring word"], {"1"=>["POST", "quit"], "2"=>["GET", "file_form"], "3"=>["GET", "word", "input.txt", 1]}]
  server.handle_request("GET", "word", "input.txt", 0)                # => [["#1: live - 2", "What would you like to do next?", "1 - Quit", "2 - Upload file", "3 - See next most-frequently occurring word"], {"1"=>["POST", "quit"], "2"=>["GET", "file_form"], "3"=>["GET", "word", "input.txt", 1]}]
  server.handle_request("GET", "word", "input.txt", 1)                # => [["#2: mostly - 2", "What would you like to do next?", "1 - Quit", "2 - Upload file", "3 - See next most-frequently occurring word"], {"1"=>["POST", "quit"], "2"=>["GET", "file_form"], "3"=>["GET", "word", "input.txt", 2]}]
  server.handle_request("GET", "word", "input.txt", 100)              # => [["#101: no more words - 0", "What would you like to do next?", "1 - Quit", "2 - Upload file", "3 - See next most-frequently occurring word"], {"1"=>["POST", "quit"], "2"=>["GET", "file_form"], "3"=>["GET", "word", "input.txt", 101]}]

  server.handle_request("GET", "default")                             # => [["What would you like to do?", "1 - Quit", "2 - Upload file"], {"1"=>["POST", "quit"], "2"=>["GET", "file_form"]}]
  server.handle_request("POST", "file", "pride-and-prejudice.txt")    # => [["#1: mr - 786", "What would you like to do next?", "1 - Quit", "2 - Upload file", "3 - See next most-frequently occurring word"], {"1"=>["POST", "quit"], "2"=>["GET", "file_form"], "3"=>["GET", "word", "pride-and-prejudice.txt", 1]}]
  server.handle_request("GET", "word", "pride-and-prejudice.txt", 1)  # => [["#2: elizabeth - 635", "What would you like to do next?", "1 - Quit", "2 - Upload file", "3 - See next most-frequently occurring word"], {"1"=>["POST", "quit"], "2"=>["GET", "file_form"], "3"=>["GET", "word", "pride-and-prejudice.txt", 2]}]
end

# Start the interactive dialogue (it creates its own Server instance).
Client.new.run
対話
What would you like to do?
1 - Quit
2 - Upload file
> 2
Name of file to upload?
> pride-and-prejudice.txt
#1: mr - 786
What would you like to do next?
1 - Quit
2 - Upload file
3 - See next most-frequently occurring word
> 3
#2: elizabeth - 635
What would you like to do next?
1 - Quit
2 - Upload file
3 - See next most-frequently occurring word
> 1
Goodbye cruel world...

ログインすればWEBサーバーがセッションの状態を持っているように感じるがそれはブラウザ側からクッキーを渡しているからなので、そう考えればサーバー側はセッションの状態を持ってないと言える。

ニューラルネットワーク

ここからの章は Python の Keras ライブラリを活用するスタイルになっているせいか、さすがに他言語で実装せよとは言われていないのだけど、ここまで来たなら Ruby で実装したかった。

しかし方法がさっぱりわからなかった。なので Python の Keras を Ruby から使う方法でやってみたが自分にはハードルが高すぎた。

具体的なところで言うと Keras の fit_generator にどのようにメソッドを渡せばよいのかわからなかった。本来は Python のジェネレータ関数を渡すべきなのだけど、それに相当すると思われる Ruby の Enumerator インスタンスを渡しても動くわけがなかった。結局 Python のコードを芋蔓式に PyCall.exec することになってほとんど Ruby で書く意味がなくなってしまった。

それでも35章だけは Ruby で書けた。これは Python のコードを Ruby から呼ぶ方法を練習しただけであって、元のアルゴリズムを理解できているわけではない。

35. 浅いDense層のプログラム - ニューラルネットワーク

require "pycall/import"
include PyCall::Import

pyimport "keras"
pyfrom "keras.models", import: "Sequential"
pyfrom "keras.layers", import: "Dense"

require "numpy"

# The 100 characters of the vocabulary: digits, lowercase, uppercase, punctuation, whitespace.
printable = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!\"\#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ \t\n\r\v\f"
# The lookup tables are frozen: they are shared by every function below and never modified.
Characters = printable.chars.freeze                  # => ["0", "1", ..., "9", "a", ..., "z", "A", ..., "Z", "!", ..., "~", " ", "\t", "\n", "\r", "\v", "\f"]
CharIndices = Characters.each_with_index.to_h.freeze # => {"0"=>0, ..., "a"=>10, ..., "A"=>36, ..., "!"=>62, ..., " "=>94, ..., "\f"=>99}
IndicesChar = CharIndices.invert.freeze              # => {0=>"0", ..., 10=>"a", ..., 36=>"A", ..., 62=>"!", ..., 94=>" ", ..., 99=>"\f"}

INPUT_VOCAB_SIZE = Characters.length                 # => 100

# Encodes a string as a one-hot matrix with one row per character.
# Characters outside the vocabulary are encoded as a space.
#
# @param line [String]
# @return [Object] the array returned by Numpy.zeros([line.length, INPUT_VOCAB_SIZE]),
#   with a single 1 set in each row
def encode_one_hot(line)
  x = Numpy.zeros([line.length, INPUT_VOCAB_SIZE])
  fallback = CharIndices[" "]
  line.each_char.with_index do |char, row|
    x[row][CharIndices.fetch(char, fallback)] = 1
  end
  x
end

# Decodes a matrix of per-character scores back into a string, taking the
# highest-scoring vocabulary entry of every row.
#
# Observed while writing this (x is a Numpy::NDArray): x.each and x.length both raise
# NoMethodError, and x.size is the total element count (1300 for 13 rows of 100), so
# the rows are counted with PyCall.len and fetched by index.
#
# @param x [Object] 2-D array with one row per character
# @return [String]
def decode_one_hot(x)
  decoded = PyCall.len(x).times.map do |row|
    # Numpy.argmax does not return a Ruby Integer, so convert before the Hash lookup.
    IndicesChar[Numpy.argmax(x[row]).to_i]
  end
  decoded.join
end

# Installs hand-made weights into +n_layer+ so that it normalizes characters:
# lowercase letters map to themselves, uppercase letters to their lowercase form,
# and every other character to a space. The bias is all zeros.
#
# @param n_layer [#set_weights] layer to configure
# @return [Object] the same layer
def normalization_layer_set_weights(n_layer)
  w = Numpy.zeros([INPUT_VOCAB_SIZE, INPUT_VOCAB_SIZE], dtype: Numpy.float32)
  b = Numpy.zeros(INPUT_VOCAB_SIZE, dtype: Numpy.float32)

  lowercase = ("a".."z").to_a
  uppercase = ("A".."Z").to_a

  lowercase.each do |letter|
    idx = CharIndices[letter]
    w[idx, idx] = 1
  end

  uppercase.each do |letter|
    w[CharIndices[letter], CharIndices[letter.downcase]] = 1
  end

  space = CharIndices[" "]
  (Characters - lowercase - uppercase).each do |other|
    w[CharIndices[other], space] = 1
  end

  n_layer.set_weights([w, b])
  n_layer
end

# Builds a Sequential model containing a single softmax Dense layer whose input and
# output sizes both equal the vocabulary size.
#
# @return [Object] the model
def build_model
  network = Sequential.new
  network.add(Dense.new(INPUT_VOCAB_SIZE, input_shape: [INPUT_VOCAB_SIZE], activation: "softmax"))
  network
end

model = build_model
model.summary
normalization_layer_set_weights(model.layers[0])

# Sanity check
normal = decode_one_hot(model.predict(encode_one_hot("Hello, world!")))  # => "hello  world "

# Normalize every non-blank line of input.txt and show it next to the original.
IO.foreach("input.txt") do |raw|
  line = raw.strip
  next if line == ""

  normal = decode_one_hot(model.predict(encode_one_hot(line)))
  puts " in: #{line.inspect}"
  puts "out: #{normal.inspect}"
end

36. 学習する浅いDense層 - 学習

TODO

37. 蝶ネクタイ - 多層ネットワーク

TODO

38. ニューロモノリス - シーケンス

TODO

39. スライディングウィンドウ - 畳み込み

TODO

40. リカレント : 回帰型ニューラルネットワーク

TODO

参照

https://www.amazon.co.jp/dp/4814400225

Discussion