『プログラミング文体練習』の演習問題「他言語で実装せよ」をRubyでやる
歴史的スタイル
1. 古き良き時代 - アセンブリ言語
限られたメモリしかなく、メモリはアドレスでのみ指定する
# "Good old times" style: term frequency computed with a fixed set of numbered
# memory slots (`data`) and a scratch file (`_word_freqs.txt`) as secondary storage.

# Load the stop words; only the first 1024 bytes of the file are read.
f = File.open("stop_words.txt", "r")
stop_words = f.read(1024).split(",")
f.close
# Primary "memory": every working value below is addressed by its slot number.
data = []
data << nil # data[0]: not used by the code below
data << "" # data[1]: current input line
data << nil # data[2]: start index of the word being scanned (nil = not inside a word)
data << 0 # data[3]: index of the current character within the line
data << false # data[4]: flag, true when the word was found in the frequency file
data << "" # data[5]: current word, lower-cased
data << "" # data[6]: record (then just the word) read back from the frequency file
data << 0 # data[7]: frequency parsed from that record
data << "" # data[8]: current character
# Start from an empty frequency file, opened for both reading and writing.
if File.exist?("_word_freqs.txt")
File.delete("_word_freqs.txt")
end
word_freqs = File.open("_word_freqs.txt", "wb+")
f = File.open("pride-and-prejudice.txt")
# f = File.open("input.txt")
# Part 1: scan the input line by line and maintain the per-word counts on disk.
while true
# readline raises at end of file; the rescue modifier turns that into "" which ends the loop.
data[1] = f.readline rescue ""
if data[1] == ""
break
end
# Guarantee a trailing newline so a word at the very end of the line is terminated.
if data[1][data[1].length - 1] != "\n"
data[1] += "\n"
end
data[2] = nil
data[3] = 0
for data[8] in data[1].chars
if data[2] == nil
# Not inside a word: an alphabetic character starts one.
if data[8].match?(/\p{Alpha}/)
data[2] = data[3]
end
else
# Inside a word: the first non-alphabetic character ends it.
if data[8].match?(/\P{Alpha}/)
data[4] = false
data[5] = data[1][data[2]...data[3]].downcase
if data[5].length >= 2 && !stop_words.include?(data[5])
# Linear search through the frequency file for an existing record of this word.
while true
data[6] = word_freqs.readline.strip rescue ""
if data[6] == ""
break
end
data[7] = data[6].split(",")[1].to_i
data[6] = data[6].split(",")[0].strip
if data[5] == data[6]
data[7] += 1
data[4] = true
break
end
end
if !data[4]
# Not found: the search ran to the end of the file, so this appends a new record.
word_freqs.printf("%20s,%04d\n", data[5], 1)
else
# Found: each record is 20 + 1 + 4 + 1 = 26 bytes, so step back over the
# record just read and overwrite it with the incremented count.
word_freqs.pos -= 26
word_freqs.printf("%20s,%04d\n", data[5], data[7])
end
# Rewind so the next search starts from the first record.
word_freqs.pos = 0
end
data[2] = nil
end
end
data[3] += 1
end
end
f.close
word_freqs.flush
# Part 2: reuse the memory. Slots 0..24 hold the top-25 [word, count] pairs
# (empty arrays until filled); slots 25..27 are scratch.
data.slice!(0..-1)
data = data + [[]] * (25 - data.length)
data << "" # data[25]: record / word read from the frequency file
data << 0 # data[26]: its frequency
data << 0 # data[27]: insertion index
while true
data[25] = word_freqs.readline.strip rescue ""
if data[25] == ""
break
end
data[26] = data[25].split(",")[1].to_i
data[25] = data[25].split(",")[0].strip
data[27] = 0
# Find the first slot that is empty or holds a smaller count and insert there.
while data[27] < 25
if data[data[27]] == [] or data[data[27]][1] < data[26]
# insert shifts every later slot (scratch slots included) up by one and pop
# drops the last one, so the array length is unchanged; the scratch slots are
# reassigned at the start of the next outer iteration.
data.insert(data[27], [data[25], data[26]])
data.pop
break
end
data[27] += 1
end
end
# Print the filled slots in order; data[25] is now the loop counter.
data[25] = 0
while true
if data[25] >= 25
break
end
# An unfilled slot is still an empty array: stop there.
if data[data[25]].length != 2
break
end
puts "#{data[data[25]][0]} - #{data[data[25]][1]}"
data[25] += 1
end
word_freqs.close
現代の言語がどれだけありがたいかわかる
2. Forthで行こう - スタックマシン
変数の領域はスタックと小さなヒープ領域のみ
# Machine state: an operand stack plus a small heap for named values.
$stack = []
$heap = {}

# Pops a file path and pushes the contents of that file.
def read_file
  path = $stack.pop
  $stack.push(IO.read(path))
end

# Replaces the text on top of the stack with a lower-cased copy in which every
# run of non-word characters (and underscores) is a single space.
def filter_chars
  $stack.push(/[\W_]+/)
  pattern = $stack.pop
  text = $stack.pop
  $stack.push(text.gsub(pattern, " ").downcase)
end

# Pops the text and pushes each whitespace-separated word individually.
def scan
  $stack.pop.split.each { |word| $stack.push(word) }
end

# Empties the stack of words and pushes back only those that are neither listed
# in stop_words.txt nor a single letter. The survivors come back in reverse order.
def remove_stop_words
  $stack.push(IO.read("stop_words.txt").scan(/\w+/))
  $stack.last.concat(("a".."z").to_a)
  $heap[:stop_words] = $stack.pop
  $heap[:words] = []
  until $stack.empty?
    $heap[:words].push($stack.last) unless $heap[:stop_words].include?($stack.last)
    $stack.pop
  end
  $heap[:words].each { |word| $stack.push(word) }
  $heap.delete(:stop_words)
  $heap.delete(:words)
end

# Consumes every word on the stack and leaves a single word => count hash.
def frequencies
  $heap[:word_freqs] = {}
  until $stack.empty?
    # Stack arithmetic: [word] -> [word, previous count or 0] -> [word, count + 1]
    $stack.push($heap[:word_freqs].fetch($stack.last, 0))
    $stack.push(1)
    $stack.push($stack.pop + $stack.pop)
    total = $stack.pop
    word = $stack.pop
    $heap[:word_freqs][word] = total
  end
  $stack.push($heap[:word_freqs])
  $heap.delete(:word_freqs)
end

# Pops the hash and pushes its [word, count] pairs in ascending order of count;
# ascending is enough because the consumer pops from the top.
def sort
  $stack.concat($stack.pop.sort_by { |_word, count| count })
end
# Program: push the input path, then run each operation in order.
$stack.push("pride-and-prejudice.txt")
%i[read_file filter_chars scan remove_stop_words frequencies sort].each { |operation| send(operation) }

# Print up to 25 pairs. The loop counter lives on top of the stack and is
# parked in the heap while a pair is popped and printed.
$stack.push(0)
until $stack.last >= 25 || $stack.size <= 1
  $heap[:i] = $stack.pop
  word, freq = $stack.pop
  puts "#{word} - #{freq}"
  # Equivalent of i += 1, performed on the stack.
  $stack.push($heap[:i])
  $stack.push(1)
  $stack.push($stack.pop + $stack.pop)
end
- 演算はスタック上でのみ行われる。`i += 1` などと書いてはいけない
- スタックの状態を常に把握していないと(扱うのは)難しい
3. 配列プログラミング - ベクトル演算
繰り返すのではなく配列に適用する
# Array-programming style: every step transforms a whole array, no explicit loops.
# data = IO.read("pride-and-prejudice.txt")
data = "Hello World!"

# Pad with spaces, lower-case each character and blank out everything non-alphabetic.
characters = " #{data} ".chars.map { |ch| ch.downcase.sub(/\P{Alpha}/, " ") }

# Indexes of all spaces; consecutive pairs delimit candidate words.
sp = characters.each_index.select { |index| characters[index] == " " }

# Keep only gaps wide enough to hold a word of two or more letters.
w_ranges = sp.each_cons(2).select { |from, to| to - from > 2 }

swords = w_ranges.map { |from, to| characters[from..to].join.strip }

stop_words = IO.read("stop_words.txt").scan(/\w+/).to_set
ns_words = swords.reject { |word| stop_words.include?(word) }

# Count, order by descending count and print the first 25 as "word - count".
sorted = ns_words.tally.sort_by { |_word, count| -count }
took = sorted.first(25)
puts took.map { |pair| pair.join(" - ") }
基本スタイル
4. 一枚岩 - モノリス
サブルーチンという考え方はない
# Monolith style: one straight-line script with no subroutines.
# word_freqs is an array of [word, count] pairs kept ordered by descending count.
word_freqs = []
stop_words = File.read("stop_words.txt").split(",")
# Single letters are treated as stop words too.
stop_words.concat([*"a".."z"])
File.open("pride-and-prejudice.txt") do |f|
f.each_line do |line|
# Index where the current word started; nil while between words.
start_char = nil
line.each_char.with_index do |c, i|
if !start_char
if c.match?(/\p{Alpha}/)
start_char = i
end
else
# A non-alphabetic character ends the word that began at start_char.
if c.match?(/\P{Alpha}/)
found = false
word = line[start_char...i].downcase
unless stop_words.include?(word)
# Linear search for an existing pair; pair_index ends up at its position.
pair_index = 0
word_freqs.each do |pair|
if word == pair[0]
pair[1] += 1
found = true
break
end
pair_index += 1
end
if !found
word_freqs << [word, 1]
elsif !word_freqs.empty?
# Move the incremented pair toward the front past every earlier pair
# with a smaller count, swapping one position at a time.
pair_index.pred.downto(0) do |n|
if word_freqs[pair_index][1] > word_freqs[n][1]
word_freqs[n], word_freqs[pair_index] = word_freqs[pair_index], word_freqs[n]
pair_index = n
end
end
end
end
start_char = nil
end
end
end
end
end
# The array is already ordered, so the first 25 pairs are the result.
word_freqs.take(25).each do |a, b|
puts "#{a} - #{b}"
end
- なるべく変数のスコープを広くする
- バブルソートを自力で実装してなるべく実行速度を遅くする
- JavaScript を書いているとこんな感じになっていくのはなぜだろう
5. クックブック - 構造化プログラミング
料理をするかのように食材に対して順に変更を加えていく
# Shared state that every procedure below reads and rewrites in place.
$data = []
$words = []
$word_freqs = []

# Appends every character of the input file to $data.
def read_file
  $data = $data + IO.read("pride-and-prejudice.txt").chars
end

# Lower-cases alphabetic characters in $data and turns everything else into a space.
def filter_chars_and_normalize
  $data.map! { |char| char.match?(/\p{Alpha}/) ? char.downcase : " " }
end

# Splits the characters in $data into words and appends them to $words.
def scan
  $words = $words + $data.join.split
end

# Drops the words listed in stop_words.txt and all single letters from $words.
def remove_stop_words
  ignored = IO.read("stop_words.txt").scan(/\w+/) + ("a".."z").to_a
  $words = $words - ignored
end

# Replaces $word_freqs with a word => count hash built from $words.
def frequencies
  $word_freqs = $words.tally
end

# Replaces $word_freqs with [word, count] pairs ordered by descending count.
def sort
  $word_freqs = $word_freqs.sort_by { |pair| -pair.last }
end
# Run the recipe: each step mutates the shared globals prepared by the previous one.
read_file
filter_chars_and_normalize
scan
remove_stop_words
frequencies
sort

$word_freqs.first(25).each { |word, count| puts "#{word} - #{count}" }
- 共有データのスコープがとても広い
- 同じメソッド(手続き)を連続で呼ぶと壊れたりする
6. パイプライン - 関数型プログラミング
クックブックの対極にある感じ
# Returns the whole contents of the file at path_to_file.
def read_file(path_to_file)
  IO.read(path_to_file)
end

# Returns str_data lower-cased, with each run of non-word characters
# (and underscores) collapsed into one space.
def filter_chars_and_normalize(str_data)
  lowered = str_data.downcase
  lowered.gsub(/[\W_]+/, " ")
end

# Splits str_data on whitespace into a list of words.
def scan(str_data)
  str_data.split
end

# Returns word_list without the words in stop_words.txt and without single letters.
def remove_stop_words(word_list)
  ignored = IO.read("stop_words.txt").scan(/\w+/) + ("a".."z").to_a
  word_list - ignored
end

# Returns a word => count hash.
def frequencies(word_list)
  word_list.tally
end

# Returns [word, count] pairs ordered by descending count.
def sort(word_freqs)
  word_freqs.sort_by { |_word, count| -count }
end

# Prints "word - count" for each pair, recursing on the remainder of the list.
def print_all(word_freqs)
  return if word_freqs.empty?

  word, freq = word_freqs.first
  puts "#{word} - #{freq}"
  print_all(word_freqs.drop(1))
end
# The pipeline as one nested expression; evaluation runs from the innermost call outwards.
print_all(
  sort(
    frequencies(
      remove_stop_words(
        scan(
          filter_chars_and_normalize(
            read_file("pride-and-prejudice.txt")
          )
        )
      )
    )
  ).first(25)
)
実行順序が右から左になってちょっと見にくい
7. コードゴルフ - ワンライナー
できるだけ少ない行数で実装する
# Three statements: load the stop words, tokenize the text, print the 25 most frequent remaining words.
stops = File.read("stop_words.txt").scan(/\w+/)
tokens = File.read("pride-and-prejudice.txt").downcase.scan(/[a-z]{2,}/)
puts((tokens - stops).tally.sort_by { |_word, count| -count }.first(25).map { |pair| pair.join(" - ") })
関数合成
8. 合わせ鏡 - 再帰
繰り返しは再帰で行う
# Defines `count` by compiling its source with tail-call optimization enabled and
# evaluating the result. `count` walks `words` recursively: it increments
# word_freqs[word] for the first word unless stop_words includes it, then calls
# itself on the rest of the list as its final expression. The caller passes a
# Hash with default value 0 as word_freqs. It returns nil once the list is empty.
RubyVM::InstructionSequence.compile(<<~CODE, __FILE__, __dir__, __LINE__, tailcall_optimization: true).eval
def count(words, stop_words, word_freqs)
if words.empty?
return
end
word = words.first
unless stop_words.include?(word)
word_freqs[word] += 1
end
count(words.drop(1), stop_words, word_freqs)
end
CODE
# Recursively prints "word - count" for each pair in word_freqs; returns nil.
def print_all(word_freqs)
  return if word_freqs.empty?

  word, freq = word_freqs.first
  puts "#{word} - #{freq}"
  print_all(word_freqs.drop(1))
end
# Driver for the recursive style: load the stop words, tokenize the novel into
# lower-case words of two or more letters, count them recursively and print the
# 25 most frequent.
stop_words = File.read("stop_words.txt").scan(/\w+/).to_set
# Smaller input for experiments. Kept commented out: its result was overwritten
# by the next line anyway, and reading it failed whenever input.txt was absent.
# words = File.read("input.txt").downcase.scan(/[a-z]{2,}/)
words = File.read("pride-and-prejudice.txt").downcase.scan(/[a-z]{2,}/)
# Default value 0 so that `count` can increment unseen words directly.
word_freqs = Hash.new(0)
count(words, stop_words, word_freqs)
print_all word_freqs.sort_by { -_2 }.take(25)
デフォルトでは末尾再帰最適化が効いていないので普通に実行するとスタックが死ぬ。その場合 tailcall_optimization: true
で該当コードをコンパイルするか words.each_slice(1000) などとして count を分割して呼ぶ。
9. 継続 - 参照渡し
次に実行するメソッドを渡す
# Continuation-passing style: every function does its step and then calls `func`
# with the result plus the function that should run after `func`.

# Reads the file, then hands the text on; `normalize` is named as the next-next step.
def read_file(path, func)
  contents = IO.read(path)
  func.call(contents, method(:normalize))
end

# Collapses each run of non-word characters (and underscores) into one space.
def filter_chars(str, func)
  cleaned = str.gsub(/[\W_]+/, " ")
  func.call(cleaned, method(:scan))
end

# Lower-cases the text.
def normalize(str, func)
  func.call(str.downcase, method(:remove_stop_words))
end

# Splits the text on whitespace.
def scan(str, func)
  func.call(str.split, method(:frequencies))
end

# Removes the words listed in stop_words.txt and all single letters.
def remove_stop_words(words, func)
  stop_words = File.read("stop_words.txt").scan(/\w+/) + ("a".."z").to_a
  remaining = words - stop_words
  func.call(remaining, method(:sort))
end

# Counts occurrences of each word.
def frequencies(words, func)
  func.call(words.tally, method(:print_text))
end

# Orders the pairs by descending count.
def sort(freqs, func)
  ranked = freqs.sort_by { |_word, count| -count }
  func.call(ranked, method(:no_op))
end

# Prints the first 25 pairs as "word - count", then calls the final continuation.
def print_text(freqs, func)
  puts freqs.first(25).map { |pair| pair.join(" - ") }
  func.call(method(:no_op))
end

# End of the chain: accepts a continuation and does nothing.
def no_op(func)
end
# Start the chain: read the file and continue with filter_chars.
first_step = method(:filter_chars)
read_file("pride-and-prejudice.txt", first_step)
頭がこんがらがる
10. 単子 - モナド
右から左に実行するイメージになってしまうパイプラインの問題を解決する
# Wraps a value and lets callers apply functions to it one after another.
class TFTheOne
  def initialize(value)
    @value = value
  end

  # Replaces the wrapped value with func[value] and returns self for chaining.
  def bind(func)
    tap { @value = func[@value] }
  end

  # String form of the wrapped value.
  def to_s = @value.to_s
end
# Returns the contents of the file at path.
def read_file(path)
  IO.read(path)
end

# Collapses each run of non-word characters (and underscores) into one space.
def filter_chars(str)
  str.gsub(/[\W_]+/, " ")
end

# Lower-cases the text.
def normalize(str)
  str.downcase
end

# Splits the text on whitespace.
def scan(str)
  str.split
end

# Removes the words listed in stop_words.txt and all single letters.
def remove_stop_words(words)
  ignored = IO.read("stop_words.txt").scan(/\w+/) + ("a".."z").to_a
  words - ignored
end

# Returns a word => count hash.
def frequencies(words)
  words.tally
end

# Returns [word, count] pairs ordered by descending count.
def sort(freqs)
  freqs.sort_by { |_word, count| -count }
end

# Formats the first 25 pairs as newline-separated "word - count" lines.
def top25_freqs(freqs)
  freqs.first(25).map { |pair| pair.join(" - ") }.join("\n")
end
# Steps listed in the order they are applied; each is bound onto the wrapped value in turn.
steps = %i[read_file filter_chars normalize scan remove_stop_words frequencies sort top25_freqs]
steps
  .reduce(TFTheOne.new("pride-and-prejudice.txt")) { |wrapped, step| wrapped.bind(method(step)) }
  .display
- 適用するメソッドを順に書ける
- パイプラインでは右から左だったが左から右(または上から下)になっている
- 値が順に変化していく様子はカプセル化されたクックブックスタイルにも見える
オブジェクトとオブジェクトの相互作用
11. モノのプログラム - オブジェクト
データに直接アクセスさせない
# The words of a text file; the data itself is reachable only through #each.
class Document
  def initialize(path_to_file)
    @path_to_file = path_to_file
  end

  # Iterates over every lower-case word of two or more letters.
  def each(...)
    word_list.each(...)
  end

  private

  # Reads and tokenizes the file on first use, then reuses the result.
  def word_list
    @words ||= IO.read(@path_to_file).downcase.scan(/[a-z]{2,}/)
  end
end

# Membership test against the words listed in stop_words.txt.
class StopWordList
  def include?(word)
    stop_words.include?(word)
  end

  private

  # Loaded from disk on first use.
  def stop_words
    @set ||= IO.read("stop_words.txt").scan(/\w+/).to_set
  end
end

# Running word counts.
class Frequency
  def initialize
    @freqs = Hash.new(0)
  end

  # Adds one occurrence of word and returns its new count.
  def increment(word)
    @freqs[word] += 1
  end

  # [word, count] pairs ordered by descending count.
  def sorted
    @freqs.sort_by { |_word, count| -count }
  end
end

# Wires the three objects together and prints the 25 most frequent words.
class Controller
  def initialize(path_to_file)
    @document = Document.new(path_to_file)
    @stop_word_list = StopWordList.new
    @frequency = Frequency.new
  end

  def run
    @document.each do |word|
      next if @stop_word_list.include?(word)

      @frequency.increment(word)
    end
    puts @frequency.sorted.first(25).map { |pair| pair.join(" - ") }
  end
end
# Build the controller for the novel and run the report.
controller = Controller.new("pride-and-prejudice.txt")
controller.run
12. レターボックス - メッセージパッシング
やりとりは dispatch のみ
# Letterbox style: each object exposes only #dispatch; everything else is
# private and reached by sending a message name plus arguments.

class Document
  def dispatch(...)
    __send__(...)
  end

  private

  # Message :setup - reads the whole file into memory.
  def setup(path_to_file)
    @data = IO.read(path_to_file)
  end

  # Message :words - lower-case words of two or more letters.
  def words
    lowered = @data.downcase
    lowered.scan(/[a-z]{2,}/)
  end
end

class StopWordList
  def dispatch(...)
    __send__(...)
  end

  private

  # Message :setup - loads stop_words.txt.
  def setup
    @stop_words = File.read("stop_words.txt").scan(/\w+/).to_set
  end

  # Message :include? - is word a stop word?
  def include?(word)
    @stop_words.include?(word)
  end
end

class Frequency
  def initialize
    @freqs = Hash.new(0)
  end

  def dispatch(...)
    __send__(...)
  end

  private

  # Message :increment - adds one occurrence of word.
  def increment(word)
    @freqs[word] += 1
  end

  # Message :sorted - [word, count] pairs by descending count.
  def sorted
    @freqs.sort_by { |_word, count| -count }
  end
end

class Controller
  def dispatch(...)
    __send__(...)
  end

  private

  # Message :setup - creates the collaborators and initializes them by message.
  def setup(path_to_file)
    @document = Document.new
    @stop_word_list = StopWordList.new
    @frequency = Frequency.new
    @document.dispatch(:setup, path_to_file)
    @stop_word_list.dispatch(:setup)
  end

  # Message :run - counts the non-stop words and prints the top 25.
  def run
    @document.dispatch(:words).each do |word|
      next if @stop_word_list.dispatch(:include?, word)

      @frequency.dispatch(:increment, word)
    end
    puts @frequency.dispatch(:sorted).first(25).map { |pair| pair.join(" - ") }
  end
end
# Drive the controller purely through messages: first :setup, then :run.
controller = Controller.new
[[:setup, "pride-and-prejudice.txt"], [:run]].each { |message| controller.dispatch(*message) }
13. 閉写像 - プロトタイプ
クラスを持っていない言語がハッシュをどうにかしてクラスのように扱う
# Prototype style: each "object" is a hash holding both its data and the lambdas
# that operate on it. The lambdas reach their own hash through the enclosing local.

# Holds the words of the input text.
data_storage_obj = {
  data: [],
  init: ->(path_to_file) { data_storage_obj[:data] = IO.read(path_to_file).downcase.scan(/[a-z]{2,}/) },
  words: -> { data_storage_obj[:data] },
}

# Holds the stop words and answers membership queries.
stop_words_obj = {
  stop_words: [],
  init: -> { stop_words_obj[:stop_words] = IO.read("stop_words.txt").scan(/\w+/) },
  "include?": ->(word) { stop_words_obj[:stop_words].include?(word) },
}

# Holds the counts; :top25 is built on top of :sorted.
word_freqs_obj = {
  freqs: Hash.new(0),
  increment: ->(word) { word_freqs_obj[:freqs][word] += 1 },
  sorted: -> { word_freqs_obj[:freqs].sort_by { |_word, count| -count } },
  top25: -> { word_freqs_obj[:sorted].call.first(25) },
}
# Initialize both data holders, count every non-stop word, then print the top 25.
data_storage_obj[:init].("pride-and-prejudice.txt")
stop_words_obj[:init].()
data_storage_obj[:words].().each do |word|
  next if stop_words_obj[:include?].(word)

  word_freqs_obj[:increment].(word)
end
puts word_freqs_obj[:top25].().map { |pair| pair.join(" - ") }
14. 抽象的なモノ - 抽象データ型
Java の interface のようなもの
# Abstract "interfaces": every operation raises until a subclass implements it.

class IDocument
  def each(...)
    raise(NotImplementedError, "#{__method__} is not implemented")
  end
end

class IStopWordList
  def include?(word)
    raise(NotImplementedError, "#{__method__} is not implemented")
  end
end

class IFrequency
  def increment(word)
    raise(NotImplementedError, "#{__method__} is not implemented")
  end

  def sorted
    raise(NotImplementedError, "#{__method__} is not implemented")
  end
end

# Concrete document: the words of a text file.
class Document < IDocument
  def initialize(path_to_file)
    @path_to_file = path_to_file
  end

  # Iterates over every lower-case word of two or more letters.
  def each(...)
    word_list.each(...)
  end

  private

  # Reads and tokenizes the file on first use.
  def word_list
    @words ||= IO.read(@path_to_file).downcase.scan(/[a-z]{2,}/)
  end
end

# Concrete stop-word list backed by stop_words.txt.
class StopWordList < IStopWordList
  def include?(word)
    stop_words.include?(word)
  end

  private

  # Loaded from disk on first use.
  def stop_words
    @set ||= IO.read("stop_words.txt").scan(/\w+/).to_set
  end
end

# Concrete frequency counter.
class Frequency < IFrequency
  def initialize
    @freqs = Hash.new(0)
  end

  # Adds one occurrence of word and returns its new count.
  def increment(word)
    @freqs[word] += 1
  end

  # [word, count] pairs ordered by descending count.
  def sorted
    @freqs.sort_by { |_word, count| -count }
  end
end

# Wires the concrete implementations together and prints the top 25 words.
class Controller
  def initialize(path_to_file)
    @document = Document.new(path_to_file)
    @stop_word_list = StopWordList.new
    @frequency = Frequency.new
  end

  def run
    @document.each do |word|
      next if @stop_word_list.include?(word)

      @frequency.increment(word)
    end
    puts @frequency.sorted.first(25).map { |pair| pair.join(" - ") }
  end
end
# Build the controller for the novel and run the report.
controller = Controller.new("pride-and-prejudice.txt")
controller.run
ダックタイピングな言語だとそんなに利点はない
15. ハリウッド - 制御の反転
必要となったとき呼ばれる
# Inversion of control: components register handlers and the framework calls them.
class Framework
  def initialize
    @load_event_handlers = []
    @dowork_event_handlers = []
    @end_event_handlers = []
  end

  def register_for_load_event(handler)
    @load_event_handlers << handler
  end

  def register_for_dowork_event(handler)
    @dowork_event_handlers << handler
  end

  def register_for_end_event(handler)
    @end_event_handlers << handler
  end

  # Fires the three phases in order; only load handlers receive the path.
  def run(path_to_file)
    @load_event_handlers.each { |handler| handler.call(path_to_file) }
    @dowork_event_handlers.each { |handler| handler.call }
    @end_event_handlers.each { |handler| handler.call }
  end
end

# Loads the text on the load event and, on the do-work event, reports every
# non-stop word to the handlers registered in word_event_handlers.
class Document
  attr_reader :word_event_handlers

  def initialize(app, stop_word_list)
    @word_event_handlers = []
    app.register_for_load_event(method(:load))
    app.register_for_dowork_event(method(:produce_words))
    @stop_word_list = stop_word_list
  end

  private

  def load(path_to_file)
    @data = IO.read(path_to_file)
  end

  def produce_words
    @data.downcase.scan(/[a-z]{2,}/) do |word|
      next if @stop_word_list.include?(word)

      @word_event_handlers.each { |handler| handler.call(word) }
    end
  end
end

# Loads stop_words.txt on the load event.
class StopWordList
  def initialize(app)
    app.register_for_load_event(method(:load))
  end

  def include?(word)
    @stop_words.include?(word)
  end

  private

  # Whatever the framework passes to load handlers is ignored here.
  def load(*_ignored)
    @stop_words = IO.read("stop_words.txt").scan(/\w+/).to_set
  end
end

# Counts the words reported by the document and prints the top 25 on the end event.
class Frequency
  def initialize(app, document)
    @freqs = Hash.new(0)
    document.word_event_handlers << method(:increment)
    app.register_for_end_event(method(:display))
  end

  private

  def increment(word)
    @freqs[word] += 1
  end

  def display
    ranked = @freqs.sort_by { |_word, count| -count }
    puts ranked.first(25).map { |pair| pair.join(" - ") }
  end
end
# Register the components (stop-word list first, so it loads first), then let the framework run.
app = Framework.new
document = Document.new(app, StopWordList.new(app))
Frequency.new(app, document)
app.run("pride-and-prejudice.txt")
実行順序が把握しづらい
16. 掲示板 - pub/sub
やりとりは中央にある掲示板を通してのみ
# Bulletin board: components never call each other, they only publish events
# to the hub and subscribe to the ones they care about.
class EventHub
  def initialize
    @subscriptions = Hash.new { |table, type| table[type] = [] }
  end

  def subscribe(type, handler)
    @subscriptions[type] << handler
  end

  # Calls every handler subscribed to type with the given arguments.
  def publish(type, *args)
    @subscriptions[type].each { |handler| handler.call(*args) }
  end
end

# Reads the text on :load; on :start publishes one :word per word, then :eof.
class Document
  def initialize(event_hub)
    @event_hub = event_hub
    @event_hub.subscribe(:load, method(:load))
    @event_hub.subscribe(:start, method(:produce_words))
  end

  private

  def load(path_to_file)
    @data = IO.read(path_to_file)
  end

  def produce_words
    @data.downcase.scan(/[a-z]{2,}/) { |word| @event_hub.publish(:word, word) }
    @event_hub.publish(:eof)
  end
end

# Loads the stop words on :load and republishes non-stop words as :valid_word.
class StopWordList
  def initialize(event_hub)
    @stop_words = []
    @event_hub = event_hub
    @event_hub.subscribe(:load, method(:load))
    @event_hub.subscribe(:word, method(:include?))
  end

  def load(*_ignored)
    @stop_words = IO.read("stop_words.txt").scan(/\w+/)
  end

  def include?(word)
    return if @stop_words.include?(word)

    @event_hub.publish(:valid_word, word)
  end
end

# Counts :valid_word events and prints the top 25 on :print.
class Frequency
  def initialize(event_hub)
    @word_freqs = Hash.new(0)
    @event_hub = event_hub
    @event_hub.subscribe(:valid_word, method(:increment))
    @event_hub.subscribe(:print, method(:display))
  end

  def increment(word)
    @word_freqs[word] += 1
  end

  def display
    ranked = @word_freqs.sort_by { |_word, count| -count }
    puts ranked.first(25).map { |pair| pair.join(" - ") }
  end
end

# Translates :run into :load + :start, and :eof into :print.
class Application
  def initialize(event_hub)
    @event_hub = event_hub
    @event_hub.subscribe(:run, method(:run))
    @event_hub.subscribe(:eof, method(:stop))
  end

  def run(path_to_file)
    @event_hub.publish(:load, path_to_file)
    @event_hub.publish(:start)
  end

  def stop
    @event_hub.publish(:print)
  end
end
# Attach every component to one shared hub, then post the :run event.
event_hub = EventHub.new
[Document, StopWordList, Frequency, Application].each { |component| component.new(event_hub) }
event_hub.publish(:run, "pride-and-prejudice.txt")
ハリウッドスタイル以上に実行順序の把握が難しい
リフレクションとメタプログラミング
17. 内省性 - イントロスペクション
自分自身の情報にアクセスする
# Returns the words listed in stop_words.txt, but only when the calling method
# is named `extract_words`; for any other caller it returns nil.
#
# The caller is identified with Location#base_label, the bare method name.
# Location#label is decorated with the owner on newer Rubies
# ("Object#extract_words"), so comparing it to the bare name stops matching there.
def read_stop_words
  caller_name = caller_locations(1..1).first&.base_label
  return unless caller_name == "extract_words"

  IO.read("stop_words.txt").scan(/\w+/)
end
# Returns the lower-case words (two or more letters) of the file minus the stop
# words. The path is read back through the method's own binding instead of
# using the parameter directly.
def extract_words(path_to_file)
  path = binding.local_variable_get(:path_to_file)
  tokens = IO.read(path).downcase.scan(/[a-z]{2,}/)
  tokens - read_stop_words
end

# Returns a word => count hash; the argument is likewise fetched via the binding.
def frequencies(words)
  word_list = binding.local_variable_get(:words)
  word_list.tally
end
# Count the words of the novel and print the 25 most frequent.
words = extract_words("pride-and-prejudice.txt")
freqs = frequencies(words)
puts freqs.sort_by { |_word, count| -count }.first(25).map { |pair| pair.join(" - ") }
read_stop_words は extract_words から呼ばれているときだけ動く
18. 自己反映性 - リフレクション
実行時にコードを自分で作る
# Reflective style: the three functions exist only as source strings and are
# turned into lambdas with eval at run time.

# Referenced by name from inside the eval'd extract_words source below.
stops = IO.read("stop_words.txt").scan(/\w+/)
# The condition is hard-coded; the else branch holds do-nothing stand-ins
# applied to this very file.
if true
extract_words_func = %(-> path_to_file { File.read(path_to_file).downcase.scan(/[a-z]{2,}/) - stops })
frequencies_func = %(-> words { words.tally })
sort_func = %(-> freqs { freqs.sort_by { -_2 } })
path_to_name = "pride-and-prejudice.txt"
else
extract_words_func = %(-> path_to_file { [] })
frequencies_func = %(-> words { [] })
sort_func = %(-> freqs { {} })
path_to_name = __FILE__
end
# Build the lambdas from their source text.
extract_words = eval(extract_words_func)
frequencies = eval(frequencies_func)
sort = eval(sort_func)
# Apply them innermost first: extract, count, sort; then print the top 25.
freqs = sort[frequencies[extract_words[path_to_name]]]
puts freqs.take(25).collect { |e| e * " - " }
19. 横断的関心 - アスペクト指向
元のコードはそのままで機能を追加する
require "active_support/core_ext/benchmark"
# Returns the lower-case words (two or more letters) of the file minus the stop words.
def extract_words(path_to_file)
  tokens = IO.read(path_to_file).downcase.scan(/[a-z]{2,}/)
  ignored = IO.read("stop_words.txt").scan(/\w+/)
  tokens - ignored
end

# Returns a word => count hash.
def frequencies(words)
  words.tally
end

# Returns [word, count] pairs ordered by descending count.
def sort(freqs)
  freqs.sort_by { |_word, count| -count }
end
# Wraps each named top-level method so that every call prints
# "<name>: <elapsed>ms" and then returns the original result.
#
# names - Symbols naming already-defined methods.
#
# The wrapper forwards positional arguments, keyword arguments and the block,
# so methods with keyword parameters can be profiled as well. Elapsed time is
# measured with the monotonic clock from the standard library. If the wrapped
# method raises, the exception propagates and nothing is printed.
def profile(*names)
  names.each do |name|
    original = method(name)
    define_method(name) do |*args, **kwargs, &block|
      started = Process.clock_gettime(Process::CLOCK_MONOTONIC, :float_millisecond)
      result = original.call(*args, **kwargs, &block)
      elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC, :float_millisecond) - started
      puts "#{original.name}: #{elapsed.round(2)}ms"
      result
    end
  end
end
# Attach timing to the three functions without touching their definitions, then run as usual.
profile :extract_words, :frequencies, :sort
words = extract_words("pride-and-prejudice.txt")
freqs = sort(frequencies(words))
puts freqs.first(25).map { |pair| pair.join(" - ") }
`profile :extract_words` で `extract_words` メソッドにベンチマーク機能がつく
20. プラグイン - 依存性注入
メインプログラムはそのままで実装を切り替える
# config.yml: maps each plugin slot to the Ruby source file that implements it.
# The two entries must be nested under `plugins`; the main program looks them up
# as plugins -> extract_words and plugins -> frequencies.
plugins:
  extract_words: extract_words1.rb
  frequencies: frequencies1.rb
# Plugin: returns the lower-case words (two or more letters) of the file minus the stop words.
def extract_words(path_to_file)
  tokens = IO.read(path_to_file).downcase.scan(/[a-z]{2,}/)
  ignored = IO.read("stop_words.txt").scan(/\w+/)
  tokens - ignored
end

# Plugin: returns the 25 most frequent [word, count] pairs, highest count first.
def frequencies(words)
  counts = words.tally
  counts.sort_by { |_word, count| -count }.first(25)
end
require "yaml"

# Main program: which implementation gets loaded is decided entirely by config.yml.
config = YAML.load_file("config.yml")

# NOTE(review): eval executes whatever the configured files contain; only point
# the config at trusted files.
extract_words_plugin = config.dig("plugins", "extract_words")
eval IO.read(extract_words_plugin)
frequencies_plugin = config.dig("plugins", "frequencies")
eval IO.read(frequencies_plugin)

words = extract_words("pride-and-prejudice.txt")
word_freqs = frequencies(words)
puts word_freqs.map { |pair| pair.join(" - ") }
異常事態
21. 構成主義 - 防御的プログラミング
エラーはなかったことにする
# Defensive style: every function checks its input and falls back to a harmless
# value instead of failing.

# Returns the lower-case words (two or more letters) of the file. Returns []
# for a non-String or empty path, and prints the error and returns [] when the
# file cannot be read.
def extract_words(path_to_file)
  return [] if !path_to_file.is_a?(String) || path_to_file.empty?

  text =
    begin
      IO.read(path_to_file)
    rescue => e
      puts e
      return []
    end
  text.downcase.scan(/[a-z]{2,}/)
end

# Returns words without the stop words. Returns [] when words is not an Array,
# and the unfiltered words (after printing the error) when stop_words.txt
# cannot be read.
def remove_stop_words(words)
  return [] unless words.is_a?(Array)

  ignored =
    begin
      IO.read("stop_words.txt").scan(/\w+/)
    rescue => e
      puts e
      return words
    end
  words - ignored
end

# Returns a word => count hash, or {} for anything but a non-empty Array.
def frequencies(words)
  return {} if !words.is_a?(Array) || words.empty?

  words.tally
end

# Returns pairs ordered by descending count, or [] for anything but a non-empty Hash.
def sort(freqs)
  return [] if !freqs.is_a?(Hash) || freqs.empty?

  freqs.sort_by { |_word, count| -count }
end
# Use the first command-line argument as input, falling back to the novel.
filename = ARGV.fetch(0, "pride-and-prejudice.txt")
words = remove_stop_words(extract_words(filename))
freqs = sort(frequencies(words))
puts freqs.first(25).map { |pair| pair.join(" - ") }
- HTML や CSS が壊れていてもブラウザが止まらないのはこのスタイルだから
- Ruby で `"foo"[100]` が `nil` を返したり `"".to_i` が `0` を返すのもこのスタイル
22. 癇癪持ち - 契約による設計
エラーが起きたらすぐに抗議する
# Tantrum style: every function states its precondition as `<condition> or raise`
# and refuses to continue the moment one is violated.

# Returns the lower-case words (two or more letters) of the file.
# Raises TypeError for a non-String path and ArgumentError for an empty one;
# read errors are reported and re-raised.
def extract_words(path_to_file)
  path_to_file.is_a?(String) or raise TypeError, "I need a string!"
  !path_to_file.empty? or raise ArgumentError, "I need a non-empty string!"
  begin
    contents = IO.read(path_to_file)
  rescue => e
    puts e.detailed_message
    raise
  end
  contents.downcase.scan(/[a-z]{2,}/)
end

# Returns words without the stop words. Raises TypeError unless words is an
# Array; read errors are reported and re-raised.
def remove_stop_words(words)
  words.is_a?(Array) or raise TypeError, "I need a list!"
  begin
    contents = IO.read("stop_words.txt")
  rescue => e
    puts e.detailed_message
    raise
  end
  words - contents.scan(/\w+/)
end

# Returns a word => count hash. Raises TypeError / ArgumentError for a
# non-Array / empty argument.
def frequencies(words)
  words.is_a?(Array) or raise TypeError, "I need a list!"
  !words.empty? or raise ArgumentError, "I need a non-empty list!"
  words.tally
end

# Returns pairs ordered by descending count. Raises TypeError / ArgumentError
# for a non-Hash / empty argument.
def sort(freqs)
  freqs.is_a?(Hash) or raise TypeError, "I need a dictionary!"
  !freqs.empty? or raise ArgumentError, "I need a non-empty dictionary!"
  freqs.sort_by { |_word, count| -count }
end
# Run the pipeline, check the final result as well, and report any error before re-raising it.
begin
  words = remove_stop_words(extract_words("pride-and-prejudice.txt"))
  freqs = sort(frequencies(words))
  freqs.is_a?(Array) or raise TypeError, "OMG! This is not a list!"
  freqs.size >= 25 or raise "SRSLY? Less than 25 words!"
  puts freqs.first(25).map { |pair| pair.join(" - ") }
rescue => e
  puts e.detailed_message
  raise
end
あまりに多い場合は専用の assert メソッドなどを用意した方がいいかもしれない
# Returns expr when it is truthy; otherwise raises with message, or with
# "Assertion failed" when no message is given.
def assert(expr, message = nil)
  return expr if expr

  raise(message || "Assertion failed")
end
assert を使わない場合は こうあるべき or raise
形式で書けば assert っぽく読める
# Inline assertion: nothing happens when the condition holds; otherwise a
# RuntimeError with the given message is raised.
1 + 2 == 3 or raise "Assertion failed"
23. 受動的攻撃 - 例外
エラーはあとで抗議する
# Passive-aggressive style: the functions only raise; catching and reporting is
# left to the outermost caller.

# Returns the lower-case words (two or more letters) of the file.
# Raises TypeError for a non-String path and ArgumentError for an empty one.
def extract_words(path_to_file)
  path_to_file.is_a?(String) or raise TypeError, "I need a string!"
  !path_to_file.empty? or raise ArgumentError, "I need a non-empty string!"
  contents = IO.read(path_to_file)
  contents.downcase.scan(/[a-z]{2,}/)
end

# Returns words without the stop words. Raises TypeError unless words is an Array.
def remove_stop_words(words)
  words.is_a?(Array) or raise TypeError, "I need a list!"
  words - IO.read("stop_words.txt").scan(/\w+/)
end

# Returns a word => count hash. Raises TypeError / ArgumentError for a
# non-Array / empty argument.
def frequencies(words)
  words.is_a?(Array) or raise TypeError, "I need a list!"
  !words.empty? or raise ArgumentError, "I need a non-empty list!"
  words.tally
end

# Returns pairs ordered by descending count. Raises TypeError / ArgumentError
# for a non-Hash / empty argument.
def sort(freqs)
  freqs.is_a?(Hash) or raise TypeError, "I need a dictionary!"
  !freqs.empty? or raise ArgumentError, "I need a non-empty dictionary!"
  freqs.sort_by { |_word, count| -count }
end
# The only place where errors are caught: report the full message and carry on.
begin
  words = remove_stop_words(extract_words("pride-and-prejudice.txt"))
  freqs = sort(frequencies(words))
  freqs.is_a?(Array) or raise TypeError, "OMG! This is not a list!"
  freqs.size >= 25 or raise "SRSLY? Less than 25 words!"
  puts freqs.first(25).map { |pair| pair.join(" - ") }
rescue => e
  puts e.full_message
end
- なるべく外側で拾うという点が「癇癪持ち」スタイルとは異なる
- 一般的に良いとされている方法
24. 意図の宣言 - 型注釈
ダックタイピングの敵
# typed: strict
require "sorbet-runtime"
extend T::Sig
# Returns the lower-case words (two or more letters) of the file.
# NOTE(review): the T.any(...) element type presumably mirrors how String#scan
# is typed (it yields nested arrays for patterns with capture groups); this
# pattern has no groups, so the elements are plain strings - confirm.
sig { params(path_to_file: String).returns(T::Array[T.any(T::Array[String], String)]) }
def extract_words(path_to_file)
IO.read(path_to_file).downcase.scan(/[a-z]{2,}/)
end
# Returns words without the entries listed in stop_words.txt.
sig { params(words: T::Array[T.untyped]).returns(T::Array[String]) }
def remove_stop_words(words)
stop_words = IO.read("stop_words.txt").scan(/\w+/)
words - stop_words
end
# Returns a word => count hash.
sig { params(words: T::Array[T.untyped]).returns(T::Hash[String, Integer]) }
def frequencies(words)
words.tally
end
# Returns the entries of freqs as [word, count] pairs ordered by descending count.
# The declared return type is an array of [String, Integer] tuples, matching
# what sort_by yields for the Hash[String, Integer] produced by `frequencies`
# (the previous declaration described the pairs as arrays of strings).
sig { params(freqs: T::Hash[T.untyped, T.untyped]).returns(T::Array[[String, Integer]]) }
def sort(freqs)
  freqs.sort_by { -_2 }
end
# Run the typed pipeline and print the 25 most frequent words.
words = remove_stop_words(extract_words("pride-and-prejudice.txt"))
freqs = sort(frequencies(words))
puts freqs.first(25).map { |pair| pair.join(" - ") }
25. 検疫 - 純粋関数と不純関数
表示を伴なうような不純関数を遅延評価させて純粋関数化する
# Collects functions without running them; nothing happens until #execute.
class Quarantine
  def initialize
    @funcs = []
  end

  # Queues func and returns self so calls can be chained.
  def bind(func)
    @funcs << func
    self
  end

  # Runs the queued functions in order, feeding each the previous result.
  # Results that respond to #call are called first to obtain their value.
  # The final value is printed.
  def execute
    value = -> {}
    @funcs.each { |func| value = func[guard_callable(value)] }
    puts guard_callable(value)
  end

  private

  # Returns value.call for callables and value itself for everything else.
  def guard_callable(value)
    value.respond_to?(:call) ? value.call : value
  end
end
# Functions for the quarantine pipeline. Steps that touch the outside world
# return a lambda so the I/O happens only when the pipeline executes; the
# remaining steps are pure.

# Impure (deferred): the input path - first command-line argument or the novel.
def get_input(*)
  -> { ARGV.first || "pride-and-prejudice.txt" }
end

# Impure (deferred): lower-case words (two or more letters) of the file.
def extract_words(path_to_file)
  -> { IO.read(path_to_file).downcase.scan(/[a-z]{2,}/) }
end

# Impure (deferred): words minus the entries listed in stop_words.txt.
def remove_stop_words(words)
  -> { words - IO.read("stop_words.txt").scan(/\w+/) }
end

# Pure: word => count hash.
def frequencies(words)
  words.tally
end

# Pure: [word, count] pairs ordered by descending count.
def sort(freqs)
  freqs.sort_by { -_2 }
end

# Pure: the first 25 pairs formatted as "word - count" lines.
# Returns the lines instead of printing them: printing is the job of whoever
# executes the pipeline, and printing here as well made the executor emit an
# extra blank line for the nil this function used to return.
def top25_freqs(freqs)
  freqs.take(25).collect { |e| e * " - " }
end
# Queue the steps in order; no I/O happens until execute runs them.
steps = %i[get_input extract_words remove_stop_words frequencies sort top25_freqs]
steps
  .reduce(Quarantine.new) { |quarantine, step| quarantine.bind(method(step)) }
  .execute
データ中心
26. データベース - SQL
とりあえず全部DBに入れておく
require "active_record"
require "active_support/core_ext/object/with_options"
# Persistent style: everything (documents, their words, and the characters of
# each word) is stored in an SQLite database file, and the frequencies are
# obtained with a query.
ActiveRecord::Base.establish_connection(adapter: "sqlite3", database: "_tf.db")
ActiveRecord::Migration.verbose = false
# Schema: three tables, each created with the if_not_exists option supplied by with_options.
ActiveRecord::Schema.define do
with_options if_not_exists: true do
create_table :documents do |t|
t.string :name
end
create_table :words do |t|
t.belongs_to :document
t.string :value
end
create_table :characters do |t|
t.belongs_to :word
t.string :value
end
end
end
# A document is identified by its file name; creating one stores all of its words.
class Document < ActiveRecord::Base
has_many :words, dependent: :destroy
# After the row is created, create one Word row per extracted word.
after_create do
words.create!(extract_words.collect { |e| { value: e } })
end
private
# Lower-case words (two or more letters) of the file named `name`, minus the stop words.
def extract_words
stop_words = IO.read("stop_words.txt").scan(/\w+/)
words = IO.read(name).downcase.scan(/[a-z]{2,}/)
words - stop_words
end
end
class Word < ActiveRecord::Base
belongs_to :document
has_many :characters, dependent: :destroy
# Groups by word value, ordered from highest to lowest.
# NOTE(review): `count_all` is presumably the alias ActiveRecord assigns to
# COUNT(*) when `.count` is called on the grouped relation - confirm.
scope :frequency, -> { group(:value).order(:count_all).reverse_order }
# After the row is created, insert one Character row per character of the word.
after_create do
characters.insert_all!(value.chars.collect { |e| { value: e } })
end
end
class Character < ActiveRecord::Base
belongs_to :word
end
# Finds the document row by name, creating it when none exists.
document = Document.find_or_create_by!(name: "pride-and-prejudice.txt")
Document.count # => 1
Word.count # => 56615
Character.count # => 354865
# Grouped count limited to 25 groups, printed as "word - count".
puts document.words.frequency.limit(25).count.collect { |e| e * " - " }
富豪的で好き。初回だけめっちゃ時間かかる。
27. スプレッドシート - リアクティブプログラミング
表計算をイメージする
# One spreadsheet column: a list of values plus an optional formula that recomputes them.
class Column
  attr_accessor :values

  # The block, when given, is the formula; values start out empty.
  def initialize(&formula)
    @formula = formula
    @values = []
  end

  # Recomputes values from the formula. Columns without a formula keep their
  # values (and nil is returned).
  def update
    @values = @formula.call if @formula
  end
end
# Two data columns, then four formula columns, each derived from the ones before it.
all_words = Column.new
stop_words = Column.new
# Same length as all_words; stop words are blanked out with nil.
non_stop_words = Column.new do
  all_words.values.map { |word| word unless stop_words.values.include?(word) }
end
unique_words = Column.new { non_stop_words.values.compact.uniq }
counts = Column.new do
  unique_words.values.map { |word| non_stop_words.values.count(word) }
end
sorted_data = Column.new do
  unique_words.values.zip(counts.values).sort_by { |_word, count| -count }
end

# Listed in dependency order, so a single pass recalculates the whole sheet.
all_columns = { all_words:, stop_words:, non_stop_words:, unique_words:, counts:, sorted_data: }
update = -> { all_columns.values.each { |column| column.update } }

# Fill in the data columns and recalculate.
all_words.values = IO.read("pride-and-prejudice.txt").downcase.scan(/[a-z]{2,}/)
stop_words.values = IO.read("stop_words.txt").scan(/\w+/)
update.call
puts sorted_data.values.first(25).map { |pair| pair.join(" - ") }
実際に視覚化するとわかりやすい
require "table_format"
# Small example: change the data columns, recalculate, and show the sheet row by row.
all_words.values = %w(a b c bar b c foo c)
stop_words.values = %w(foo bar)
update.call
rows = all_words.values.each_index.map do |row|
  # One hash per row: column name => that column's value in this row (nil when the column is shorter).
  all_columns.transform_values { |column| column.values[row] }
end
tp rows
| all_words | stop_words | non_stop_words | unique_words | counts | sorted_data |
|---|---|---|---|---|---|
| a | foo | a | a | 1 | ["c", 3] |
| b | bar | b | b | 2 | ["b", 2] |
| c |  | c | c | 3 | ["a", 1] |
| bar |  |  |  |  |  |
| b |  | b |  |  |  |
| c |  | c |  |  |  |
| foo |  |  |  |  |  |
| c |  | c |  |  |  |
28. データストリーム - ジェネレータ
必要な分だけ少しずつ処理する
# Lazy pipeline: each stage yields items one at a time to the block it was given.

# Yields every line of the file, lower-cased.
def lines(path_to_file)
  File.open(path_to_file) do |file|
    file.each_line { |raw| yield raw.downcase }
  end
end

# Yields every word of two or more letters.
def all_words(path_to_file)
  lines(path_to_file) do |line|
    line.scan(/[a-z]{2,}/) { |word| yield word }
  end
end

# Yields every word that is not listed in stop_words.txt.
def non_stop_words(path_to_file)
  ignored = IO.read("stop_words.txt").scan(/\w+/).to_set
  all_words(path_to_file) do |word|
    yield word unless ignored.include?(word)
  end
end

# Yields the pairs sorted by descending count after the first word, after every
# further 5000 words, and once more when the input is exhausted.
def count_and_sort(path_to_file)
  counts = Hash.new(0)
  seen = 0
  non_stop_words(path_to_file) do |word|
    counts[word] += 1
    yield counts.sort_by { |_word, count| -count } if (seen % 5000).zero?
    seen += 1
  end
  yield counts.sort_by { |_word, count| -count }
end
# Print a separator and the current top 25 every time a new snapshot arrives.
count_and_sort("pride-and-prejudice.txt") do |snapshot|
  puts "-----------------------------"
  puts snapshot.first(25).map { |pair| pair.join(" - ") }
end
終わりがないデータやまとめて処理するメモリが足りないときに使う
並行性
29. アクター - スレッド
スレッド間のやりとりはスレッド毎のキューへのプッシュのみ
require "active_support/core_ext/module/delegation"
# Base class for actors: each instance owns a queue and a thread that takes
# messages off the queue and dispatches them as private method calls.
class ActiveObject
# `<<` puts a message on the queue; `join` waits for the thread.
delegate :<<, to: :@queue
delegate :join, to: :@thread
# `kill` terminates the thread; it is private, so it is reachable only as a message.
private delegate :kill, to: :@thread
def initialize
@queue = Queue.new
@thread = Thread.start do
loop do
# A message is an array: method name followed by its arguments.
dispatch(*@queue.shift)
end
end
end
private
# Flip the condition to log every dispatched message.
if false
def dispatch(type, *args)
puts "#{self.class.name}##{type}"
send(type, *args)
end
else
def dispatch(...)
send(...)
end
end
end
# Holds the text and feeds its words to the stop-word actor.
class Document < ActiveObject
private
def setup(path_to_file, stop_word_list)
@stop_word_list = stop_word_list
@data = IO.read(path_to_file)
end
# Sends every word for filtering, then asks for the sorted result to be
# delivered to recipient.
def process_words(recipient)
@data.downcase.scan(/[a-z]{2,}/) do |word|
@stop_word_list << [:filter, word]
end
@stop_word_list << [:sorted, recipient]
end
# Passes the kill message down the chain before stopping its own thread.
def kill
@stop_word_list << [:kill]
super
end
end
# Forwards the words that are not stop words to the frequency actor.
class StopWordList < ActiveObject
private
def setup(frequency)
@frequency = frequency
@stop_words = IO.read("stop_words.txt").scan(/\w+/).to_set
end
def filter(word)
unless @stop_words.include?(word)
@frequency << [:increment, word]
end
end
# Relays the request for the sorted result.
def sorted(recipient)
@frequency << [:sorted, recipient]
end
# Passes the kill message on before stopping its own thread.
def kill
@frequency << [:kill]
super
end
end
# Counts words and sends the sorted pairs to whoever asked for them.
class Frequency < ActiveObject
private
def increment(word)
freqs[word] += 1
end
def sorted(recipient)
recipient << [:top25, freqs.sort_by { -_2 }]
end
# Created on first use.
def freqs
@freqs ||= Hash.new(0)
end
end
# Starts the processing and prints the result when it arrives.
class Controller < ActiveObject
private
def run(document)
@document = document
@document << [:process_words, self]
end
# Prints the top 25, then queues its own kill message to begin the shutdown.
def top25(sorted)
puts sorted.take(25).collect { |e| e * " - " }
self << [:kill]
end
def kill
@document << [:kill]
super
end
end
# Build the actors from the end of the pipeline backwards so each one can be
# handed its downstream neighbour, kick off the run, then wait for every
# worker thread to finish.
counter = Frequency.new
filter = StopWordList.new
filter << [:setup, counter]
source = Document.new
source << [:setup, "pride-and-prejudice.txt", filter]
driver = Controller.new
driver << [:run, source]
[counter, filter, source, driver].each { |actor| actor.join }
スレッドを綺麗に終了させるのが難しいので単に (Thread.list - [Thread.main]).each(&:kill)
でもいいかもしれない
30. データ空間 - 並列処理
スレッド間のやりとりはスレッドとは独立した2つのキューのみ
require "timeout"
# Two shared queues are the only channel between threads: one holds the
# words still to be counted, the other receives each worker's partial counts.
word_space = Queue.new
freq_space = Queue.new
stop_words = IO.read("stop_words.txt").scan(/\w+/).to_set
IO.read("pride-and-prejudice.txt").downcase.scan(/[a-z]{2,}/) do |word|
  word_space << word
end

workers = Array.new(5) do
  Thread.new do
    partial = Hash.new(0)
    loop do
      # A worker gives up once it has waited one second for a word.
      word =
        begin
          Timeout.timeout(1) { word_space.pop }
        rescue Timeout::Error
          break
        end
      Thread.pass # hand over control so the work is spread aggressively across threads
      partial[word] += 1 unless stop_words.include?(word)
    end
    freq_space << partial
  end
end
workers.each(&:join)

# Merge the partial tables, adding the counts of words seen by several workers.
freqs = {}
freqs.update(freq_space.pop) { |_word, left, right| left + right } until freq_space.empty?
puts freqs.sort_by { |_word, count| -count }.first(25).map { |pair| pair.join(" - ") }
1秒間暇だったらスレッドたちは自動的に終了する
31. マップリデュース - MapReduce
単語抽出を並列処理する
# Cuts +text+ into chunks of at most +nlines+ lines, yields each chunk as a
# single string, and returns the array of the block's results.
def partition(text, nlines)
  text.each_line.each_slice(nlines).map { |chunk| yield chunk.join }
end
# Returns a Hash of word => occurrences for +text+: runs of two or more
# letters, lower-cased, minus the words listed in stop_words.txt.
def split_words(text)
  candidates = text.downcase.scan(/[a-z]{2,}/)
  ignored = IO.read("stop_words.txt").scan(/\w+/)
  # Counting per chunk applies the partial counting of exercise 31-2.
  candidates.difference(ignored).tally
end
text = IO.read("pride-and-prejudice.txt")
# Map: one thread per 200-line chunk; +value+ waits for and returns each result.
splits = partition(text, 200) { |e| Thread.start { split_words(e) } }.map(&:value)
# Reduce: fold the partial tables together, adding counts for shared words.
freqs = splits.each_with_object({}) do |partial, total|
  total.update(partial) { |_word, left, right| left + right }
end
puts freqs.sort_by { |_word, count| -count }.first(25).map { |pair| pair.join(" - ") }
途中で [[word1, 1], [word2, 1]]
の形式にする利点がわからなかったので単に [word1, word2]
とした。
map { |e| e }
は map { |e| Thread.start { e } }.map(&:value)
の形に置き換えることができる。
[3, 4].map { |e| e.next } # => [4, 5]
[3, 4].map { |e| Thread.start { e.next } }.map(&:value) # => [4, 5]
なので並行性(演習問題31-3)を適用しない場合は次のように元に戻しても結果は変わらない。
- splits = partition(text, 200) { |e| Thread.start { split_words(e) } }.map(&:value)
+ splits = partition(text, 200) { |e| split_words(e) }
32. 二重マップリデュース - Hadoop
単語抽出を並列処理したあと再編成して頻度集計も並列処理する
# Splits +text+ into groups of at most +nlines+ lines, hands every group to
# the block as one joined string, and collects the block's return values.
def partition(text, nlines)
  results = []
  text.lines.each_slice(nlines) { |group| results << yield(group.join) }
  results
end
# Returns the words of +text+ (runs of two or more letters, lower-cased) in
# order of appearance, without the words listed in stop_words.txt.
def split_words(text)
  candidates = text.downcase.scan(/[a-z]{2,}/)
  ignored = IO.read("stop_words.txt").scan(/\w+/)
  candidates.difference(ignored)
end
text = IO.read("pride-and-prejudice.txt")
# First map: extract the words of every 200-line chunk in its own thread.
mappers = partition(text, 200) { |chunk| Thread.new { split_words(chunk) } }
splits = mappers.map(&:value)
# Regroup: word => array holding every occurrence of that word.
splits_per_word = splits.each_with_object({}) do |words, regrouped|
  regrouped.update(words.group_by(&:itself)) { |_word, seen, fresh| seen + fresh }
end
# Second map: count the occurrences of each word in its own thread.
reducers = splits_per_word.map { |word, occurrences| Thread.new { [word, occurrences.size] } }
freqs = reducers.map(&:value)
puts freqs.sort_by { |_word, count| -count }.first(25).map { |pair| pair.join(" - ") }
対話性
33. 三位一体 - MVC
データと表示と制御に分ける
# Model: the word => count table of one file, with the words listed in
# stop_words.txt left out.
class WordFrequenciesModel
  attr_accessor :freqs

  # Starts with an empty table and immediately loads +path_to_file+.
  def initialize(path_to_file)
    @freqs = {}
    update(path_to_file)
  end

  # Replaces the table with the counts of +path_to_file+ and returns it.
  # Words are runs of two or more letters, lower-cased.
  def update(path_to_file)
    text = IO.read(path_to_file)
    @freqs = text.downcase.scan(/[a-z]{2,}/).difference(stop_words).tally
  end

  private

  # Stop words, read from stop_words.txt on first use and then cached.
  def stop_words
    @stop_words ||= IO.read("stop_words.txt").scan(/\w+/)
  end
end
class WordFrequenciesView
def initialize(model)
@model = model
end
def render
sorted_freqs = @model.freqs.sort_by { -_2 }.take(25)
puts sorted_freqs.collect { |e| e * " - " }
end
end
# Controller: keeps a model and a view and triggers rendering.
class WordFrequencyController
  def initialize(model, view)
    @model = model
    @view = view
  end

  # Asks the view to render and returns whatever the view returns.
  def show
    @view.render
  end
end
# Wire model, view and controller together and show the result.
model = WordFrequenciesModel.new("pride-and-prejudice.txt")
view = WordFrequenciesView.new(model)
WordFrequencyController.new(model, view).show
- 分離する基準が人によって異なる
- 上のコードの場合、並び替えの責務が定まらない
34. レストフル - ステートレス
セッションの状態はクライアントが持つ
# Request handler in the "RESTful" style: every request names a verb, a
# resource and its arguments, and every response is a pair of
# [representation, links], where links describe the requests the client may
# send next. The position in a word list travels inside the links, so the
# server keeps no per-client session state; it only caches the computed word
# frequencies per file.
class Server
  def initialize
    @data = {} # filename => array of [word, count], highest count first
  end

  # Routes a request to the private method handler_<verb>_<uri> (lower-cased)
  # and returns its [representation, links] pair.
  def handle_request(verb, uri, *args)
    send("handler_#{verb}_#{uri}".downcase, *args)
  end

  private

  # GET default - the main menu.
  def handler_get_default
    rep = [
      "What would you like to do?",
      "1 - Quit",
      "2 - Upload file",
    ]
    links = {
      "1" => ["POST", "quit"],
      "2" => ["GET", "file_form"],
    }
    [rep, links]
  end

  # POST quit - says goodbye and terminates the process.
  def handler_post_quit
    puts "Goodbye cruel world..."
    exit
  end

  # GET file_form - prompt for a filename; the answer is POSTed to "file".
  def handler_get_file_form
    ["Name of file to upload?", ["POST", "file"]]
  end

  # POST file - computes (or reuses) the frequencies of +filename+ and shows
  # its most frequent word.
  def handler_post_file(filename)
    create_data(filename)
    handler_get_word(filename, 0)
  end

  # GET word - shows the word at +index+ (0-based) of the ranking of
  # +filename+, with a link to the following word.
  def handler_get_word(filename, index)
    word, count = freq_at_index(filename, index)
    rep = [
      "##{index.next}: #{word} - #{count}",
      "What would you like to do next?",
      "1 - Quit",
      "2 - Upload file",
      "3 - See next most-frequently occurring word",
    ]
    links = {
      "1" => ["POST", "quit"],
      "2" => ["GET", "file_form"],
      "3" => ["GET", "word", filename, index.next],
    }
    [rep, links]
  end

  # Returns the [word, count] pair at +index+, or a placeholder pair once the
  # ranking is exhausted. The ranking is loaded on demand, so a "GET word"
  # request also works when this server instance has not seen the file yet.
  def freq_at_index(filename, index)
    create_data(filename)[index] || ["no more words", 0]
  end

  # Returns the ranking for +filename+, computing and caching it on first use.
  # File.read is used on purpose: the name is typed by the user, and IO.read
  # would run a name starting with "|" as a shell command on Ruby < 4.0.
  def create_data(filename)
    @data[filename] ||= begin
      words = File.read(filename).downcase.scan(/[a-z]{2,}/)
      (words - stop_words).tally.sort_by { |_word, count| -count }
    end
  end

  # Stop words, read from stop_words.txt on first use and then cached.
  def stop_words
    @stop_words ||= File.read("stop_words.txt").scan(/\w+/)
  end
end
class Client
def initialize
@server = Server.new
end
def run
request = ["GET", "default"]
loop do
state_representation, links = @server.handle_request(*request)
request = render_and_get_input(state_representation, links)
end
end
private
def render_and_get_input(state_representation, links)
puts state_representation
case
when links.kind_of?(Hash)
links.fetch(input)
when links.first == "POST"
links + [input]
else
links
end
end
def input
print "> "
gets.strip
end
end
# Smoke test that drives the server directly, without the interactive client.
# Change the condition to false to skip it. The "# =>" annotations record the
# value each call returned when the author ran it.
if true
server = Server.new
server.handle_request("GET", "default") # => [["What would you like to do?", "1 - Quit", "2 - Upload file"], {"1"=>["POST", "quit"], "2"=>["GET", "file_form"]}]
server.handle_request("POST", "file", "input.txt") # => [["#1: live - 2", "What would you like to do next?", "1 - Quit", "2 - Upload file", "3 - See next most-frequently occurring word"], {"1"=>["POST", "quit"], "2"=>["GET", "file_form"], "3"=>["GET", "word", "input.txt", 1]}]
server.handle_request("GET", "word", "input.txt", 0) # => [["#1: live - 2", "What would you like to do next?", "1 - Quit", "2 - Upload file", "3 - See next most-frequently occurring word"], {"1"=>["POST", "quit"], "2"=>["GET", "file_form"], "3"=>["GET", "word", "input.txt", 1]}]
server.handle_request("GET", "word", "input.txt", 1) # => [["#2: mostly - 2", "What would you like to do next?", "1 - Quit", "2 - Upload file", "3 - See next most-frequently occurring word"], {"1"=>["POST", "quit"], "2"=>["GET", "file_form"], "3"=>["GET", "word", "input.txt", 2]}]
server.handle_request("GET", "word", "input.txt", 100) # => [["#101: no more words - 0", "What would you like to do next?", "1 - Quit", "2 - Upload file", "3 - See next most-frequently occurring word"], {"1"=>["POST", "quit"], "2"=>["GET", "file_form"], "3"=>["GET", "word", "input.txt", 101]}]
server.handle_request("GET", "default") # => [["What would you like to do?", "1 - Quit", "2 - Upload file"], {"1"=>["POST", "quit"], "2"=>["GET", "file_form"]}]
server.handle_request("POST", "file", "pride-and-prejudice.txt") # => [["#1: mr - 786", "What would you like to do next?", "1 - Quit", "2 - Upload file", "3 - See next most-frequently occurring word"], {"1"=>["POST", "quit"], "2"=>["GET", "file_form"], "3"=>["GET", "word", "pride-and-prejudice.txt", 1]}]
server.handle_request("GET", "word", "pride-and-prejudice.txt", 1) # => [["#2: elizabeth - 635", "What would you like to do next?", "1 - Quit", "2 - Upload file", "3 - See next most-frequently occurring word"], {"1"=>["POST", "quit"], "2"=>["GET", "file_form"], "3"=>["GET", "word", "pride-and-prejudice.txt", 2]}]
end
# Start the interactive console session.
Client.new.run
What would you like to do?
1 - Quit
2 - Upload file
> 2
Name of file to upload?
> pride-and-prejudice.txt
#1: mr - 786
What would you like to do next?
1 - Quit
2 - Upload file
3 - See next most-frequently occurring word
> 3
#2: elizabeth - 635
What would you like to do next?
1 - Quit
2 - Upload file
3 - See next most-frequently occurring word
> 1
Goodbye cruel world...
ログインすればWEBサーバーがセッションの状態を持っているように感じるがそれはブラウザ側からクッキーを渡しているからなので、そう考えればサーバー側はセッションの状態を持ってないと言える。
ニューラルネットワーク
ここからの章は Python の Keras ライブラリを活用するスタイルになっているせいか、さすがに他言語で実装せよとは言われていないのだけど、ここまで来たなら Ruby で実装したかった。
しかし方法がさっぱりわからなかった。なので Python の Keras を Ruby から使う方法でやってみたが自分にはハードルが高すぎた。
具体的なところで言うと Keras の fit_generator にどのようにメソッドを渡せばよいのかわからなかった。本来は Python のジェネレータ関数を渡すべきなのだけど、それに相当すると思われる Ruby の Enumerator インスタンスを渡しても動くわけがなかった。結局 Python のコードを芋蔓式に PyCall.exec することになってほとんど Ruby で書く意味がなくなってしまった。
それでも35章だけは Ruby で書けた。これは Python のコードを Ruby から呼ぶ方法を練習しただけであって、元のアルゴリズムを理解できているわけではない。
35. 浅いDense層のプログラム - ニューラルネットワーク
require "pycall/import"
include PyCall::Import
pyimport "keras"
pyfrom "keras.models", import: "Sequential"
pyfrom "keras.layers", import: "Dense"
require "numpy"
# Vocabulary of 100 characters: digits, lower-case letters, upper-case
# letters, punctuation and whitespace (presumably mirrors Python's
# string.printable - verify if that matters).
printable = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!\"\#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ \t\n\r\v\f"
# Position => character, as an array.
Characters = printable.each_char.to_a # => ["0", "1", "2", ..., "\r", "\v", "\f"]
# Character => position.
CharIndices = Characters.each_with_index.to_h # => {"0"=>0, "1"=>1, ..., "a"=>10, ..., "A"=>36, ..., " "=>94, ..., "\f"=>99}
# Position => character, as a hash.
IndicesChar = CharIndices.to_h { |char, index| [index, char] } # => {0=>"0", 1=>"1", ..., 94=>" ", ..., 99=>"\f"}
# Width of a one-hot vector.
INPUT_VOCAB_SIZE = Characters.size # => 100
# Encodes +line+ as a matrix with one row per character and
# INPUT_VOCAB_SIZE columns. Each row gets a single 1 in the column of its
# character; characters outside the vocabulary use the column of " ".
def encode_one_hot(line)
  x = Numpy.zeros([line.length, INPUT_VOCAB_SIZE])
  fallback = CharIndices[" "]
  line.each_char.with_index do |char, row|
    x[row][CharIndices.fetch(char, fallback)] = 1
  end
  x
end
# Decodes a matrix of per-character scores back into a string: for every row
# the column with the highest value is looked up in IndicesChar.
#
# +x+ is expected to be the Numpy::NDArray returned by the model. According
# to the original author's experiments such an array responds to neither
# +each+ nor +length+, and +size+ counts all elements rather than rows, so
# the number of rows is taken with PyCall.len and rows are fetched by index.
def decode_one_hot(x)
  chars = Array.new(PyCall.len(x)) do |row|
    # The original author's note says argmax returns an "Object"-typed value
    # here, so it is converted with to_i before being used as a hash key.
    IndicesChar[Numpy.argmax(x[row]).to_i]
  end
  chars.join
end
# Fills +n_layer+ with hand-made weights and returns the layer.
#
# The weight matrix is INPUT_VOCAB_SIZE x INPUT_VOCAB_SIZE and gets a 1 at
# [input index, output index] so that
# - a lower-case letter maps to itself,
# - an upper-case letter maps to its lower-case form,
# - every other character maps to " ".
# The bias is left at the zeros returned by Numpy.zeros.
def normalization_layer_set_weights(n_layer)
  w = Numpy.zeros([INPUT_VOCAB_SIZE, INPUT_VOCAB_SIZE], dtype: Numpy.float32)
  b = Numpy.zeros(INPUT_VOCAB_SIZE, dtype: Numpy.float32)
  lower = [*"a".."z"]
  upper = [*"A".."Z"]

  lower.each { |c| w[CharIndices[c], CharIndices[c]] = 1 }
  upper.each { |c| w[CharIndices[c], CharIndices[c.downcase]] = 1 }

  space = CharIndices[" "]
  (Characters - lower - upper).each { |c| w[CharIndices[c], space] = 1 }

  n_layer.set_weights([w, b])
  n_layer
end
# Returns a Sequential model holding a single Dense layer with
# INPUT_VOCAB_SIZE units, an input of INPUT_VOCAB_SIZE values and softmax
# activation.
def build_model
  network = Sequential.new
  network.add(Dense.new(INPUT_VOCAB_SIZE, input_shape: [INPUT_VOCAB_SIZE], activation: "softmax"))
  network
end
model = build_model
model.summary
normalization_layer_set_weights(model.layers[0])

# Sanity check on a fixed sentence.
normal = decode_one_hot(model.predict(encode_one_hot("Hello, world!"))) # => "hello world "

# Run every non-blank line of input.txt through the model and show the
# stripped input next to the decoded output.
IO.foreach("input.txt") do |raw|
  line = raw.strip
  next if line.empty?

  normal = decode_one_hot(model.predict(encode_one_hot(line)))
  puts " in: #{line.inspect}"
  puts "out: #{normal.inspect}"
end
36. 学習する浅いDense層 - 学習
TODO
37. 蝶ネクタイ - 多層ネットワーク
TODO
38. ニューロモノリス - シーケンス
TODO
39. スライディングウィンドウ - 畳み込み
TODO
40. リカレント : 回帰型ニューラルネットワーク
TODO
参照
Discussion