💎

[Rails] has_oneで最新のレコードを取得できるようにしてeager_loadしたい

2021/09/01に公開

1:N な関係でN個の中で最新のレコードだけ欲しい、っていうケースで
has_manyなrelationのfirstを取得していくとN+1になりまくってつらいので
has_oneで取得できるようにしてeager_loadする方法を調査したメモ

環境

  • OS: Ubuntu 20.04
  • Ruby: 3.0.2
  • Rails: 6.1.4.1
  • RDB: Postgres 13

薄いプロジェクトを作る

rails new hasoneload --database=postgresql -B -M -P -C -S -J --skip-yarn

環境整備

Dockerfile

Dockerfile
FROM ruby:3.0.2-alpine3.12 as ruby

## Development
FROM ruby AS dev

ENV BUNDLE_FORCE_RUBY_PLATFORM=1 \
    CFLAGS="-Wno-cast-function-type"

RUN apk update \
    && apk add --no-cache \
        gcc \
        g++ \
        libc-dev \
        linux-headers \
        make \
        postgresql \
        tzdata \
        git \
    && apk add --virtual build-dependencies --no-cache \
        postgresql-dev \
        libxml2-dev \
        build-base \
        curl-dev \
    # https://dustri.org/b/error-loading-shared-library-ld-linux-x86-64so2-on-alpine-linux.html
    && ln -s /lib/libc.musl-x86_64.so.1 /lib/ld-linux-x86-64.so.2

RUN mkdir -p /app
ENV HOME /app

WORKDIR $HOME

COPY ./Gemfile* ./
ARG bundle_install_options="--without development test doc --jobs 4"
RUN bundle config github.https true \
  && bundle install $bundle_install_options \
  && apk del build-dependencies

COPY . /app

CMD ["bundle", "exec", "puma", "-C", "config/puma.rb"]

docker-compose.yml

docker-compose.yml
version: "3.7"

x-backend-envs: &backend-envs
  TZ: Asia/Tokyo
  RAILS_LOG_TO_STDOUT: 1
x-backend-build: &backend-build
  context: .
  args:
    bundle_install_options: --jobs 10
  target: dev
  cache_from:
    - hasoneload_backend

services:
  postgres:
    image: postgres:13.2-alpine
    container_name: postgres
    environment:
      - POSTGRES_USER=postgres
      - POSTGRES_PASSWORD=postgres
      - POSTGRES_HOST=postgres
      - POSTGRES_DB=dev
      - POSTGRES_DB_TEST=test
    ports:
      - "5433:5432"
    volumes:
      - ./postgres/docker-entrypoint-initdb.d:/docker-entrypoint-initdb.d
      - pgdata:/var/lib/postgresql/data

  backend:
    container_name: backend
    build: *backend-build
    volumes:
      - ./:/app:cached
    environment: *backend-envs
    ports:
      - 3001:3000
    tty: true
    command: /bin/sh -l -c 'bundle && bundle exec puma -C config/puma.rb'
    depends_on:
      - postgres

volumes:
  pgdata:
    driver: local

config/database.yml

database.yml
default: &default
  adapter: postgresql
  encoding: unicode
  pool: <%= ENV.fetch("RAILS_MAX_THREADS") { 5 } %>
  host: postgres
  port: 5432
  username: postgres
  password: postgres

development:
  <<: *default
  database: dev

test:
  <<: *default
  database: test

production:
  <<: *default
  database: prod
  username: postgres
  password: <%= ENV['HASONELOAD_DATABASE_PASSWORD'] %>

起動確認

docker-compose run --rm backend sh
WARNING: Native build is an experimental feature and could change at any time
Starting postgres ... done
Creating hasoneload_backend_run ... done
/app #

とりあえずOK

testをRSpecにしておく

テスト書きたいのでRSpecに変えておきます
newする時にtestをskipしておけばよかった

...
group :development, :test do
  gem 'rspec-rails'
  gem 'factory_bot_rails'
end
...
/app # bundle
...
/app # bin/rails g rspec:install
Running via Spring preloader in process 77
      create  .rspec
      create  spec
      create  spec/spec_helper.rb
      create  spec/rails_helper.rb

テーブル・モデルの準備

今回は UserPost を 1:N で紐付ける状態にします。

class CreateUsers < ActiveRecord::Migration[6.1]
  def change
    create_table :users do |t|
      t.string :name, null: false
      t.timestamps
    end
  end
end
class CreatePosts < ActiveRecord::Migration[6.1]
  def change
    create_table :posts do |t|
      t.references :user
      t.string     :title, null: false
      t.datetime   :posted_at, null: false
      t.timestamps
    end
  end
end

こんな感じにしておきます。
posted_at が最新のレコードを取得したい

テストを書いてみる

require 'rails_helper'

RSpec.describe User, type: :model do
  describe "#latest_post" do
    let!(:user)   { create(:user) }
    let!(:post_1) { create(:post, user: user, posted_at: Time.zone.now) }
    let!(:post_2) { create(:post, user: user, posted_at: Time.zone.now + 1.day) }
    let!(:post_3) { create(:post, user: user, posted_at: Time.zone.now - 1.day) }

    subject { user.latest_post }

    it { is_expected.to eq post_2 }
  end
end

やりたいこととしてはこんな感じ

メソッドで書いてみる

class User < ApplicationRecord
  has_many :posts

  def latest_post
    posts.order(posted_at: :desc).first
  end
end

これは当然テストは通るけど、複数のユーザーに対してすべてlatest_postが欲しいとなると 毎回DBに問い合わせる事になる

has_oneで書いてみる order編

https://qiita.com/ofl/items/845b2fbda2d35fe512a2#has_oneを利用した場合
こちらを参考にさせていただきました

class User < ApplicationRecord
  has_many :posts

  has_one :latest_post, -> { order(posted_at: :desc) }, class_name: :Post
end

これもテストは通ります

複数ユーザーがいる場合のテストケースを追加してみます

require 'rails_helper'

RSpec.describe User, type: :model do
  describe "#latest_post" do
    let!(:user)   { create(:user) }
    let!(:post_1) { create(:post, user: user, posted_at: Time.zone.now) }
    let!(:post_2) { create(:post, user: user, posted_at: Time.zone.now + 1.day) }
    let!(:post_3) { create(:post, user: user, posted_at: Time.zone.now - 1.day) }

    context "when single user" do
      subject { user.latest_post }

      it { is_expected.to eq post_2 }
    end

    context "when exists other user" do
      let!(:other_user)   { create(:user) }
      let!(:other_posts)  { create_list(:post, 10, user: other_user) }
      let!(:other_post_1) { create(:post, user: other_user, posted_at: Time.zone.now + 1.day) }
      let!(:other_post_2) { create(:post, user: other_user, posted_at: Time.zone.now - 1.day) }

      let(:users) { User.all.order(:id).includes(:latest_post) }

      subject { users.map(&:latest_post) }

      it { is_expected.to eq [post_2, other_post_1] }
    end
  end
end

これも通りました
よさそう?

SQLを見てみる

rails consoleから実際に発行されているSQLを見てみます
先にUserを2人作成しておきます

irb(main):002:0> 2.times{ |i| User.create(name: "#{i+1}")}
  TRANSACTION (0.1ms)  BEGIN
  User Create (0.5ms)  INSERT INTO "users" ("name", "created_at", "updated_at") VALUES ($1, $2, $3) RETURNING "id"  [["name", "1"], ["created_at", "2021-08-31 13:56:58.718633"], ["updated_at", "2021-08-31 13:56:58.718633"]]
  TRANSACTION (2.3ms)  COMMIT
  TRANSACTION (0.1ms)  BEGIN
  User Create (0.2ms)  INSERT INTO "users" ("name", "created_at", "updated_at") VALUES ($1, $2, $3) RETURNING "id"  [["name", "2"], ["created_at", "2021-08-31 13:56:58.724841"], ["updated_at", "2021-08-31 13:56:58.724841"]]
  TRANSACTION (0.5ms)  COMMIT
=> 2
irb(main):003:0> User.all
  User Load (0.6ms)  SELECT "users".* FROM "users"
=> [#<User:0x000055aa3f9add98 id: 1, name: "1", created_at: Tue, 31 Aug 2021 13:56:58.718633000 UTC +00:00, updated_at: Tue, 31 Aug 2021 13:56:58.718633000 UTC +00:00>, #<User:0x000055aa3f9adb40 id: 2, name: "2", created_at: Tue, 31 Aug 2021 13:56:58.724841000 UTC +00:00, updated_at: Tue, 31 Aug 2021 13:56:58.724841000 UTC +00:00>]

includesした時のクエリを見てみます

irb(main):004:0> User.all.includes(:latest_post)
  User Load (0.5ms)  SELECT "users".* FROM "users"
  Post Load (0.7ms)  SELECT "posts".* FROM "posts" WHERE "posts"."user_id" IN ($1, $2) ORDER BY "posts"."posted_at" DESC  [["user_id", 1], ["user_id", 2]]

SELECT "posts".* FROM "posts" WHERE "posts"."user_id" IN ($1, $2) ORDER BY "posts"."posted_at" DESC [["user_id", 1], ["user_id", 2]]

これは…
該当ユーザーのpostsを全部取ってきて最初の1個を使う、みたいな感じになりそう?
ユーザーのpostsの数が多い時にちょっと問題になりそうな予感がします

postsを各ユーザーに対して100件作ってクエリ投げてみました

postgres@0:dev> SELECT "posts".* FROM "posts" WHERE "posts"."user_id" IN (1, 2) ORDER BY "posts"."posted_at" DESC;
+------+-----------+-------------+----------------------------+----------------------------+----------------------------+
| id   | user_id   | title       | posted_at                  | created_at                 | updated_at                 |
|------+-----------+-------------+----------------------------+----------------------------+----------------------------|
| 200  | 2         | 2 title 100 | 2021-12-08 14:05:45.185371 | 2021-08-31 14:05:45.185689 | 2021-08-31 14:05:45.185689 |
| 100  | 1         | 1 title 100 | 2021-12-08 14:05:45.004113 | 2021-08-31 14:05:45.004432 | 2021-08-31 14:05:45.004432 |
| 199  | 2         | 2 title 99  | 2021-12-07 14:05:45.183623 | 2021-08-31 14:05:45.183981 | 2021-08-31 14:05:45.183981 |
| 99   | 1         | 1 title 99  | 2021-12-07 14:05:45.002313 | 2021-08-31 14:05:45.002635 | 2021-08-31 14:05:45.002635 |
| 198  | 2         | 2 title 98  | 2021-12-06 14:05:45.181685 | 2021-08-31 14:05:45.182037 | 2021-08-31 14:05:45.182037 |
| 98   | 1         | 1 title 98  | 2021-12-06 14:05:45.000227 | 2021-08-31 14:05:45.000641 | 2021-08-31 14:05:45.000641 |
| 197  | 2         | 2 title 97  | 2021-12-05 14:05:45.179828 | 2021-08-31 14:05:45.18014  | 2021-08-31 14:05:45.18014  |
| 97   | 1         | 1 title 97  | 2021-12-05 14:05:44.998281 | 2021-08-31 14:05:44.998604 | 2021-08-31 14:05:44.998604 |
| 196  | 2         | 2 title 96  | 2021-12-04 14:05:45.178037 | 2021-08-31 14:05:45.178365 | 2021-08-31 14:05:45.178365 |
| 96   | 1         | 1 title 96  | 2021-12-04 14:05:44.996494 | 2021-08-31 14:05:44.996805 | 2021-08-31 14:05:44.996805 |
| 195  | 2         | 2 title 95  | 2021-12-03 14:05:45.176172 | 2021-08-31 14:05:45.176527 | 2021-08-31 14:05:45.176527 |
| 95   | 1         | 1 title 95  | 2021-12-03 14:05:44.994745 | 2021-08-31 14:05:44.995057 | 2021-08-31 14:05:44.995057 |
| 194  | 2         | 2 title 94  | 2021-12-02 14:05:45.174367 | 2021-08-31 14:05:45.174686 | 2021-08-31 14:05:45.174686 |
| 94   | 1         | 1 title 94  | 2021-12-02 14:05:44.992985 | 2021-08-31 14:05:44.993298 | 2021-08-31 14:05:44.993298 |
| 193  | 2         | 2 title 93  | 2021-12-01 14:05:45.172576 | 2021-08-31 14:05:45.172896 | 2021-08-31 14:05:45.172896 |
| 93   | 1         | 1 title 93  | 2021-12-01 14:05:44.991167 | 2021-08-31 14:05:44.99148  | 2021-08-31 14:05:44.99148  |
| 192  | 2         | 2 title 92  | 2021-11-30 14:05:45.170765 | 2021-08-31 14:05:45.171091 | 2021-08-31 14:05:45.171091 |
| 92   | 1         | 1 title 92  | 2021-11-30 14:05:44.989329 | 2021-08-31 14:05:44.989643 | 2021-08-31 14:05:44.989643 |
| 191  | 2         | 2 title 91  | 2021-11-29 14:05:45.168932 | 2021-08-31 14:05:45.169249 | 2021-08-31 14:05:45.169249 |
| 91   | 1         | 1 title 91  | 2021-11-29 14:05:44.987351 | 2021-08-31 14:05:44.987751 | 2021-08-31 14:05:44.987751 |
| 190  | 2         | 2 title 90  | 2021-11-28 14:05:45.167161 | 2021-08-31 14:05:45.16748  | 2021-08-31 14:05:45.16748  |
| 90   | 1         | 1 title 90  | 2021-11-28 14:05:44.98522  | 2021-08-31 14:05:44.985693 | 2021-08-31 14:05:44.985693 |
| 189  | 2         | 2 title 89  | 2021-11-27 14:05:45.165312 | 2021-08-31 14:05:45.16563  | 2021-08-31 14:05:45.16563  |
| 89   | 1         | 1 title 89  | 2021-11-27 14:05:44.983192 | 2021-08-31 14:05:44.983591 | 2021-08-31 14:05:44.983591 |
| 188  | 2         | 2 title 88  | 2021-11-26 14:05:45.163544 | 2021-08-31 14:05:45.163864 | 2021-08-31 14:05:45.163864 |
| 88   | 1         | 1 title 88  | 2021-11-26 14:05:44.981116 | 2021-08-31 14:05:44.981522 | 2021-08-31 14:05:44.981522 |
...

やっぱりいっぱい返ってきますね。当然ですが。

202人のユーザーに100個ずつpostsを作って実行してみました

irb(main):005:0> User.all.includes(:latest_post).to_a; nil
  User Load (0.7ms)  SELECT "users".* FROM "users"
  Post Load (25.4ms)  SELECT "posts".* FROM "posts" WHERE "posts"."user_id" IN ($1, $2, ...) ORDER BY "posts"."posted_at" DESC  [["user_id", 1], ["user_id", 2], ...]
=> nil

postsは 25.4ms

has_oneで書いてみる EXISTS編

not exits で最新のデータだけ取れればもっと少ないレコードのみを取得できる気がするのでやってみます
https://qiita.com/labocho/items/9225d4e8982dd54b4853
こちらを参考にさせていただきました

class User < ApplicationRecord
  has_many :posts

  has_one :latest_post, -> {
    where(
      <<~SQL
        NOT EXISTS (
          SELECT 1 FROM posts AS p
            WHERE
              posts.posted_at < p.posted_at AND
              posts.user_id = p.user_id
        )
      SQL
    )
  }, class_name: :Post
end

これもテストは通りました

SQLを見てみる

irb(main):004:0> User.all.includes(:latest_post).to_a; nil
  User Load (0.9ms)  SELECT "users".* FROM "users"
  Post Load (15.7ms)  SELECT "posts".* FROM "posts" WHERE (NOT EXISTS (
  SELECT 1 FROM posts AS p
    WHERE
      posts.posted_at < p.posted_at AND
      posts.user_id = p.user_id
)
) AND "posts"."user_id" IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, $22, $23, $24, $25, $26, $27, $28, $29, $30, $31, $32, $33, $34, $35, $36, $37, $38, $39, $40, $41, $42, $43, $44, $45, $46, $47, $48, $49, $50, $51, $52, $53, $54, $55, $56, $57, $58, $59, $60, $61, $62, $63, $64, $65, $66, $67, $68, $...

postsは 15.7ms
こっちの方が速そうです

postgres@0:dev> SELECT "posts".* FROM "posts" WHERE (NOT EXISTS (SELECT 1 FROM posts AS p WHERE posts.posted_at < p.posted_at AND posts.user_id = p.user_id)) AND "posts"."user_id" IN (1, 2);
+------+-----------+-------------+----------------------------+----------------------------+----------------------------+
| id   | user_id   | title       | posted_at                  | created_at                 | updated_at                 |
|------+-----------+-------------+----------------------------+----------------------------+----------------------------|
| 100  | 1         | 1 title 100 | 2021-12-08 14:12:33.064071 | 2021-08-31 14:12:33.064423 | 2021-08-31 14:12:33.064423 |
| 200  | 2         | 2 title 100 | 2021-12-08 14:12:33.244644 | 2021-08-31 14:12:33.245029 | 2021-08-31 14:12:33.245029 |
+------+-----------+-------------+----------------------------+----------------------------+----------------------------+
SELECT 2
Time: 0.019s

最新のデータしか取ってこないので、こっちの方が良さそう?

ただ、生SQLを書くのもちょっと嫌といえば嫌ですね。

has_oneで書いてみる max編

https://patorash.hatenablog.com/entry/2020/10/31/032231
こちらを参考にさせていただきました

class User < ApplicationRecord
  has_many :posts

  has_one :latest_post, -> {
    where(id: Post.group(:user_id).select("MAX(id)"))
  }, class_name: :Post
end

ただしこの方法だと最新のレコードのIDがMAXになるひつようがある

大抵の場合はこの条件を満たすと思いますが
今回のテストケースではあえて意地悪な状況を作る為にposted_atが最新のレコードのIDがmaxではない状況にしているので
テストは通りません

SQLを見てみる

irb(main):001:0> User.all.includes(:latest_post).to_a; nil
  User Load (0.7ms)  SELECT "users".* FROM "users"
  Post Load (9.1ms)  SELECT "posts".* FROM "posts" WHERE "posts"."id" IN (SELECT MAX(id) FROM "posts" GROUP BY "posts"."user_id") AND "posts"."user_id" IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, $22, $23, $24, $25, $26, $27, $28, $29, $30, $31, $32, $33, $34, $35, $36, $37, $38, $39, $40, $41, $42, $43, ...

postsは 9.1ms
これが最速っぽいですね

postgres@0:dev> SELECT "posts".* FROM "posts" WHERE "posts"."id" IN (SELECT MAX(id) FROM "posts" GROUP BY "posts"."user_id") AND "posts"."user_id" IN (1, 2);
+------+-----------+-------------+----------------------------+----------------------------+----------------------------+
| id   | user_id   | title       | posted_at                  | created_at                 | updated_at                 |
|------+-----------+-------------+----------------------------+----------------------------+----------------------------|
| 100  | 1         | 1 title 100 | 2021-12-08 14:12:33.064071 | 2021-08-31 14:12:33.064423 | 2021-08-31 14:12:33.064423 |
| 200  | 2         | 2 title 100 | 2021-12-08 14:12:33.244644 | 2021-08-31 14:12:33.245029 | 2021-08-31 14:12:33.245029 |
+------+-----------+-------------+----------------------------+----------------------------+----------------------------+
SELECT 2
Time: 0.020s

当然ですが、必要なレコードしか返ってきません。

まとめ

  • 今回はindexを全く考慮してないので、indexとデータ量次第で結果は変わりそう
  • MAXが使えるのであれば、コードもシンプルになり、パフォーマンスもいいのでMAXを使うのが良さそう
  • それ以外の場合はNOT EXISTSを使うのがパフォーマンス&メモリ効率的には良さそう

おまけ ベンチマーク

class User < ApplicationRecord
  has_many :posts

  has_one :latest_post_order, -> { order(posted_at: :desc) }, class_name: :Post

  has_one :latest_post_not_exists, -> {
    where(
      <<~SQL
        NOT EXISTS (
          SELECT 1 FROM posts AS p
            WHERE
              posts.posted_at < p.posted_at AND
              posts.user_id = p.user_id
        )
      SQL
    )
  }, class_name: :Post

  has_one :latest_post_max, -> {
    where(id: Post.group(:user_id).select("MAX(id)"))
  }, class_name: :Post
end
namespace :bench do
  task :latest => :environment do
    require "benchmark"

    count = 10

    Benchmark.bmbm do |x|
      x.report("order")      { count.times{ User.all.includes(:latest_post_order).to_a } }
      x.report("not exists") { count.times{ User.all.includes(:latest_post_not_exists).to_a } }
      x.report("max")        { count.times{ User.all.includes(:latest_post_max).to_a } }
    end
  end
end
/app # bin/rails bench:latest
Rehearsal ----------------------------------------------
order        1.742294   0.071785   1.814079 (  1.979291)
not exists   0.074620   0.000147   0.074767 (  0.186451)
max          0.072828   0.000000   0.072828 (  0.151328)
------------------------------------- total: 1.961674sec

                 user     system      total        real
order        1.581774   0.024608   1.606382 (  1.759577)
not exists   0.074239   0.000000   0.074239 (  0.193029)
max          0.070366   0.000000   0.070366 (  0.154676)

Discussion