Open9
個人アプリのCPU使用しすぎ問題を解決したい
問題
CPU, Memoryの使用量が高い。docker compose upから一日程度経過すると70% ~ 90%まで一気に上昇する
対策
現状はcontainerの再起動をしている
docker stop <app_container_id>
docker compose up
CPU, Memoryの使用量
docker compose up実行直後
root@prod:/opt/tateca# docker stats
CONTAINER ID NAME CPU % MEM USAGE / LIMIT MEM % NET I/O BLOCK I/O PIDS
f22173069050 tateca-app-1 0.41% 284.5MiB / 1.918GiB 14.48% 253kB / 114kB 86.1MB / 262kB 36
8122b4b4e804 tateca-db-1 3.87% 425.3MiB / 1.918GiB 21.65% 31.5MB / 58.7MB 711MB / 78.9MB 50
一日程度経過後
root@prod:/opt/tateca# docker stats
CONTAINER ID NAME CPU % MEM USAGE / LIMIT MEM % NET I/O BLOCK I/O PIDS
f22173069050 tateca-app-1 98.89% 907.7MiB / 1.918GiB 46.22% 15.5MB / 10.5MB 156MB / 77.9MB 50
8122b4b4e804 tateca-db-1 0.86% 424.6MiB / 1.918GiB 21.62% 31.4MB / 58.4MB 705MB / 78.9MB 51
詳細
top
root@prod:/opt/tateca# docker top f22173069050
UID PID PPID C STIME TTY TIME CMD
root 1482122 1482099 0 Oct05 ? 00:09:53 java -Djava.security.egd=file:/dev/./urandom -jar /app.jar
root 2244329 1482122 54 Oct06 ? 13:04:52 /root/.config/logrotate
root 2341932 1482122 47 Oct06 ? 09:34:59 /root/.config/logrotate
root 2879748 1482122 27 12:54 ? 00:02:11 /root/.config/logrotate
root 2882227 1482099 0 13:00 pts/0 00:00:00 /bin/bash
ps
install
Ubuntuなので
apt-get update && apt-get install -y procps
run
app, db containerはmemory, cpuともに全然使用していない
root@prod:/etc/logrotate.d# ps aux | grep 'java'
root 2884113 4.4 13.2 2643368 266344 ? Ssl 13:04 0:32 java -Djava.security.egd=file:/dev/./urandom -jar /app.jar
root 2889361 0.0 0.0 7008 1964 pts/0 S+ 13:17 0:00 grep --color=auto java
root@prod:/etc/logrotate.d# ps aux | grep 'mysql'
lxd 2883666 0.9 19.6 1295332 394884 ? Ssl 13:04 0:07 mysqld
root 2889436 0.0 0.1 7008 2260 pts/0 S+ 13:17 0:00 grep --color=auto mysql
CPU使用量が高くなりしだい、logrotateをgrepしたい。あとkillもしたい
kill logrotate process
get PID
root@prod:/opt/tateca# ps aux
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
root 1 0.2 0.5 167696 10948 ? Ss Sep19 67:15 /sbin/init
...
root 3462520 95.1 0.0 0 0 ? Zs 11:40 199:08 [logrotate] <defunct>
...
kill
root@prod:/opt/tateca# kill -9 3462520
make sure if is was killed
root@prod:/opt/tateca# ps aux | grep 'logrotate'
root 3462520 94.6 0.0 0 0 ? Zs 11:40 199:08 [logrotate] <defunct>
root 3555907 0.0 0.1 7008 2148 pts/1 S+ 15:11 0:00 grep --color=auto logrotate
...
Check CPU, Memory
Looks good
CONTAINER ID NAME CPU % MEM USAGE / LIMIT MEM % NET I/O BLOCK I/O PIDS
f22173069050 tateca-app-1 0.31% 328.7MiB / 1.918GiB 16.73% 4.78MB / 3.88MB 104MB / 39.7MB 37
8122b4b4e804 tateca-db-1 0.82% 394.1MiB / 1.918GiB 20.07% 2.41MB / 4.04MB 113MB / 19.4MB 51
logrotate processを走らせない様にするまたは検知しだい削除できる様にすれば一時的にはおけ
20m後に再度cpu, memoryの上昇を確認
再度grep
root@prod:/opt/tateca# ps aux | grep 'logrotate'
root 3462520 13.8 0.0 0 0 ? Zs Oct08 199:08 [logrotate] <defunct>
root 3621698 58.4 0.0 0 0 ? Zs Oct08 624:45 [logrotate] <defunct>
root 3922840 94.9 13.9 521288 279556 ? Ssl 05:03 370:11 /root/.config/logrotate
root 4096044 0.0 0.1 7008 2112 pts/1 S+ 11:33 0:00 grep --color=auto logrotate
ゾンビ二つといつものやつ一つ、grepの四つを確認。
ゾンビの親プロセスを確認
root@prod:/opt/tateca# ps -o ppid= -p 3462520
2884113
root@prod:/opt/tateca# ps -fp 2884113
UID PID PPID C STIME TTY TIME CMD
root 2884113 2884090 0 Oct07 ? 00:08:22 java -Djava.security.egd=file:/dev/./urandom -jar /app.jar
他のプロセスも確認
root@prod:/opt/tateca# ps -o ppid= -p 3621698
2884113
root@prod:/opt/tateca# ps -o ppid= -p 3922840
2884113
root@prod:/opt/tateca# ps -o ppid= -p 4096044
no output
logrotateの対策
とりあえず、走らない様にしたい。
廃止を試みる
root@prod:/opt/tateca# cat /root/.config/logrotate
cat: /root/.config/logrotate: No such file or directory
root@prod:/opt/tateca# which logrotate
/usr/sbin/logrotate
root@prod:/opt/tateca# which logrotate
/usr/sbin/logrotate
root@prod:/opt/tateca# cat /etc/logrotate.conf
# see "man logrotate" for details
# global options do not affect preceding include directives
# rotate log files weekly
weekly
# use the adm group by default, since this is the owning group
# of /var/log/syslog.
su root adm
# keep 4 weeks worth of backlogs
rotate 4
# create new (empty) log files after rotating old ones
create
# use date as a suffix of the rotated file
#dateext
# uncomment this if you want your log files compressed
#compress
# packages drop log rotation information into this directory
include /etc/logrotate.d
# system-specific logs may also be configured here.
root@prod:/opt/tateca# ls /etc/logrotate.d/
alternatives apport apt bootlog btmp cloud-init dpkg rsyslog ubuntu-pro-client ufw unattended-upgrades wtmp
root@prod:/opt/tateca# systemctl list-timers --all | grep logrotate
Thu 2024-10-10 00:00:00 UTC 12h left Wed 2024-10-09 00:00:00 UTC 11h ago logrotate.timer logrotate.service
root@prod:/opt/tateca# systemctl stop logrotate.timer
root@prod:/opt/tateca# systemctl disable logrotate.timer
Removed /etc/systemd/system/timers.target.wants/logrotate.timer.
root@prod:/opt/tateca# systemctl list-timers --all | grep logrotate
消えてない?
root@prod:/opt/tateca# ps aux | grep 'logrotate'
root 3462520 13.7 0.0 0 0 ? Zs Oct08 199:08 [logrotate] <defunct>
root 3621698 57.4 0.0 0 0 ? Zs Oct08 624:45 [logrotate] <defunct>
root 3922840 94.8 13.9 521288 279556 ? Ssl 05:03 387:57 /root/.config/logrotate
root 4104814 0.0 0.1 7008 2112 pts/0 S+ 11:52 0:00 grep --color=auto logrotate
root@prod:/opt/tateca# kill -9 3462520
root@prod:/opt/tateca# kill -9 3621698
root@prod:/opt/tateca# kill -9 3922840
root@prod:/opt/tateca# ps aux | grep 'logrotate'
root 3462520 13.7 0.0 0 0 ? Zs Oct08 199:08 [logrotate] <defunct>
root 3621698 57.4 0.0 0 0 ? Zs Oct08 624:45 [logrotate] <defunct>
root 3922840 94.8 0.0 0 0 ? Zs 05:03 388:16 [logrotate] <defunct>
root 4104994 0.0 0.1 7008 2232 pts/0 S+ 11:52 0:00 grep --color=auto logrotate
CPU, Memoryの減少は確認
CONTAINER ID NAME CPU % MEM USAGE / LIMIT MEM % NET I/O BLOCK I/O PIDS
f22173069050 tateca-app-1 0.26% 355.3MiB / 1.918GiB 18.09% 10.6MB / 9.59MB 119MB / 71.1MB 39
8122b4b4e804 tateca-db-1 0.82% 404.6MiB / 1.918GiB 20.60% 3.99MB / 6.66MB 115MB / 19.4MB 52
2024-10-10 18:24
CPU, Memory上昇
CONTAINER ID NAME CPU % MEM USAGE / LIMIT MEM % NET I/O BLOCK I/O PIDS
f22173069050 tateca-app-1 0.29% 361.5MiB / 1.918GiB 18.41% 13.9MB / 11.8MB 120MB / 104MB 39
8122b4b4e804 tateca-db-1 0.82% 411.1MiB / 1.918GiB 20.93% 6.17MB / 10.5MB 119MB / 26.1MB 52
2024-10-15 15:20
再度発生を確認
root@prod:/opt/tateca# ps aux | grep 'logrotate'
root 3448936 69.6 13.9 521288 279636 ? Ssl 03:25 123:15 /root/.config/logrotate
root 3485326 47.5 13.9 521284 279632 ? Ssl 04:48 45:03 /root/.config/logrotate
root 3527424 0.0 0.1 7008 2064 pts/0 S+ 06:22 0:00 grep --color=auto logrotate
ゾンビ化しているprocessのkillができていない
対策
docker composeに以下ついか
init: true
こプロセスが削除された際にクリーンしてくれるっぽい
/root/.config/logrotateが存在しないのにこのパスでプロセスが発火している理由が意味不。
調査
root@prod:~# crontab -l
no crontab for root
root@prod:~# ls /etc/cron.*
/etc/cron.d:
e2scrub_all
/etc/cron.daily:
apport apt-compat do-agent dpkg logrotate man-db
/etc/cron.hourly:
droplet-agent
/etc/cron.monthly:
/etc/cron.weekly:
man-db
-> daily/logrotateが存在する
中身確認
root@prod:~# cat /etc/cron.daily/logrotate
#!/bin/sh
# skip in favour of systemd timer
if [ -d /run/systemd/system ]; then
exit 0
fi
# this cronjob persists removals (but not purges)
if [ ! -x /usr/sbin/logrotate ]; then
exit 0
fi
/usr/sbin/logrotate /etc/logrotate.conf
EXITVALUE=$?
if [ $EXITVALUE != 0 ]; then
/usr/bin/logger -t logrotate "ALERT exited abnormally with [$EXITVALUE]"
fi
exit $EXITVALUE
logrotate関連のファイルを探す
find / -name "*logrotate*"
/snap/lxd/29351/bin/logrotate
/snap/lxd/29351/etc/logrotate.conf
/snap/lxd/28373/bin/logrotate
/snap/lxd/28373/etc/logrotate.conf
/snap/core20/2379/etc/logrotate.d
/snap/core20/2318/etc/logrotate.d
/var/lib/systemd/deb-systemd-helper-enabled/logrotate.timer.dsh-also
/var/lib/systemd/deb-systemd-helper-enabled/timers.target.wants/logrotate.timer
/var/lib/systemd/timers/stamp-logrotate.timer
/var/lib/dpkg/info/logrotate.conffiles
/var/lib/dpkg/info/logrotate.list
/var/lib/dpkg/info/logrotate.md5sums
/var/lib/dpkg/info/logrotate.postinst
/var/lib/dpkg/info/logrotate.postrm
/var/lib/dpkg/info/logrotate.prerm
/var/lib/logrotate
/var/lib/docker/overlay2/0164b94f07d378f3ada07e5c78d3df4af47bc3618b3f12f4b924d81a68be265f/merged/etc/logrotate.d
/var/lib/docker/overlay2/864d42d812a2fd305d99100760ba244100ef8d276e9529465bb7964c0b6463e1/diff/etc/logrotate.d
/var/lib/docker/overlay2/4aec44662560c4814207a1dd0331b9be7629b624a09ea815ad4626cc230e801c/diff/etc/logrotate.d
/var/lib/docker/overlay2/aaea46f77e873c76c8094da606c0a79f29fc301ba504cc263778e21d3d7116db/diff/etc/logrotate.d
/var/lib/docker/overlay2/f38c2759944d2ea182706eb882720a54b59587214d1b8cbce5bb8a3d9e21a7d5/diff/etc/logrotate.d
/var/lib/docker/overlay2/81701a0ab22c91e99ade50a56fe39cd0160419e9208049460e836c1947651ec0/diff/etc/logrotate.d
/var/lib/docker/overlay2/ee4eb500e81479547001393894cebacb8145e4cb8199e98f2563b7fe0068999b/diff/etc/logrotate.d
/etc/logrotate.d
/etc/logrotate.conf
/etc/cron.daily/logrotate
/usr/sbin/logrotate
/usr/share/bug/logrotate
/usr/share/doc/logrotate
/usr/share/doc/rsync/examples/logrotate.conf.rsync
/usr/share/man/man5/logrotate.conf.5.gz
/usr/share/man/man8/logrotate.8.gz
/usr/lib/systemd/system/logrotate.service
/usr/lib/systemd/system/logrotate.timer
/usr/lib/python3/dist-packages/sos/report/plugins/logrotate.py
/usr/lib/python3/dist-packages/sos/report/plugins/__pycache__/logrotate.cpython-310.pyc
/run/systemd/propagate/logrotate.service
timerを確認
root@prod:~# cat /usr/lib/systemd/system/logrotate.timer
[Unit]
Description=Daily rotation of log files
Documentation=man:logrotate(8) man:logrotate.conf(5)
[Timer]
OnCalendar=daily
AccuracySec=1h
Persistent=true
[Install]
WantedBy=timers.target
systemからは削除されている
最終手段
検知次第kill
root@prod:/usr/local/bin# cat logrotate_killer.sh
while true; do
# logrotateプロセスのIDを取得
pid=$(pgrep -f '/root/.config/logrotate')
if [ -n "$pid" ]; then
echo "logrotate (PID: $pid) detected. Killing process..."
kill -9 $pid
echo "Killed successfully"
fi
# 1分ごとにチェック
sleep 60
done
chmod +x logrotate_killer.sh
./logrotate_killer.sh &
これでしばらく観察してみる
確保
root@prod:/opt/tateca# ps aux | grep 'logrotate'
root 71305 63.1 13.7 718028 276144 ? Ssl Oct16 607:20 /root/.config/logrotate
root 218353 46.3 13.7 718024 276168 ? Ssl Oct16 292:13 /root/.config/logrotate
root 497767 0.0 0.1 7008 2132 pts/0 S+ 03:01 0:00 grep --color=auto logrotate