
Home server initial setup log: Ubuntu Server 24

---

Add the date to PS1

~/.bashrc
- if [ "$color_prompt" = yes ]; then
-    PS1='${debian_chroot:+($debian_chroot)}\[\033[01;32m\]\u@\h\[\033[00m\]:\[\033[01;34m\]\w\[\033[00m\]\$ '
- else
-     PS1='${debian_chroot:+($debian_chroot)}\u@\h:\w\$ '
- fi

+ if [ "$color_prompt" = yes ]; then
+     PS1='$(date "+%Y-%m-%d %H:%M:%S") ${debian_chroot:+($debian_chroot)}\[\033[01;32m\]\u@\h\[\033[00m\]:\[\033[01;34m\]\w\[\033[00m\]\$ '
+ else
+     PS1='$(date "+%Y-%m-%d %H:%M:%S") ${debian_chroot:+($debian_chroot)}\u@\h:\w\$ '
+ fi
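
Note: bash also has a built-in \D{} prompt escape (a strftime format string), which avoids forking a date process on every prompt. The plain variant above could equivalently be written as:

PS1='\D{%Y-%m-%d %H:%M:%S} ${debian_chroot:+($debian_chroot)}\u@\h:\w\$ '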

Add aliases for script session logging

~/.bashrc
+ alias s='script -q -a -f /var/log/script/$(whoami)_$(date +%Y%m%d)_$$.log'
+ alias e='exit'
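
The s alias assumes /var/log/script exists and is writable by the logging user; one way to prepare it (sticky and world-writable, like /tmp):

sudo mkdir -p /var/log/script
sudo chmod 1777 /var/log/script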
---

Samba

sudo apt update
sudo apt install samba

Add a Samba user

sudo smbpasswd -a sambauser
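
Note that smbpasswd -a only works for an existing system account. If sambauser is not a system user yet, one way to create a share-only account (no home directory, login shell disabled):

sudo useradd -M -s /usr/sbin/nologin sambauser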

Edit smb.conf to set up the shared directory

/etc/samba/smb.conf
[nas]
   path = /path/to/nas
   available = yes
   valid users = sambauser
   read only = no
   browsable = yes
   public = no
   writable = yes
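
testparm can validate the edited config for syntax errors before restarting:

testparm -s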

Restart the service

sudo systemctl restart smbd

Check that the share can be accessed from a client
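
For a quick command-line test, smbclient can open the share (my-ubuntu-server stands in for the server's hostname or IP); from a macOS client, Finder's "Connect to Server" with smb://my-ubuntu-server/nas works as well:

smbclient //my-ubuntu-server/nas -U sambauser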

---

Add the extra HDD's mount to fstab

Look up the UUID

/dev/sdaX is the OS install disk

# ls -la /dev/disk/by-uuid/
lrwxrwxrwx 1 root root  10 Mar 18 11:53 d87c71a3-0626-4a5a-8d4f-20fe617XXXXX -> ../../sdb1
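
blkid prints the same UUID (plus the filesystem type) directly:

sudo blkid /dev/sdb1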

Edit fstab

/etc/fstab
+ UUID=d87c71a3-0626-4a5a-8d4f-20fe617XXXXX /mnt ext4 defaults 0 2

Run daemon-reload, then mount -a, and confirm the disk is mounted

# mount -a
mount: (hint) your fstab has been modified, but systemd still uses
       the old version; use 'systemctl daemon-reload' to reload.
# systemctl daemon-reload
# mount -a
# df -h
Filesystem      Size  Used Avail Use% Mounted on
/dev/sdb1       7.3T   28K  6.9T   1% /mnt
---

Set up SSH access to GitHub

Transfer the private key stored on the Mac client to the server

The public key registered on my GitHub account had already been imported as the SSH login key for the Ubuntu Server

Transfer the file with scp

Client

scp -i ~/.ssh/private-key-file ~/.ssh/private-key-file username@my-ubuntu-server:/home/username/ 
private-key-file                                                                                      100%  411    80.6KB/s   00:00     

Server

$ ls -l ~/.ssh/
total 4
-rw------- 1 username username 411 Mar 18 13:27 private-key-file
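
ssh refuses private keys that are group- or world-readable. The listing above already shows mode 600, but if the mode differs after the copy:

chmod 600 ~/.ssh/private-key-file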

Connection test

$ ssh -T git@github.com
Hi user! You've successfully authenticated, but GitHub does not provide shell access.
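
If the key file name is not one ssh tries by default (id_ed25519, id_rsa, ...), an entry in ~/.ssh/config points ssh at it (private-key-file is the placeholder name used above):

~/.ssh/config
Host github.com
  User git
  IdentityFile ~/.ssh/private-key-file
  IdentitiesOnly yes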
---

Kubernetes (single-node)

https://kubernetes.io/ja/docs/setup/production-environment/tools/kubeadm/install-kubeadm
https://kubernetes.io/ja/docs/setup/production-environment/tools/kubeadm/create-cluster-kubeadm
https://zenn.dev/yamahitsuji/scraps/5a460e88ac6509

Let iptables see bridged traffic

/etc/sysctl.d/k8s.conf
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
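
These sysctl keys only exist while the br_netfilter module is loaded, so load it, make it persistent, and apply the settings:

sudo modprobe br_netfilter
echo br_netfilter | sudo tee /etc/modules-load.d/k8s.conf
sudo sysctl --system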

Make sure iptables does not use the nftables backend

sudo apt-get install -y iptables arptables ebtables

sudo update-alternatives --set iptables /usr/sbin/iptables-legacy
sudo update-alternatives --set ip6tables /usr/sbin/ip6tables-legacy
sudo update-alternatives --set arptables /usr/sbin/arptables-legacy
sudo update-alternatives --set ebtables /usr/sbin/ebtables-legacy
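
iptables reports its active backend, so the switch can be verified; the version string should now end in (legacy):

iptables --version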

Docker daemon setup

/etc/docker/daemon.json
{
  "exec-opts": ["native.cgroupdriver=systemd"],
  "log-driver": "json-file",
  "log-opts": {
    "max-size": "100m"
  },
  "storage-driver": "overlay2"
}

restart

sudo systemctl daemon-reload
sudo systemctl restart docker
sudo systemctl enable docker
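
Docker should now report the systemd cgroup driver:

docker info | grep -i cgroup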

Install kubeadm, kubelet, and kubectl

sudo apt-get update && sudo apt-get install -y apt-transport-https curl
sudo mkdir -p /etc/apt/keyrings
curl -fsSL https://pkgs.k8s.io/core:/stable:/v1.32/deb/Release.key | sudo gpg --dearmor -o /etc/apt/keyrings/kubernetes-apt-keyring.gpg
echo 'deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg] https://pkgs.k8s.io/core:/stable:/v1.32/deb/ /' | sudo tee /etc/apt/sources.list.d/kubernetes.list

sudo apt-get update
sudo apt-get install -y kubelet kubeadm kubectl
sudo apt-mark hold kubelet kubeadm kubectl
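
Confirm the held versions:

kubeadm version
kubectl version --client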

Change KUBELET_EXTRA_ARGS

/etc/default/kubelet
- KUBELET_EXTRA_ARGS
+ KUBELET_EXTRA_ARGS=--cgroup-driver=systemd

Change the container runtime (CRI) settings

Write the default config to /etc/containerd/config.toml

sudo containerd config default | sudo tee /etc/containerd/config.toml

Configure runc to use the systemd cgroup driver

/etc/containerd/config.toml
  [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options]
-    SystemdCgroup = false
+    SystemdCgroup = true

restart

sudo systemctl restart containerd
sudo systemctl status containerd

Bring up the master node

sudo kubeadm init --node-name (nodename) --apiserver-advertise-address=192.168.0.xx --pod-network-cidr=10.244.0.0/16 --service-cidr=10.96.0.0/12

mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
# or, when working as root:
export KUBECONFIG=/etc/kubernetes/admin.conf

Check access with kubectl

# kubectl get pods --all-namespaces
NAMESPACE     NAME                             READY   STATUS    RESTARTS   AGE
kube-system   coredns-668d6bf9bc-hfvgf         0/1     Pending   0          107m
kube-system   coredns-668d6bf9bc-skgq2         0/1     Pending   0          107m
kube-system   etcd-master                      1/1     Running   0          107m
kube-system   kube-apiserver-master            1/1     Running   0          107m
kube-system   kube-controller-manager-master   1/1     Running   0          107m
kube-system   kube-proxy-vcpvq                 1/1     Running   0          107m
kube-system   kube-scheduler-master            1/1     Running   0          107m

Change settings so pods can be scheduled on the master node

# kubectl taint node master node.kubernetes.io/not-ready:NoSchedule-
node/master untainted
# kubectl taint node master node-role.kubernetes.io/control-plane:NoSchedule-
node/master untainted
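
The node should now report no taints:

kubectl describe node master | grep -i taint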

calico install

kubectl create -f https://raw.githubusercontent.com/projectcalico/calico/v3.29.2/manifests/tigera-operator.yaml
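
Note: with the tigera-operator route, the Calico docs also apply a second manifest that creates the Installation custom resource; if the calico pods never come up, this is the likely missing step (check that its ipPools CIDR matches the 10.244.0.0/16 passed to kubeadm init):

kubectl create -f https://raw.githubusercontent.com/projectcalico/calico/v3.29.2/manifests/custom-resources.yaml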

Check that all pods on the master node are Running

# kubectl get pods --all-namespaces 
NAMESPACE     NAME                                       READY   STATUS    RESTARTS   AGE
kube-system   calico-kube-controllers-7498b9bb4c-7cs7z   1/1     Running   0          2m51s
kube-system   calico-node-29566                          1/1     Running   0          2m51s
kube-system   coredns-668d6bf9bc-ck786                   1/1     Running   0          14m
kube-system   coredns-668d6bf9bc-f6wmt                   1/1     Running   0          14m
kube-system   etcd-caltago                               1/1     Running   0          14m
kube-system   kube-apiserver-caltago                     1/1     Running   0          14m
kube-system   kube-controller-manager-caltago            1/1     Running   0          14m
kube-system   kube-proxy-k9ktt                           1/1     Running   0          14m
kube-system   kube-scheduler-caltago                     1/1     Running   0          14m
---

GPU node support

https://nvidia.github.io/nvidia-container-runtime/
https://github.com/NVIDIA/k8s-device-plugin
https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#configuring-containerd-for-kubernetes
https://support.d2iq.com/hc/en-us/articles/4409480600468-Configuring-Containerd-config-toml-for-nvidia-runtime
https://blog.inductor.me/entry/2020/12/13/042319

Disable the open-source (nouveau) driver

/etc/modprobe.d/blacklist-nouveau.conf
blacklist nouveau
options nouveau modeset=0

reboot

sudo update-initramfs -u
sudo reboot
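
After the reboot, confirm that nouveau is no longer loaded (the command should print nothing):

lsmod | grep nouveau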

Driver install

sudo apt-get -y install gcc make build-essential
wget "https://jp.download.nvidia.com/XFree86/Linux-x86_64/570.124.04/NVIDIA-Linux-x86_64-570.124.04.run"
sudo bash NVIDIA-Linux-x86_64-570.124.04.run

Check that the OS recognizes the GPU

# nvidia-smi

nvidia-container-runtime install

There is no repository for Ubuntu 24, so use the latest Ubuntu 22 one

distribution=ubuntu22.04
curl -s -L https://nvidia.github.io/nvidia-container-runtime/$distribution/nvidia-container-runtime.list | sudo tee /etc/apt/sources.list.d/nvidia-container-runtime.list
sudo apt update
sudo apt-get install -y nvidia-container-runtime

Change the container runtime and restart

# sudo nvidia-ctk runtime configure --runtime=docker
/etc/docker/daemon.json
{
    "runtimes": {
        "nvidia": {
            "args": [],
            "path": "nvidia-container-runtime"
        }
    }
}
# systemctl restart docker && systemctl enable docker
Executing: /usr/lib/systemd/systemd-sysv-install enable docker
# docker info | grep Runtime
 Runtimes: io.containerd.runc.v2 nvidia runc
 Default Runtime: nvidia

Check that Docker can see the GPU

docker run --rm --gpus all \
    nvcr.io/nvidia/cuda:11.7.1-cudnn8-devel-ubuntu22.04 \
    bash -c "nvidia-smi; nvcc -V"

Change containerd to use nvidia-container-runtime

[plugins."io.containerd.grpc.v1.cri".containerd]
-     default_runtime_name = "runc"
+     default_runtime_name = "nvidia"

(snip)

       [plugins."io.containerd.grpc.v1.cri".containerd.runtimes]
          [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
            SystemdCgroup = true

+        [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia]
+          runtime_type = "io.containerd.runc.v2"
+          privileged_without_host_devices = false
+          privileged_without_host_devices_all_devices_allowed = false
+          sandbox_mode = "podsandbox"
+          [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia.options]
+            NoNewKeyring = false
+            NoPivotRoot = false
+            BinaryName = "nvidia-container-runtime"
+            SystemdCgroup = true

Restart containerd

sudo systemctl restart containerd
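
The running config can be dumped to confirm the default runtime switched to nvidia:

containerd config dump | grep default_runtime_name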

device plugin install

# kubectl create -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.17.1/deployments/static/nvidia-device-plugin.yml
# kubectl get pods --all-namespaces
NAMESPACE     NAME                                       READY   STATUS    RESTARTS      AGE
kube-system   nvidia-device-plugin-daemonset-fn9rj       1/1     Running   0             4m35s

Check that the node recognizes the GPUs (the three lines below are the node's Capacity, Allocatable, and Allocated resources)

# kubectl describe node master | grep -i gpu
  nvidia.com/gpu:     2
  nvidia.com/gpu:     2
  nvidia.com/gpu     0           0

Check that a GPU can be assigned to a pod

cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: Pod
metadata:
  name: cuda-vector-add
spec:
  restartPolicy: OnFailure
  containers:
    - name: cuda-vector-add
      # https://github.com/kubernetes/kubernetes/blob/v1.7.11/test/images/nvidia-cuda/Dockerfile
      image: "registry.k8s.io/cuda-vector-add:v0.1"
      resources:
        limits:
          nvidia.com/gpu: 1
EOF
kubectl logs pod/cuda-vector-add
[Vector addition of 50000 elements]
Copy input data from the host memory to the CUDA device
CUDA kernel launch with 196 blocks of 256 threads
Copy output data from the CUDA device to the host memory
Test PASSED
Done
---

Add nameservers

/etc/netplan/50-cloud-init.yaml
network:
  version: 2
  ethernets:
    enp3s0:
      addresses:
      - "192.168.0.xx/24"
      routes:
      - to: "default"
        via: "192.168.0.x"
+      nameservers:
+        addresses:
+          - 8.8.8.8
+          - 8.8.4.4
# sudo netplan apply
# resolvectl status
Link 2 (enp3s0)
    Current Scopes: DNS
         Protocols: +DefaultRoute -LLMNR -mDNS -DNSOverTLS DNSSEC=no/unsupported
       DNS Servers: 8.8.8.8 8.8.4.4 2404:1a8:7f01:b::3 2404:1a8:7f01:a::3
---

Apply the patch that removes the NVIDIA GPU session limit

https://github.com/keylase/nvidia-patch

# git clone https://github.com/keylase/nvidia-patch.git
# cd nvidia-patch
# bash patch.sh
Detected nvidia driver version: 570.124.04
libnvidia-encode.so
Attention! Backup not found. Copying current libnvidia-encode.so to backup.
042ca1a4277ca85428dc4f9a91c72d2d800bc0e3  /opt/nvidia/libnvidia-encode-backup/libnvidia-encode.so.570.124.04
22116f0473341983ca0f86305a1310d5ab47ff46  /usr/lib/x86_64-linux-gnu/libnvidia-encode.so.570.124.04
Patched!