Doing Kubernetes The Hard Way
The tutorial was updated in 2024/04: it no longer targets a specific IaaS and has moved to ARM, among other changes.
I'll work through it on AWS.
The machines are provisioned with Terraform.
I want to log in via Session Manager, so user data, an IAM role, and so on are set up for that.
Since this uses the default VPC, a public IP is also assigned.
# Configure for your env
locals {
  region        = "ap-northeast-1"        # Replace with your desired region
  sg_id         = ""                      # Replace with your existing security group ID
  vpc_subnet_id = ""                      # Replace with your subnet ID
  ami_id        = "ami-078255fea9b2e6223" # Debian 12 ARM64 AMI ID

  # https://docs.aws.amazon.com/ja_jp/systems-manager/latest/userguide/agent-install-deb.html
  user_data = <<-EOF
    #!/bin/bash
    mkdir /tmp/ssm
    cd /tmp/ssm
    wget https://s3.amazonaws.com/ec2-downloads-windows/SSMAgent/latest/debian_arm64/amazon-ssm-agent.deb
    sudo dpkg -i amazon-ssm-agent.deb
    sudo systemctl enable amazon-ssm-agent
  EOF
}

# Configure the AWS Provider
provider "aws" {
  region = local.region
}

# Create new IAM role
resource "aws_iam_role" "ec2_base" {
  name        = "ssm-role"
  description = "Allows EC2 instances to call AWS services like CloudWatch and Systems Manager on your behalf."

  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Action = "sts:AssumeRole"
        Effect = "Allow"
        Principal = {
          Service = "ec2.amazonaws.com"
        }
      }
    ]
  })
}

# Attach AmazonSSMManagedInstanceCore policy to the role
resource "aws_iam_role_policy_attachment" "session_manager_policy" {
  policy_arn = "arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore"
  role       = aws_iam_role.ec2_base.id
}

# Create an instance profile for the role
resource "aws_iam_instance_profile" "ec2_profile" {
  name = "ec2-ssm-profile"
  role = aws_iam_role.ec2_base.name
}

# Define the jumpbox instance
resource "aws_instance" "jumpbox" {
  ami                         = local.ami_id
  instance_type               = "t4g.nano" # 1 vCPU, 0.5GB RAM
  vpc_security_group_ids      = [local.sg_id]
  subnet_id                   = local.vpc_subnet_id
  associate_public_ip_address = true
  iam_instance_profile        = aws_iam_instance_profile.ec2_profile.name
  user_data                   = local.user_data

  root_block_device {
    volume_size = 10
  }

  tags = {
    Name = "jumpbox"
  }
}

# Define the Kubernetes server instance
resource "aws_instance" "server" {
  ami                         = local.ami_id
  instance_type               = "t4g.small" # 2 vCPU, 2GB RAM
  vpc_security_group_ids      = [local.sg_id]
  subnet_id                   = local.vpc_subnet_id
  associate_public_ip_address = true
  iam_instance_profile        = aws_iam_instance_profile.ec2_profile.name
  user_data                   = local.user_data

  root_block_device {
    volume_size = 20
  }

  tags = {
    Name = "server"
  }
}

# Define the Kubernetes worker nodes
resource "aws_instance" "node" {
  count                       = 2
  ami                         = local.ami_id
  instance_type               = "t4g.small" # 2 vCPU, 2GB RAM
  vpc_security_group_ids      = [local.sg_id]
  subnet_id                   = local.vpc_subnet_id
  associate_public_ip_address = true
  iam_instance_profile        = aws_iam_instance_profile.ec2_profile.name
  user_data                   = local.user_data

  root_block_device {
    volume_size = 20
  }

  tags = {
    Name = "node-${count.index}"
  }
}
Run on the jumpbox:
apt -y install wget curl vim openssl git
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
E: Unable to locate package git
Installation failed unless apt-get update was run first, so I added that command. Everything else followed the instructions as written.
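For the record, the sequence that worked:

apt-get update
apt-get -y install wget curl vim openssl git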
Create machines.txt.
The IPs are taken from the EC2 instances built above.
root@ip-172-31-7-3:~/kubernetes-the-hard-way# cat machines.txt
172.31.9.224 server.kubernetes.local server
172.31.5.186 node-0.kubernetes.local node-0 10.200.0.0/24
172.31.11.36 node-1.kubernetes.local node-1 10.200.0.0/24
root@ip-172-31-7-3:~/kubernetes-the-hard-way#
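Rather than copying from the console, the private IPs can also be pulled with the AWS CLI; a rough sketch, assuming the CLI is configured and using the Name tags from the Terraform above:

aws ec2 describe-instances \
  --filters "Name=tag:Name,Values=server,node-0,node-1" \
            "Name=instance-state-name,Values=running" \
  --query "Reservations[].Instances[].[Tags[?Key=='Name']|[0].Value, PrivateIpAddress]" \
  --output text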
Then run the commands as written.
In the step that rewrites the hostname, the sed pattern didn't match the entry in /etc/hosts, so I adjusted it:
while read IP FQDN HOST SUBNET; do
  CMD="sed -i 's/^127.0.0.1/127.0.1.1\t${FQDN} ${HOST}/' /etc/hosts"
  ssh -n root@${IP} "$CMD"
  ssh -n root@${IP} hostnamectl hostname ${HOST}
done < machines.txt
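A quick way to confirm the hostnames took, borrowing the same loop (hostname --fqdn only resolves correctly once the /etc/hosts entry is right):

while read IP FQDN HOST SUBNET; do
  ssh -n root@${IP} hostname --fqdn
done < machines.txt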
Create the certificates on the jumpbox and distribute them to each host.
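Distribution boils down to scp per host; a rough sketch along the lines of the tutorial (file names and destinations may differ slightly):

for HOST in node-0 node-1; do
  ssh root@${HOST} mkdir -p /var/lib/kubelet/
  scp ca.crt root@${HOST}:/var/lib/kubelet/
  scp ${HOST}.crt root@${HOST}:/var/lib/kubelet/kubelet.crt
  scp ${HOST}.key root@${HOST}:/var/lib/kubelet/kubelet.key
done
scp ca.key ca.crt kube-api-server.key kube-api-server.crt \
  service-accounts.key service-accounts.crt root@server:~/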
Create the kubeconfigs and distribute them to each host:
kubelet and kube-proxy go to node-{0,1}
admin, kube-controller-manager, and kube-scheduler go to the server
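Each kubeconfig is built with kubectl config; the kubelet ones look roughly like this (a sketch following the tutorial, with server.kubernetes.local as the API server name):

for host in node-0 node-1; do
  kubectl config set-cluster kubernetes-the-hard-way \
    --certificate-authority=ca.crt \
    --embed-certs=true \
    --server=https://server.kubernetes.local:6443 \
    --kubeconfig=${host}.kubeconfig

  kubectl config set-credentials system:node:${host} \
    --client-certificate=${host}.crt \
    --client-key=${host}.key \
    --embed-certs=true \
    --kubeconfig=${host}.kubeconfig

  kubectl config set-context default \
    --cluster=kubernetes-the-hard-way \
    --user=system:node:${host} \
    --kubeconfig=${host}.kubeconfig

  kubectl config use-context default --kubeconfig=${host}.kubeconfig
done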
Create the key for encrypting Secrets at rest.
encryption-config.yaml doesn't exist in the repo.
There's an issue about it, so copy the config from a comment there:
apiVersion: apiserver.config.k8s.io/v1
kind: EncryptionConfiguration
resources:
  - resources:
      - secrets
    providers:
      - aescbc:
          keys:
            - name: key1
              secret: ${ENCRYPTION_KEY}
      - identity: {}
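The key is just 32 random bytes, base64-encoded, substituted into the ${ENCRYPTION_KEY} placeholder; roughly (assuming the copied config is saved as configs/encryption-config.yaml):

export ENCRYPTION_KEY=$(head -c 32 /dev/urandom | base64)
envsubst < configs/encryption-config.yaml > encryption-config.yaml
scp encryption-config.yaml root@server:~/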
Build the etcd cluster.
Start the etcd service on the server.
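Quick check that etcd is healthy:

ssh root@server "etcdctl member list"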
Build the control plane.
API server, scheduler, and controller manager.
root@server:~# kubectl cluster-info \
--kubeconfig admin.kubeconfig
Kubernetes control plane is running at https://127.0.0.1:6443
To further debug and diagnose cluster problems, use 'kubectl cluster-info dump'.
root@server:~#
At this point the control plane appears to be up and running.
Set up RBAC.
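This is the ClusterRole/ClusterRoleBinding that allows the API server (authenticating as the kubernetes user) to reach the kubelet API; roughly what gets applied on the server (a sketch based on the tutorial):

kubectl apply --kubeconfig admin.kubeconfig -f - <<EOF
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: system:kube-apiserver-to-kubelet
rules:
  - apiGroups: [""]
    resources: ["nodes/proxy", "nodes/stats", "nodes/log", "nodes/spec", "nodes/metrics"]
    verbs: ["*"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: system:kube-apiserver
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: system:kube-apiserver-to-kubelet
subjects:
  - kind: User
    name: kubernetes
EOF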
Build the worker side.
containerd, kubelet, kube-proxy.
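Once the binaries and configs are in place, it comes down to enabling the three services on each node, roughly:

systemctl daemon-reload
systemctl enable containerd kubelet kube-proxy
systemctl start containerd kubelet kube-proxy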
root@ip-172-31-7-3:~/kubernetes-the-hard-way# ssh root@server "kubectl get nodes \
--kubeconfig admin.kubeconfig"
NAME STATUS ROLES AGE VERSION
node-0 Ready <none> 71s v1.28.3
node-1 Ready <none> 6s v1.28.3
root@ip-172-31-7-3:~/kubernetes-the-hard-way#
Client setup on the jumpbox.
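This is essentially the admin kubeconfig again, written to the default ~/.kube/config; a sketch along the lines of the tutorial:

kubectl config set-cluster kubernetes-the-hard-way \
  --certificate-authority=ca.crt \
  --embed-certs=true \
  --server=https://server.kubernetes.local:6443
kubectl config set-credentials admin \
  --client-certificate=admin.crt \
  --client-key=admin.key
kubectl config set-context kubernetes-the-hard-way \
  --cluster=kubernetes-the-hard-way \
  --user=admin
kubectl config use-context kubernetes-the-hard-way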
With that, kubectl can be run from the jumpbox.
root@ip-172-31-7-3:~/kubernetes-the-hard-way# kubectl version
Client Version: v1.28.3
Kustomize Version: v5.0.4-0.20230601165947-6ce0bf390ce3
Server Version: v1.28.3
root@ip-172-31-7-3:~/kubernetes-the-hard-way# kubectl get nodes
NAME STATUS ROLES AGE VERSION
node-0 Ready <none> 2m40s v1.28.3
node-1 Ready <none> 95s v1.28.3
root@ip-172-31-7-3:~/kubernetes-the-hard-way#
Set up the pod network.
※ At this point I noticed I had gotten node-1's pod subnet wrong in machines.txt.
I had set both node-0 and node-1 to 10.200.0.0/24, but node-1 needs its own subnet, 10.200.1.0/24.
Fix 10-bridge.conf on node-1 and restart containerd, kubelet, and kube-proxy.
Also fix the routing on each host.
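With the corrected subnets, the routing works out to roughly this (next-hop IPs from machines.txt):

# on server
ip route add 10.200.0.0/24 via 172.31.5.186
ip route add 10.200.1.0/24 via 172.31.11.36
# on node-0
ip route add 10.200.1.0/24 via 172.31.11.36
# on node-1
ip route add 10.200.0.0/24 via 172.31.5.186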
Finally, the smoke tests.
Everything works.
root@ip-172-31-7-3:~/kubernetes-the-hard-way# kubectl get pods -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
nginx-56fcf95486-8vpj8 1/1 Running 0 4m12s 10.200.0.2 node-0 <none> <none>
root@ip-172-31-7-3:~/kubernetes-the-hard-way#
Try some node operations.
The pod moved from node-0 to node-1, but there seemed to be a brief moment of downtime. Probably depends on the configuration.
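For reference, the standard way to push pods off a node is drain:

kubectl drain node-0 --ignore-daemonsets   # evicts the pods (also cordons the node)
kubectl uncordon node-0                    # allow scheduling on the node again afterwards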
Stop node-1 from the AWS console.
root@ip-172-31-7-3:~/kubernetes-the-hard-way# kubectl get nodes
NAME STATUS ROLES AGE VERSION
node-0 Ready <none> 37m v1.28.3
node-1 NotReady <none> 36m v1.28.3
root@ip-172-31-7-3:~/kubernetes-the-hard-way#
I expected the pod to move to node-0 on its own, but that doesn't seem to happen.
root@ip-172-31-7-3:~/kubernetes-the-hard-way# kubectl get pods -A -o wide
NAMESPACE NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
default nginx-56fcf95486-8ccdw 1/1 Running 0 6m15s 10.200.1.3 node-1 <none> <none>
root@ip-172-31-7-3:~/kubernetes-the-hard-way#
root@ip-172-31-7-3:~/kubernetes-the-hard-way# kubectl exec -ti pod/nginx-56fcf95486-8ccdw -- nginx -v
Error from server: error dialing backend: dial tcp 172.31.11.36:10250: connect: no route to host
root@ip-172-31-7-3:~/kubernetes-the-hard-way#
Or so I thought, but it recovered after a few minutes.
root@ip-172-31-7-3:~/kubernetes-the-hard-way# kubectl get pods -A -o wide
NAMESPACE NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
default nginx-56fcf95486-8ccdw 1/1 Terminating 0 7m55s 10.200.1.3 node-1 <none> <none>
default nginx-56fcf95486-tk8ff 1/1 Running 0 35s 10.200.0.3 node-0 <none> <none>
root@ip-172-31-7-3:~/kubernetes-the-hard-way#
This article explains it well.
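The few minutes presumably comes from the default tolerationSeconds: 300 added to pods for the node.kubernetes.io/not-ready and node.kubernetes.io/unreachable taints; it can be seen on the pod itself:

kubectl get pod nginx-56fcf95486-tk8ff -o jsonpath='{.spec.tolerations}'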
Finally, clean up.
terraform destroy
That's a wrap.
ref