
Working through Kubernetes The Hard Way

koh-sh

https://github.com/kelseyhightower/kubernetes-the-hard-way/blob/master/docs/01-prerequisites.md

Built with Terraform.
I want to log in via Session Manager, so user data, an IAM role, and so on are set up for that.
Since this uses the default VPC, public IPs are assigned as well.

# Configure for your env
locals {
  region        = "ap-northeast-1"           # Replace with your desired region
  sg_id         = ""                         # Replace with your existing security group ID
  vpc_subnet_id = ""                         # Replace with your subnet ID
  ami_id        = "ami-078255fea9b2e6223"    # Debian 12 ARM64 AMI ID
  # https://docs.aws.amazon.com/ja_jp/systems-manager/latest/userguide/agent-install-deb.html
  user_data = <<-EOF
    #!/bin/bash
    mkdir /tmp/ssm
    cd /tmp/ssm
    wget https://s3.amazonaws.com/ec2-downloads-windows/SSMAgent/latest/debian_arm64/amazon-ssm-agent.deb
    sudo dpkg -i amazon-ssm-agent.deb
    sudo systemctl enable amazon-ssm-agent
  EOF
}

# Configure the AWS Provider
provider "aws" {
  region  = local.region
}

# Create new IAM role
resource "aws_iam_role" "ec2_base" {
  name        = "ssm-role"
  description = "Allows EC2 instances to call AWS services like CloudWatch and Systems Manager on your behalf."
  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Action = "sts:AssumeRole"
        Effect = "Allow"
        Principal = {
          Service = "ec2.amazonaws.com"
        }
      }
    ]
  })
}

# Attach AmazonSSMManagedInstanceCore policy to the role
resource "aws_iam_role_policy_attachment" "session_manager_policy" {
  policy_arn = "arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore"
  role       = aws_iam_role.ec2_base.id
}

# Create an instance profile for the role
resource "aws_iam_instance_profile" "ec2_profile" {
  name = "ec2-ssm-profile"
  role = aws_iam_role.ec2_base.name
}

# Define the jumpbox instance
resource "aws_instance" "jumpbox" {
  ami                         = local.ami_id
  instance_type               = "t4g.nano" # 1 vCPU, 0.5GB RAM
  vpc_security_group_ids      = [local.sg_id]
  subnet_id                   = local.vpc_subnet_id
  associate_public_ip_address = true
  iam_instance_profile        = aws_iam_instance_profile.ec2_profile.name
  user_data                   = local.user_data

  root_block_device {
    volume_size = 10
  }

  tags = {
    Name = "jumpbox"
  }
}

# Define the Kubernetes server instance
resource "aws_instance" "server" {
  ami                         = local.ami_id
  instance_type               = "t4g.small" # 2 vCPU, 2GB RAM
  vpc_security_group_ids      = [local.sg_id]
  subnet_id                   = local.vpc_subnet_id
  associate_public_ip_address = true
  iam_instance_profile        = aws_iam_instance_profile.ec2_profile.name
  user_data                   = local.user_data

  root_block_device {
    volume_size = 20
  }

  tags = {
    Name = "server"
  }
}

# Define the Kubernetes worker nodes
resource "aws_instance" "node" {
  count                       = 2
  ami                         = local.ami_id
  instance_type               = "t4g.small" # 2 vCPU, 2GB RAM
  vpc_security_group_ids      = [local.sg_id]
  subnet_id                   = local.vpc_subnet_id
  associate_public_ip_address = true
  iam_instance_profile        = aws_iam_instance_profile.ec2_profile.name
  user_data                   = local.user_data

  root_block_device {
    volume_size = 20
  }

  tags = {
    Name = "node-${count.index}"
  }
}
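To bring the instances up and connect, roughly the following (a sketch; assumes AWS credentials and the Session Manager plugin are already configured locally, and the instance ID is a placeholder):

# Provision the instances
terraform init
terraform apply

# Connect to the jumpbox via Session Manager
aws ssm start-session --target i-0123456789abcdef0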
koh-sh

https://github.com/kelseyhightower/kubernetes-the-hard-way/blob/master/docs/03-compute-resources.md

Create machines.txt.
The IPs are the private IPs of the EC2 instances built above.

root@ip-172-31-7-3:~/kubernetes-the-hard-way# cat machines.txt
172.31.9.224 server.kubernetes.local server
172.31.5.186 node-0.kubernetes.local node-0 10.200.0.0/24
172.31.11.36 node-1.kubernetes.local node-1 10.200.0.0/24
root@ip-172-31-7-3:~/kubernetes-the-hard-way#
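Rather than copying them from the console, the private IPs can also be pulled with the AWS CLI; a minimal sketch filtering on the Name tags set in Terraform:

for NAME in server node-0 node-1; do
  aws ec2 describe-instances \
    --filters "Name=tag:Name,Values=${NAME}" "Name=instance-state-name,Values=running" \
    --query 'Reservations[].Instances[].PrivateIpAddress' \
    --output text
done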

Then run the commands from the doc as-is.
For the step that rewrites the hostname, the sed from the doc did not match the entry in /etc/hosts, so I adjusted it:

while read IP FQDN HOST SUBNET; do
  CMD="sed -i 's/^127.0.0.1/127.0.1.1\t${FQDN} ${HOST}/' /etc/hosts"
  ssh -n root@${IP} "$CMD"
  ssh -n root@${IP} hostnamectl hostname ${HOST}
done < machines.txt
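A quick check that the hostnames and hosts entries took effect (a minimal sketch):

while read IP FQDN HOST SUBNET; do
  ssh -n root@${IP} "hostname --fqdn; grep 127.0.1.1 /etc/hosts"
done < machines.txt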
koh-sh

https://github.com/kelseyhightower/kubernetes-the-hard-way/blob/master/docs/06-data-encryption-keys.md

Create the key for encryption at rest.

encryption-config.yaml is missing from the repo.
There is an issue about it, so copy the config from a comment there:
https://github.com/kelseyhightower/kubernetes-the-hard-way/issues/787

apiVersion: apiserver.config.k8s.io/v1
kind: EncryptionConfiguration
resources:
  - resources:
      - secrets
    providers:
      - aescbc:
          keys:
            - name: key1
              secret: ${ENCRYPTION_KEY}
      - identity: {}
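The key itself and the rendered config are produced roughly as in the doc (a sketch; assumes the template above was saved as configs/encryption-config.yaml):

export ENCRYPTION_KEY=$(head -c 32 /dev/urandom | base64)
envsubst < configs/encryption-config.yaml > encryption-config.yaml
scp encryption-config.yaml root@server:~/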
koh-sh

https://github.com/kelseyhightower/kubernetes-the-hard-way/blob/master/docs/10-configuring-kubectl.md

Client configuration for the jumpbox so kubectl can be run from there.
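A sketch of the config commands from the doc (assumes ca.crt, admin.crt, and admin.key are in the working directory, and server.kubernetes.local resolves via /etc/hosts):

kubectl config set-cluster kubernetes-the-hard-way \
  --certificate-authority=ca.crt \
  --embed-certs=true \
  --server=https://server.kubernetes.local:6443

kubectl config set-credentials admin \
  --client-certificate=admin.crt \
  --client-key=admin.key

kubectl config set-context kubernetes-the-hard-way \
  --cluster=kubernetes-the-hard-way \
  --user=admin

kubectl config use-context kubernetes-the-hard-way

After that, kubectl works against the cluster: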

root@ip-172-31-7-3:~/kubernetes-the-hard-way# kubectl version
Client Version: v1.28.3
Kustomize Version: v5.0.4-0.20230601165947-6ce0bf390ce3
Server Version: v1.28.3
root@ip-172-31-7-3:~/kubernetes-the-hard-way# kubectl get nodes
NAME     STATUS   ROLES    AGE     VERSION
node-0   Ready    <none>   2m40s   v1.28.3
node-1   Ready    <none>   95s     v1.28.3
root@ip-172-31-7-3:~/kubernetes-the-hard-way#
koh-sh

https://github.com/kelseyhightower/kubernetes-the-hard-way/blob/master/docs/11-pod-network-routes.md

Pod network route setup.

Note: at this point I noticed that the subnet for node-1 in machines.txt was wrong.
I had set both node-0 and node-1 to 10.200.0.0/24, but node-1 needs to be shifted to 10.200.1.0/24.
Fix 10-bridge.conf on node-1 and restart containerd, kubelet, and kube-proxy.
https://github.com/kelseyhightower/kubernetes-the-hard-way/blob/master/docs/09-bootstrapping-kubernetes-workers.md

Also fix the routing on each host, roughly as sketched below.
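A sketch of the fix (the config path and routes follow the KTHW layout; the exact commands are my reconstruction, not copied from shell history):

# On node-1: point the bridge CNI config at the correct pod subnet, then restart
sed -i 's|10.200.0.0/24|10.200.1.0/24|' /etc/cni/net.d/10-bridge.conf
systemctl restart containerd kubelet kube-proxy

# On server and node-0: replace the stale route to node-1's pod subnet (node-1 is 172.31.11.36)
ip route del 10.200.0.0/24 via 172.31.11.36 2>/dev/null || true
ip route add 10.200.1.0/24 via 172.31.11.36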

koh-sh

Trying out various node operations.

https://qiita.com/honma-h/items/08fae1ec4b2045e03b26

The pod moved from node-0 to node-1, but I think there was a brief moment of downtime. Probably depends on the configuration; see the sketch below.
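A minimal sketch of the kind of drain operation used here (deployment name is an example; with a single replica the old pod is evicted before the replacement is Ready, which is where the short gap comes from, so more replicas plus a PodDisruptionBudget would avoid it):

# Create a test deployment and see where it landed
kubectl create deployment nginx --image=nginx
kubectl get pods -o wide

# Evict everything off node-0; the ReplicaSet recreates the pod on node-1
kubectl drain node-0 --ignore-daemonsets --delete-emptydir-data
kubectl get pods -o wide

# Allow scheduling on node-0 again
kubectl uncordon node-0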

Stop node-1 from the management console.

root@ip-172-31-7-3:~/kubernetes-the-hard-way# kubectl get nodes
NAME     STATUS     ROLES    AGE   VERSION
node-0   Ready      <none>   37m   v1.28.3
node-1   NotReady   <none>   36m   v1.28.3
root@ip-172-31-7-3:~/kubernetes-the-hard-way#

I expected the pod to get moved to node-0 automatically, but that doesn't seem to happen.

root@ip-172-31-7-3:~/kubernetes-the-hard-way#  kubectl get pods -A -o wide
NAMESPACE   NAME                     READY   STATUS    RESTARTS   AGE     IP           NODE     NOMINATED NODE   READINESS GATES
default     nginx-56fcf95486-8ccdw   1/1     Running   0          6m15s   10.200.1.3   node-1   <none>           <none>
root@ip-172-31-7-3:~/kubernetes-the-hard-way#

root@ip-172-31-7-3:~/kubernetes-the-hard-way# kubectl exec -ti pod/nginx-56fcf95486-8ccdw -- nginx -v
Error from server: error dialing backend: dial tcp 172.31.11.36:10250: connect: no route to host
root@ip-172-31-7-3:~/kubernetes-the-hard-way#

Or so I thought, but after a few minutes it recovered:

root@ip-172-31-7-3:~/kubernetes-the-hard-way#  kubectl get pods -A -o wide
NAMESPACE   NAME                     READY   STATUS        RESTARTS   AGE     IP           NODE     NOMINATED NODE   READINESS GATES
default     nginx-56fcf95486-8ccdw   1/1     Terminating   0          7m55s   10.200.1.3   node-1   <none>           <none>
default     nginx-56fcf95486-tk8ff   1/1     Running       0          35s     10.200.0.3   node-0   <none>           <none>
root@ip-172-31-7-3:~/kubernetes-the-hard-way#
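This matches the default behaviour: pods get NoExecute tolerations for node.kubernetes.io/not-ready and node.kubernetes.io/unreachable with tolerationSeconds: 300, so when a node goes NotReady the control plane waits about five minutes before deleting the pod and letting the ReplicaSet recreate it on a healthy node. A sketch of shortening that per workload (30s is an example value):

# Override the default 300s tolerations on the test deployment
kubectl patch deployment nginx -p '{"spec":{"template":{"spec":{"tolerations":[
  {"key":"node.kubernetes.io/unreachable","operator":"Exists","effect":"NoExecute","tolerationSeconds":30},
  {"key":"node.kubernetes.io/not-ready","operator":"Exists","effect":"NoExecute","tolerationSeconds":30}]}}}}'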