Doing Kubernetes The Hard Way
The tutorial was updated in 2024/04: it no longer targets a specific IaaS and has moved to ARM, among other changes.
I'll work through it on AWS.
The machines are provisioned with Terraform.
I want to log in via Session Manager, so user data, an IAM role, and so on are set up for that.
Since this uses the default VPC, a public IP is also assigned.
# Configure for your env
locals {
  region        = "ap-northeast-1"        # Replace with your desired region
  sg_id         = ""                      # Replace with your existing security group ID
  vpc_subnet_id = ""                      # Replace with your subnet ID
  ami_id        = "ami-078255fea9b2e6223" # Debian 12 ARM64 AMI ID

  # https://docs.aws.amazon.com/ja_jp/systems-manager/latest/userguide/agent-install-deb.html
  user_data = <<-EOF
    #!/bin/bash
    mkdir /tmp/ssm
    cd /tmp/ssm
    wget https://s3.amazonaws.com/ec2-downloads-windows/SSMAgent/latest/debian_arm64/amazon-ssm-agent.deb
    sudo dpkg -i amazon-ssm-agent.deb
    sudo systemctl enable amazon-ssm-agent
  EOF
}

# Configure the AWS Provider
provider "aws" {
  region = local.region
}

# Create new IAM role
resource "aws_iam_role" "ec2_base" {
  name        = "ssm-role"
  description = "Allows EC2 instances to call AWS services like CloudWatch and Systems Manager on your behalf."

  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Action = "sts:AssumeRole"
        Effect = "Allow"
        Principal = {
          Service = "ec2.amazonaws.com"
        }
      }
    ]
  })
}

# Attach AmazonSSMManagedInstanceCore policy to the role
resource "aws_iam_role_policy_attachment" "session_manager_policy" {
  policy_arn = "arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore"
  role       = aws_iam_role.ec2_base.id
}

# Create an instance profile for the role
resource "aws_iam_instance_profile" "ec2_profile" {
  name = "ec2-ssm-profile"
  role = aws_iam_role.ec2_base.name
}

# Define the jumpbox instance
resource "aws_instance" "jumpbox" {
  ami                         = local.ami_id
  instance_type               = "t4g.nano" # 1 vCPU, 0.5GB RAM
  vpc_security_group_ids      = [local.sg_id]
  subnet_id                   = local.vpc_subnet_id
  associate_public_ip_address = true
  iam_instance_profile        = aws_iam_instance_profile.ec2_profile.name
  user_data                   = local.user_data

  root_block_device {
    volume_size = 10
  }

  tags = {
    Name = "jumpbox"
  }
}

# Define the Kubernetes server instance
resource "aws_instance" "server" {
  ami                         = local.ami_id
  instance_type               = "t4g.small" # 2 vCPU, 2GB RAM
  vpc_security_group_ids      = [local.sg_id]
  subnet_id                   = local.vpc_subnet_id
  associate_public_ip_address = true
  iam_instance_profile        = aws_iam_instance_profile.ec2_profile.name
  user_data                   = local.user_data

  root_block_device {
    volume_size = 20
  }

  tags = {
    Name = "server"
  }
}

# Define the Kubernetes worker nodes
resource "aws_instance" "node" {
  count                       = 2
  ami                         = local.ami_id
  instance_type               = "t4g.small" # 2 vCPU, 2GB RAM
  vpc_security_group_ids      = [local.sg_id]
  subnet_id                   = local.vpc_subnet_id
  associate_public_ip_address = true
  iam_instance_profile        = aws_iam_instance_profile.ec2_profile.name
  user_data                   = local.user_data

  root_block_device {
    volume_size = 20
  }

  tags = {
    Name = "node-${count.index}"
  }
}
Run on the jumpbox:
apt -y install wget curl vim openssl git
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
E: Unable to locate package git
Installation failed unless apt-get update was run first, so I added that command. Everything else followed the instructions as written.
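For the record, the sequence that worked:

apt-get update
apt-get -y install wget curl vim openssl git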
Create machines.txt.
The IPs are taken from the EC2 instances built above.
root@ip-172-31-7-3:~/kubernetes-the-hard-way# cat machines.txt
172.31.9.224 server.kubernetes.local server
172.31.5.186 node-0.kubernetes.local node-0 10.200.0.0/24
172.31.11.36 node-1.kubernetes.local node-1 10.200.0.0/24
root@ip-172-31-7-3:~/kubernetes-the-hard-way#
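Rather than copying from the console, the private IPs can also be pulled with the AWS CLI; a rough sketch, assuming the CLI is configured and using the Name tags from the Terraform above:

aws ec2 describe-instances \
  --filters "Name=tag:Name,Values=server,node-0,node-1" \
            "Name=instance-state-name,Values=running" \
  --query "Reservations[].Instances[].[Tags[?Key=='Name']|[0].Value, PrivateIpAddress]" \
  --output text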
Then run the commands as written.
In the step that rewrites the hostname, the sed pattern didn't match the entry in /etc/hosts, so I adjusted it:
while read IP FQDN HOST SUBNET; do
  CMD="sed -i 's/^127.0.0.1/127.0.1.1\t${FQDN} ${HOST}/' /etc/hosts"
  ssh -n root@${IP} "$CMD"
  ssh -n root@${IP} hostnamectl hostname ${HOST}
done < machines.txt
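A quick way to confirm the hostnames took, borrowing the same loop (hostname --fqdn only resolves correctly once the /etc/hosts entry is right):

while read IP FQDN HOST SUBNET; do
  ssh -n root@${IP} hostname --fqdn
done < machines.txt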
Create the certificates on the jumpbox and distribute them to each host.
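Distribution boils down to scp per host; a rough sketch along the lines of the tutorial (file names and destinations may differ slightly):

for HOST in node-0 node-1; do
  ssh root@${HOST} mkdir -p /var/lib/kubelet/
  scp ca.crt root@${HOST}:/var/lib/kubelet/
  scp ${HOST}.crt root@${HOST}:/var/lib/kubelet/kubelet.crt
  scp ${HOST}.key root@${HOST}:/var/lib/kubelet/kubelet.key
done
scp ca.key ca.crt kube-api-server.key kube-api-server.crt \
  service-accounts.key service-accounts.crt root@server:~/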
Create the kubeconfigs and distribute them to each host:
kubelet and kube-proxy go to node-{0,1}
admin, kube-controller-manager, and kube-scheduler go to the server
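Each kubeconfig is built with kubectl config; the kubelet ones look roughly like this (a sketch following the tutorial, with server.kubernetes.local as the API server name):

for host in node-0 node-1; do
  kubectl config set-cluster kubernetes-the-hard-way \
    --certificate-authority=ca.crt \
    --embed-certs=true \
    --server=https://server.kubernetes.local:6443 \
    --kubeconfig=${host}.kubeconfig

  kubectl config set-credentials system:node:${host} \
    --client-certificate=${host}.crt \
    --client-key=${host}.key \
    --embed-certs=true \
    --kubeconfig=${host}.kubeconfig

  kubectl config set-context default \
    --cluster=kubernetes-the-hard-way \
    --user=system:node:${host} \
    --kubeconfig=${host}.kubeconfig

  kubectl config use-context default --kubeconfig=${host}.kubeconfig
done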
Create the key for encrypting Secrets at rest.
encryption-config.yaml doesn't exist in the repo.
There's an issue about it, so copy the config from a comment there:
apiVersion: apiserver.config.k8s.io/v1
kind: EncryptionConfiguration
resources:
  - resources:
      - secrets
    providers:
      - aescbc:
          keys:
            - name: key1
              secret: ${ENCRYPTION_KEY}
      - identity: {}
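The key is just 32 random bytes, base64-encoded, substituted into the ${ENCRYPTION_KEY} placeholder; roughly (assuming the copied config is saved as configs/encryption-config.yaml):

export ENCRYPTION_KEY=$(head -c 32 /dev/urandom | base64)
envsubst < configs/encryption-config.yaml > encryption-config.yaml
scp encryption-config.yaml root@server:~/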
Build the etcd cluster.
Start the etcd service on the server.
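Quick check that etcd is healthy:

ssh root@server "etcdctl member list"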
Build the control plane.
API server, scheduler, and controller manager.
root@server:~# kubectl cluster-info \
--kubeconfig admin.kubeconfig
Kubernetes control plane is running at https://127.0.0.1:6443
To further debug and diagnose cluster problems, use 'kubectl cluster-info dump'.
root@server:~#
At this point the control plane appears to be up and running.
Set up RBAC.
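This is the ClusterRole/ClusterRoleBinding that allows the API server (authenticating as the kubernetes user) to reach the kubelet API; roughly what gets applied on the server (a sketch based on the tutorial):

kubectl apply --kubeconfig admin.kubeconfig -f - <<EOF
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: system:kube-apiserver-to-kubelet
rules:
  - apiGroups: [""]
    resources: ["nodes/proxy", "nodes/stats", "nodes/log", "nodes/spec", "nodes/metrics"]
    verbs: ["*"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: system:kube-apiserver
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: system:kube-apiserver-to-kubelet
subjects:
  - kind: User
    name: kubernetes
EOF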
Build the worker side.
containerd, kubelet, kube-proxy.
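Once the binaries and configs are in place, it comes down to enabling the three services on each node, roughly:

systemctl daemon-reload
systemctl enable containerd kubelet kube-proxy
systemctl start containerd kubelet kube-proxy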
root@ip-172-31-7-3:~/kubernetes-the-hard-way# ssh root@server "kubectl get nodes \
--kubeconfig admin.kubeconfig"
NAME STATUS ROLES AGE VERSION
node-0 Ready <none> 71s v1.28.3
node-1 Ready <none> 6s v1.28.3
root@ip-172-31-7-3:~/kubernetes-the-hard-way#
Client setup on the jumpbox.
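This is essentially the admin kubeconfig again, written to the default ~/.kube/config; a sketch along the lines of the tutorial:

kubectl config set-cluster kubernetes-the-hard-way \
  --certificate-authority=ca.crt \
  --embed-certs=true \
  --server=https://server.kubernetes.local:6443
kubectl config set-credentials admin \
  --client-certificate=admin.crt \
  --client-key=admin.key
kubectl config set-context kubernetes-the-hard-way \
  --cluster=kubernetes-the-hard-way \
  --user=admin
kubectl config use-context kubernetes-the-hard-way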
With that, kubectl can be run from the jumpbox.
root@ip-172-31-7-3:~/kubernetes-the-hard-way# kubectl version
Client Version: v1.28.3
Kustomize Version: v5.0.4-0.20230601165947-6ce0bf390ce3
Server Version: v1.28.3
root@ip-172-31-7-3:~/kubernetes-the-hard-way# kubectl get nodes
NAME STATUS ROLES AGE VERSION
node-0 Ready <none> 2m40s v1.28.3
node-1 Ready <none> 95s v1.28.3
root@ip-172-31-7-3:~/kubernetes-the-hard-way#
Set up the pod network.
※ At this point I noticed I had gotten node-1's pod subnet wrong in machines.txt.
I had set both node-0 and node-1 to 10.200.0.0/24, but node-1 needs its own subnet, 10.200.1.0/24.
Fix 10-bridge.conf on node-1 and restart containerd, kubelet, and kube-proxy.
Also fix the routing on each host.
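With the corrected subnets, the routing works out to roughly this (next-hop IPs from machines.txt):

# on server
ip route add 10.200.0.0/24 via 172.31.5.186
ip route add 10.200.1.0/24 via 172.31.11.36
# on node-0
ip route add 10.200.1.0/24 via 172.31.11.36
# on node-1
ip route add 10.200.0.0/24 via 172.31.5.186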
Finally, the smoke tests.
Everything works.
root@ip-172-31-7-3:~/kubernetes-the-hard-way# kubectl get pods -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
nginx-56fcf95486-8vpj8 1/1 Running 0 4m12s 10.200.0.2 node-0 <none> <none>
root@ip-172-31-7-3:~/kubernetes-the-hard-way#
Try some node operations.
The pod moved from node-0 to node-1, but there seemed to be a brief moment of downtime. Probably depends on the configuration.
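For reference, the standard way to push pods off a node is drain:

kubectl drain node-0 --ignore-daemonsets   # evicts the pods (also cordons the node)
kubectl uncordon node-0                    # allow scheduling on the node again afterwards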
Stop node-1 from the AWS console.
root@ip-172-31-7-3:~/kubernetes-the-hard-way# kubectl get nodes
NAME STATUS ROLES AGE VERSION
node-0 Ready <none> 37m v1.28.3
node-1 NotReady <none> 36m v1.28.3
root@ip-172-31-7-3:~/kubernetes-the-hard-way#
I expected the pod to move to node-0 on its own, but that doesn't seem to happen.
root@ip-172-31-7-3:~/kubernetes-the-hard-way# kubectl get pods -A -o wide
NAMESPACE NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
default nginx-56fcf95486-8ccdw 1/1 Running 0 6m15s 10.200.1.3 node-1 <none> <none>
root@ip-172-31-7-3:~/kubernetes-the-hard-way#
root@ip-172-31-7-3:~/kubernetes-the-hard-way# kubectl exec -ti pod/nginx-56fcf95486-8ccdw -- nginx -v
Error from server: error dialing backend: dial tcp 172.31.11.36:10250: connect: no route to host
root@ip-172-31-7-3:~/kubernetes-the-hard-way#
Or so I thought, but it recovered after a few minutes.
root@ip-172-31-7-3:~/kubernetes-the-hard-way# kubectl get pods -A -o wide
NAMESPACE NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
default nginx-56fcf95486-8ccdw 1/1 Terminating 0 7m55s 10.200.1.3 node-1 <none> <none>
default nginx-56fcf95486-tk8ff 1/1 Running 0 35s 10.200.0.3 node-0 <none> <none>
root@ip-172-31-7-3:~/kubernetes-the-hard-way#
This article explains it well.
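The few minutes presumably comes from the default tolerationSeconds: 300 added to pods for the node.kubernetes.io/not-ready and node.kubernetes.io/unreachable taints; it can be seen on the pod itself:

kubectl get pod nginx-56fcf95486-tk8ff -o jsonpath='{.spec.tolerations}'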
Finally, clean up.
terraform destroy
That's a wrap.
ref