6주차 주제는 eks security이다.

 

사전준비는 아래와 같다.

# YAML 파일 다운로드
curl -O <https://s3.ap-northeast-2.amazonaws.com/cloudformation.cloudneta.net/K8S/**eks-oneclick5.yaml**>

# CloudFormation 스택 배포
예시) aws cloudformation deploy --template-file **eks-oneclick5.yaml** --stack-name **myeks** --parameter-overrides KeyName=**kp-gasida** SgIngressSshCidr=$(curl -s ipinfo.io/ip)/32  MyIamUserAccessKeyID=**AKIA5...** MyIamUserSecretAccessKey=**'CVNa2...'** ClusterBaseName=**myeks** --region ap-northeast-2

# CloudFormation 스택 배포 완료 후 작업용 EC2 IP 출력
aws cloudformation describe-stacks --stack-name **myeks** --query 'Stacks[*].**Outputs[0]**.OutputValue' --output text

# 작업용 EC2 SSH 접속
ssh -i **~/.ssh/kp-gasida.pem** ec2-user@$(aws cloudformation describe-stacks --stack-name **myeks** --query 'Stacks[*].Outputs[0].OutputValue' --output text)
  • 기본 설정
# default 네임스페이스 적용
kubectl ns default

# (옵션) context 이름 변경
NICK=<각자 자신의 닉네임>
NICK=gasida
kubectl ctx
kubectl config rename-context admin@myeks.ap-northeast-2.eksctl.io $NICK

# ExternalDNS
MyDomain=<자신의 도메인>
echo "export MyDomain=<자신의 도메인>" >> /etc/profile
*MyDomain=gasida.link*
*echo "export MyDomain=gasida.link" >> /etc/profile*
MyDnzHostedZoneId=$(aws route53 list-hosted-zones-by-name --dns-name "${MyDomain}." --query "HostedZones[0].Id" --output text)
echo $MyDomain, $MyDnzHostedZoneId
curl -s -O <https://raw.githubusercontent.com/gasida/PKOS/main/aews/externaldns.yaml>
MyDomain=$MyDomain MyDnzHostedZoneId=$MyDnzHostedZoneId envsubst < externaldns.yaml | kubectl apply -f -

# AWS LB Controller
helm repo add eks <https://aws.github.io/eks-charts>
helm repo update
helm install aws-load-balancer-controller eks/aws-load-balancer-controller -n kube-system --set clusterName=$CLUSTER_NAME \\
  --set serviceAccount.create=false --set serviceAccount.name=aws-load-balancer-controller

# 노드 IP 확인 및 PrivateIP 변수 지정
N1=$(kubectl get node --label-columns=topology.kubernetes.io/zone --selector=topology.kubernetes.io/zone=ap-northeast-2a -o jsonpath={.items[0].status.addresses[0].address})
N2=$(kubectl get node --label-columns=topology.kubernetes.io/zone --selector=topology.kubernetes.io/zone=ap-northeast-2b -o jsonpath={.items[0].status.addresses[0].address})
N3=$(kubectl get node --label-columns=topology.kubernetes.io/zone --selector=topology.kubernetes.io/zone=ap-northeast-2c -o jsonpath={.items[0].status.addresses[0].address})
echo "export N1=$N1" >> /etc/profile
echo "export N2=$N2" >> /etc/profile
echo "export N3=$N3" >> /etc/profile
echo $N1, $N2, $N3

# 노드 보안그룹 ID 확인
NGSGID=$(aws ec2 describe-security-groups --filters Name=group-name,Values='*ng1*' --query "SecurityGroups[*].[GroupId]" --output text)
aws ec2 authorize-security-group-ingress --group-id $NGSGID --protocol '-1' --cidr 192.168.1.0/24

# 워커 노드 SSH 접속
for node in $N1 $N2 $N3; do ssh ec2-user@$node hostname; done
  • 프로메테우스 & 그라파나(admin / prom-operator) 설치 : 대시보드 추천 15757 17900 15172
# 사용 리전의 인증서 ARN 확인
CERT_ARN=`aws acm list-certificates --query 'CertificateSummaryList[].CertificateArn[]' --output text`
echo $CERT_ARN

# repo 추가
helm repo add prometheus-community <https://prometheus-community.github.io/helm-charts>

# 파라미터 파일 생성
cat < monitor-values.yaml
**prometheus**:
  prometheusSpec:
    podMonitorSelectorNilUsesHelmValues: false
    serviceMonitorSelectorNilUsesHelmValues: false
    retention: 5d
    retentionSize: "10GiB"

  ingress:
    enabled: true
    ingressClassName: alb
    hosts: 
      - prometheus.$MyDomain
    paths: 
      - /*
    annotations:
      alb.ingress.kubernetes.io/scheme: internet-facing
      alb.ingress.kubernetes.io/target-type: ip
      alb.ingress.kubernetes.io/listen-ports: '[{"HTTPS":443}, {"HTTP":80}]'
      alb.ingress.kubernetes.io/certificate-arn: $CERT_ARN
      alb.ingress.kubernetes.io/success-codes: 200-399
      alb.ingress.kubernetes.io/load-balancer-name: myeks-ingress-alb
      alb.ingress.kubernetes.io/group.name: study
      alb.ingress.kubernetes.io/ssl-redirect: '443'

grafana:
  defaultDashboardsTimezone: Asia/Seoul
  adminPassword: prom-operator

  ingress:
    enabled: true
    ingressClassName: alb
    hosts: 
      - grafana.$MyDomain
    paths: 
      - /*
    annotations:
      alb.ingress.kubernetes.io/scheme: internet-facing
      alb.ingress.kubernetes.io/target-type: ip
      alb.ingress.kubernetes.io/listen-ports: '[{"HTTPS":443}, {"HTTP":80}]'
      alb.ingress.kubernetes.io/certificate-arn: $CERT_ARN
      alb.ingress.kubernetes.io/success-codes: 200-399
      alb.ingress.kubernetes.io/load-balancer-name: myeks-ingress-alb
      alb.ingress.kubernetes.io/group.name: study
      alb.ingress.kubernetes.io/ssl-redirect: '443'

defaultRules:
  create: false
kubeControllerManager:
  enabled: false
kubeEtcd:
  enabled: false
kubeScheduler:
  enabled: false
alertmanager:
  enabled: false
EOT

# 배포
kubectl create ns monitoring
helm install kube-prometheus-stack prometheus-community/kube-prometheus-stack --version 45.27.2 \\
--set prometheus.prometheusSpec.scrapeInterval='15s' --set prometheus.prometheusSpec.evaluationInterval='15s' \\
-f monitor-values.yaml --namespace monitoring

# Metrics-server 배포
kubectl apply -f <https://github.com/kubernetes-sigs/metrics-server/releases/latest/download/components.yaml>

 

EKS 인증 / 인가 설명에 앞서 AWS IAM과 K8S 인증 관련 지식이 있는 상태에서 실습이 시작된다.

 

EKS는 인증은 AWS IAM에서 진행하고 인가는 K8S RBAC을 사용한다.

RBAC 관련 롤을 확인하는것은 아래와 같다. 

# 설치
**kubectl krew install access-matrix rbac-tool rbac-view rolesum**

# Show an RBAC access matrix for server resources
**kubectl access-matrix** # Review access to cluster-scoped resources
kubectl access-matrix --namespace default # Review access to namespaced resources in 'default'

# RBAC Lookup by subject (user/group/serviceaccount) name
kubectl rbac-tool lookup
**kubectl rbac-tool lookup system:masters**
  SUBJECT        | SUBJECT TYPE | SCOPE       | NAMESPACE | ROLE
+----------------+--------------+-------------+-----------+---------------+
  system:masters | Group        | ClusterRole |           | cluster-admin

kubectl rbac-tool lookup system:nodes # eks:node-bootstrapper
kubectl rbac-tool lookup system:bootstrappers # eks:node-bootstrapper
**kubectl describe ClusterRole eks:node-bootstrapper**

# RBAC List Policy Rules For subject (user/group/serviceaccount) name
kubectl rbac-tool policy-rules
kubectl rbac-tool policy-rules -e '^system:.*'

# Generate ClusterRole with all available permissions from the target cluster
kubectl rbac-tool show

# Shows the subject for the current context with which one authenticates with the cluster
**kubectl rbac-tool whoami**
{Username: "kubernetes-admin",
 UID:      "aws-iam-authenticator:911283.....:AIDA5ILF2FJ......",
 Groups:   ["system:masters",
            "system:authenticated"],
 Extra:    {**accessKeyId**:  ["AKIA5ILF2FJ....."],
            arn:          ["arn:aws:iam::911283....:user/admin"],
            canonicalArn: ["arn:aws:iam::911283....:user/admin"],
            principalId:  ["AIDA5ILF2FJ....."],
            sessionName:  [""]}}

# Summarize RBAC roles for subjects : ServiceAccount(default), User, Group
kubectl rolesum -h
**kubectl rolesum aws-node -n kube-system**
kubectl rolesum -k User system:kube-proxy
**kubectl rolesum -k Group system:masters**

# [터미널1] A tool to visualize your RBAC permissions
**kubectl rbac-view**
INFO[0000] Getting K8s client
INFO[0000] serving RBAC View and <http://localhost:8800>

## 이후 해당 작업용PC 공인 IP:8800 웹 접속
echo -e "RBAC View Web <http://$>(curl -s ipinfo.io/ip):8800"

 

https://youtu.be/bksogA-WXv8

참고영상

 

kubectl로 AWS IAM 을 통해 위와같이 STS 토큰을 받아온다. 해당 토큰을 디코딩하면 GetCallerIdentity 나 버전 expiredate등이 포함돼있다.

 

 

해당 토큰을 EKS api에 가 받으면 Webhook token authenticator에 요청한다.

이후 AWS STS(AWS IAM)이 응답을 해주고(인증이 완료되면) User/Role에 대한 ARN을 반환한다.

 

이후 K8S에서 RBAC 인가를 처리한다. 

# Webhook api 리소스 확인 
**kubectl api-resources | grep Webhook**
mutatingwebhookconfigurations                  admissionregistration.k8s.io/v1        false        MutatingWebhookConfiguration
**validatingwebhookconfigurations**                admissionregistration.k8s.io/v1        false        ValidatingWebhookConfiguration

# validatingwebhookconfigurations 리소스 확인
**kubectl get validatingwebhookconfigurations**
NAME                                        WEBHOOKS   AGE
eks-aws-auth-configmap-validation-webhook   1          50m
vpc-resource-validating-webhook             2          50m
aws-load-balancer-webhook                   3          8m27s

**kubectl get validatingwebhookconfigurations eks-aws-auth-configmap-validation-webhook -o yaml | kubectl neat | yh**

# aws-auth 컨피그맵 확인
**kubectl get cm -n kube-system aws-auth -o yaml | kubectl neat | yh**
apiVersion: v1
kind: ConfigMap
metadata: 
  name: aws-auth
  namespace: kube-system
data: 
  **mapRoles**: |
    - groups:
      - system:bootstrappers
      - system:nodes
      rolearn: arn:aws:iam::91128.....:role/eksctl-myeks-nodegroup-ng1-NodeInstanceRole-1OS1WSTV0YB9X
      username: system:node:{{EC2PrivateDNSName}}
*#---<아래 생략(추정), ARN은 EKS를 설치한 IAM User , 여기 있었을경우 만약 실수로 삭제 시 복구가 가능했을까?---
  **mapUsers**: |
    - groups:
      - **system:masters**
      userarn: arn:aws:iam::111122223333:user/**admin**
      username: **kubernetes-admin***

# EKS 설치한 IAM User 정보 >> system:authenticated는 어떤 방식으로 추가가 되었는지 궁금???
**kubectl rbac-tool whoami**
{Username: "kubernetes-admin",
 UID:      "aws-iam-authenticator:9112834...:AIDA5ILF2FJIR2.....",
 Groups:   ["system:masters",
            "system:authenticated"],
...

# system:masters , system:authenticated 그룹의 정보 확인
kubectl rbac-tool lookup system:masters
kubectl rbac-tool lookup system:authenticated
kubectl rolesum -k Group system:masters
kubectl rolesum -k Group system:authenticated

# system:masters 그룹이 사용 가능한 클러스터 롤 확인 : cluster-admin
**kubectl describe clusterrolebindings.rbac.authorization.k8s.io cluster-admin**
Name:         cluster-admin
Labels:       kubernetes.io/bootstrapping=rbac-defaults
Annotations:  rbac.authorization.kubernetes.io/autoupdate: true
Role:
  Kind:  ClusterRole
  Name:  **cluster-admin**
Subjects:
  Kind   Name            Namespace
  ----   ----            ---------
  Group  **system:masters**

# cluster-admin 의 PolicyRule 확인 : 모든 리소스  사용 가능!
**kubectl describe clusterrole cluster-admin**
Name:         cluster-admin
Labels:       kubernetes.io/bootstrapping=rbac-defaults
Annotations:  rbac.authorization.kubernetes.io/autoupdate: true
PolicyRule:
  **Resources**  **Non-Resource URLs**  Resource Names  Verbs
  ---------  -----------------  --------------  -----
  ***.***        []                 []              [*****]
             [*****]                []              [*]

# system:authenticated 그룹이 사용 가능한 클러스터 롤 확인
kubectl describe ClusterRole **system:discovery**
kubectl describe ClusterRole **system:public-info-viewer**
kubectl describe ClusterRole **system:basic-user**
kubectl describe ClusterRole **eks:podsecuritypolicy:privileged**

 

위와같이 인증/인가 흐름을 알아보았다.

 

이번엔 실제 권한을 할당하는 실습을 진행해보겠다.

# testuser 사용자 생성
aws iam create-user --user-name **testuser**

# 사용자에게 프로그래밍 방식 액세스 권한 부여
aws iam create-access-key --user-name **testuser**
{
    "AccessKey": {
        "UserName": "testuser",
        "**AccessKeyId**": "AKIA5ILF2##",
        "Status": "Active",
        "**SecretAccessKey**": "TxhhwsU8##",
        "CreateDate": "2023-05-23T07:40:09+00:00"
    }
}
# testuser 사용자에 정책을 추가
aws iam attach-user-policy --policy-arn arn:aws:iam::aws:policy/**AdministratorAccess** --user-name **testuser**

# get-caller-identity 확인
aws sts get-caller-identity --query Arn
"arn:aws:iam::911283464785:user/admin"

# EC2 IP 확인 : myeks-bastion-EC2-2 PublicIPAdd 확인
**aws ec2 describe-instances --query "Reservations[*].Instances[*].{PublicIPAdd:PublicIpAddress,PrivateIPAdd:PrivateIpAddress,InstanceName:Tags[?Key=='Name']|[0].Value,Status:State.Name}" --filters Name=instance-state-name,Values=running --output table**

위와같이 사용자 생성하고 키&정책 부여해준다.

# get-caller-identity 확인 >> 왜 안될까요?
**aws sts get-caller-identity --query Arn**

# testuser 자격증명 설정
**aws configure**
AWS Access Key ID [None]: *AKIA5ILF2F...*
AWS Secret Access Key [None]: *ePpXdhA3cP....*
Default region name [None]: ***ap-northeast-2***

# get-caller-identity 확인
**aws sts get-caller-identity --query Arn**
"arn:aws:iam::911283464785:user/**testuser**"

# kubectl 시도 >> testuser도 **AdministratorAccess** 권한을 가지고 있는데, 실패 이유는?
**kubectl get node -v6**
ls ~/.kube

그다음 위와같이 aws계정 연동을 진행해주고

# 방안1 : eksctl 사용 >> iamidentitymapping 실행 시 aws-auth 컨피그맵 작성해줌
# Creates a mapping from IAM role or user to Kubernetes user and groups
eksctl create iamidentitymapping --cluster $**CLUSTER_NAME** --username **testuser** --group **system:masters** --arn arn:aws:iam::$ACCOUNT_ID:user/**testuser**

# 확인
**kubectl get cm -n kube-system aws-auth -o yaml | kubectl neat | yh**
...

~~# 방안2 : 아래 edit로 mapUsers 내용 직접 추가!
**kubectl edit cm -n kube-system aws-auth**
---
apiVersion: v1
data: 
  mapRoles: |
    - groups:
      - system:bootstrappers
      - system:nodes
      rolearn: arn:aws:iam::911283464785:role/eksctl-myeks-nodegroup-ng1-NodeInstanceRole-LHQ7DWHQQRZJ
      username: system:node:{{EC2PrivateDNSName}}
  **mapUsers: |
    - groups:
      - system:masters
      userarn: arn:aws:iam::911283464785:user/testuser
      username: testuser**
...~~

# 확인 : 기존에 있는 **role**/eksctl-myeks-nodegroup-ng1-NodeInstanceRole-YYYYY 는 어떤 역할/동작을 하는 걸까요?
**eksctl get iamidentitymapping --cluster $CLUSTER_NAME**
ARN											USERNAME				GROUPS					ACCOUNT
arn:aws:iam::911283464785:**role**/eksctl-myeks-nodegroup-ng1-NodeInstanceRole-LHQ7DWHQQRZJ	system:node:{{EC2PrivateDNSName}}	system:bootstrappers,system:nodes	
arn:aws:iam::911283464785:**user**/testuser							testuser				system:masters

위와같이 새로 생성한 testuesr 계정에다가 EKS 관리자 권한을ㄹ 부여한다.

 

권한이 부여됨을 확인할 수 있다.

 

edit를 통해 mapUsers를 직접 수정해줄수도 있다. 

# 방안2 : 아래 edit로 mapUsers 내용 직접 수정 **system:authenticated**
~~~~**kubectl edit cm -n kube-system aws-auth**
...

# 확인
eksctl get iamidentitymapping --cluster $CLUSTER_NAME

 

Testuser를 삭제하는 방법은 다음과 같다.

# testuser IAM 맵핑 삭제
eksctl **delete** iamidentitymapping --cluster $CLUSTER_NAME --arn  arn:aws:iam::$ACCOUNT_ID:user/testuser

# Get IAM identity mapping(s)
eksctl get iamidentitymapping --cluster $CLUSTER_NAME
kubectl get cm -n kube-system aws-auth -o yaml | yh

지금까지는 사용자/애플리케이션 > k8s 이용시에 대한 인증/인가에 대해 알아보았다.

 

지금부터는 k8s파드 > aws 서비스 이용시에 대해 알아보도록 한다. IRSA이다.

요점은 OIDC를 통해 인증/인가를 진행한다.

 

 

실습을 진행해보자.

# 파드1 생성
cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: Pod
metadata:
  name: eks-iam-test1
spec:
  containers:
    - name: my-aws-cli
      image: amazon/aws-cli:latest
      args: ['s3', 'ls']
  restartPolicy: Never
  **automountServiceAccountToken: false**
EOF

# 확인
kubectl get pod
kubectl describe pod

# 로그 확인
kubectl logs eks-iam-test1

# 파드1 삭제
kubectl delete pod eks-iam-test1

automountServiceAccountToken: false

위와같이 자동 발급을 false 해놓으면 

 

 

위와같은 결과가 나온다.

 

두번째 실습이다.

 

# 파드2 생성
cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: Pod
metadata:
  name: eks-iam-test2
spec:
  containers:
    - name: my-aws-cli
      image: amazon/aws-cli:latest
      command: ['sleep', '36000']
  restartPolicy: Never
EOF

# 확인
kubectl get pod
kubectl describe pod

# aws 서비스 사용 시도
kubectl exec -it eks-iam-test2 -- aws s3 ls

# 서비스 어카운트 토큰 확인
SA_TOKEN=$(kubectl exec -it eks-iam-test2 -- cat /var/run/secrets/kubernetes.io/serviceaccount/token)
echo $SA_TOKEN

# jwt 혹은 아래 JWT 웹 사이트 이용
jwt decode $SA_TOKEN --json --iso8601
...

#헤더
{
  "alg": "RS256",
  "kid": "1a8fcaee12b3a8f191327b5e9b997487ae93baab"
}

# 페이로드 : OAuth2에서 쓰이는 aud, exp 속성 확인! > projectedServiceAccountToken 기능으로 토큰에 audience,exp 항목을 덧붙힘
## iss 속성 : EKS OpenID Connect Provider(EKS IdP) 주소 > 이 EKS IdP를 통해 쿠버네티스가 발급한 토큰이 유요한지 검증
{
  "aud": [
    "https://kubernetes.default.svc"  # 해당 주소는 k8s api의 ClusterIP 서비스 주소 도메인명, kubectl get svc kubernetes
  ],
  "exp": 1716619848,
  "iat": 1685083848,
  "iss": "https://oidc.eks.ap-northeast-2.amazonaws.com/id/F6A7523462E8E6CDADEE5D41DF2E71F6",
  "kubernetes.io": {
    "namespace": "default",
    "pod": {
      "name": "eks-iam-test2",
      "uid": "10dcccc8-a16c-4fc7-9663-13c9448e107a"
    },
    "serviceaccount": {
      "name": "default",
      "uid": "acb6c60d-0c5f-4583-b83b-1b629b0bdd87"
    },
    "warnafter": 1685087455
  },
  "nbf": 1685083848,
  "sub": "system:serviceaccount:default:default"
}

# 파드2 삭제
kubectl delete pod eks-iam-test2

 

 

위에서 생성한 파드도 마찬가지로 권한이 없는 상태이다.

 

위의 토큰을 디코딩해보면 서비스 어카운트, 파드네임등의 정보가 담겨있다. iss(oidc 제공자 정보)도 확인된다.

 

마지막 실습이다.

# Create an iamserviceaccount - AWS IAM role bound to a Kubernetes service account
eksctl create **iamserviceaccount** \\
  --name **my-sa** \\
  --namespace **default** \\
  --cluster $CLUSTER_NAME \\
  --approve \\
  --attach-policy-arn $(aws iam list-policies --query 'Policies[?PolicyName==`AmazonS3ReadOnlyAccess`].Arn' --output text)

# 확인 >> 웹 관리 콘솔에서 CloudFormation Stack >> IAM Role 확인
# aws-load-balancer-controller IRSA는 어떤 동작을 수행할 것 인지 생각해보자!
eksctl get iamserviceaccount --cluster $CLUSTER_NAME

# Inspecting the newly created Kubernetes Service Account, we can see the role we want it to assume in our pod.
**kubectl get sa**
**kubectl describe sa my-sa**
Name:                my-sa
Namespace:           default
Labels:              app.kubernetes.io/managed-by=eksctl
Annotations:         **eks.amazonaws.com/role-arn: arn:aws:iam::911283464785:role/eksctl-myeks-addon-iamserviceaccount-default-Role1-1MJUYW59O6QGH**
Image pull secrets:  <none>
Mountable secrets:   <none>
Tokens:              <none>
Events:              <none>

irsa를 통해 아마존S3 리드온리 정책을 만들어준다.

그리고 해당 어카운트를 사용하는 파드를 생성한다.

# 파드3번 생성
cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: Pod
metadata:
  name: eks-iam-test3
spec:
  **serviceAccountName: my-sa**
  containers:
    - name: my-aws-cli
      image: amazon/aws-cli:latest
      command: ['sleep', '36000']
  restartPolicy: Never
EOF

# 해당 SA를 파드가 사용 시 mutatingwebhook으로 Env,Volume 추가함
kubectl get mutatingwebhookconfigurations pod-identity-webhook -o yaml | kubectl neat | yh

**# 파드 생성 yaml에 없던 내용이 추가됨!!!!!**
# **Pod Identity Webhook**은 **mutating** webhook을 통해 아래 **Env 내용**과 **1개의 볼륨**을 추가함
kubectl get pod eks-iam-test3
**kubectl describe pod eks-iam-test3**
...
**Environment**:
      AWS_STS_REGIONAL_ENDPOINTS:   regional
      AWS_DEFAULT_REGION:           ap-northeast-2
      AWS_REGION:                   ap-northeast-2
      AWS_ROLE_ARN:                 arn:aws:iam::911283464785:role/eksctl-myeks-addon-iamserviceaccount-default-Role1-GE2DZKJYWCEN
      AWS_WEB_IDENTITY_TOKEN_FILE:  /var/run/secrets/eks.amazonaws.com/serviceaccount/token
    Mounts:
      /var/run/secrets/eks.amazonaws.com/serviceaccount from aws-iam-token (ro)
      /var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-69rh8 (ro)
...
**Volumes:**
  **aws-iam-token**:
    Type:                    **Projected** (a volume that contains injected data from multiple sources)
    TokenExpirationSeconds:  86400
  kube-api-access-sn467:
    Type:                    Projected (a volume that contains injected data from multiple sources)
    TokenExpirationSeconds:  3607
    ConfigMapName:           kube-root-ca.crt
    ConfigMapOptional:       <nil>
    DownwardAPI:             true
...

# 파드에서 aws cli 사용 확인
eksctl get iamserviceaccount --cluster $CLUSTER_NAME
**kubectl exec -it eks-iam-test3 -- aws sts get-caller-identity --query Arn**
"arn:aws:sts::911283464785:assumed-role/eksctl-myeks-addon-iamserviceaccount-default-Role1-GE2DZKJYWCEN/botocore-session-1685179271"

# 되는 것고 안되는 것은 왜그런가?
kubectl exec -it eks-iam-test3 -- **aws s3 ls**
kubectl exec -it eks-iam-test3 -- **aws ec2 describe-instances --region ap-northeast-2**
kubectl exec -it eks-iam-test3 -- **aws ec2 describe-vpcs --region ap-northeast-2**

위에서 보면 Env가 추가된것이 확인이 된다.

 

최종적으로 아까 계속 S3 조회 실패하던것이 이번에는 성공한것을 확인할수있다.

 

우리가 준 권한은 S3리드온리이기 때문에 ec2 리드등 다른 정보는 볼수없다.

 

 

 

'job > eks' 카테고리의 다른 글

7주차 eks 스터디  (0) 2023.06.08
eks 5주차  (0) 2023.05.23
4주차 eks 스터디  (0) 2023.05.16
3주차 eks 스터디  (0) 2023.05.13
eks 2주차  (0) 2023.05.02

google codelabs 를 이용하여 codelabs 페이지를 생성해보았다. (메뉴얼 작성을 위해)

 

https://github.com/googlecodelabs/tools/tree/main/site 

https://medium.com/@zarinlo/publish-technical-tutorials-in-google-codelab-format-b07ef76972cd

을참고

 

 

nodejs는 14버전으로 설치 해야하고

python은 2버전으로 설치해야한다.

brew install nvm
nvm install 14.17.4
nvm use v14.17.4
nvm list
##위와같이 nodejs 버전 관리는 nvm으로 하고 14버전 설치, 사용해준다. 


pyenv install 2.7.18
pyenv global 2.7.18
pyenv versions
##마찬가지로 파이썬의 버전도 pyenv로 관리하고 2.7설치, 사용해준다.

brew update && brew install go
go install github.com/googlecodelabs/tools/claat@latest
##go와 claat 설치

## not required if you’re only using Go modules 
export GOPATH=$HOME/go
export GOROOT=/usr/local/go

## 랜딩페이지 install
git clone https://github.com/googlecodelabs/tools

## navigate to folder
cd site/
## install dependencies
npm install
npm install -g gulp-cli

gulp serve
##localhost:8000으로 접속하면 페이지 확인

 

diff -y eks-oneclick3.yaml eks-oneclick4.yaml

4주차 원클릭과 5주차 원클릭의 차이는

ebs-csi-driver 설치 안함

t3 설치

xRay true로 변경

 

언제나처럼 실습환경 설치

  • Amazon EKS (myeks) 윈클릭 배포 & 기본 설정
    • 기본 설정
    # default 네임스페이스 적용
    kubectl ns default
    
    # (옵션) context 이름 변경
    NICK=<각자 자신의 닉네임>
    NICK=gasida
    kubectl ctx
    kubectl config rename-context admin@myeks.ap-northeast-2.eksctl.io $NICK@myeks
    
    # ExternalDNS
    MyDomain=<자신의 도메인>
    echo "export MyDomain=<자신의 도메인>" >> /etc/profile
    *MyDomain=gasida.link*
    *echo "export MyDomain=gasida.link" >> /etc/profile*
    MyDnzHostedZoneId=$(aws route53 list-hosted-zones-by-name --dns-name "${MyDomain}." --query "HostedZones[0].Id" --output text)
    echo $MyDomain, $MyDnzHostedZoneId
    curl -s -O <https://raw.githubusercontent.com/gasida/PKOS/main/aews/externaldns.yaml>
    MyDomain=$MyDomain MyDnzHostedZoneId=$MyDnzHostedZoneId envsubst < externaldns.yaml | kubectl apply -f -
    
    # kube-ops-view
    helm repo add geek-cookbook <https://geek-cookbook.github.io/charts/>
    helm install kube-ops-view geek-cookbook/kube-ops-view --version 1.2.2 --set env.TZ="Asia/Seoul" --namespace kube-system
    kubectl patch svc -n kube-system kube-ops-view -p '{"spec":{"type":"LoadBalancer"}}'
    kubectl **annotate** service kube-ops-view -n kube-system "external-dns.alpha.kubernetes.io/hostname=**kubeopsview**.$MyDomain"
    echo -e "Kube Ops View URL = http://**kubeopsview**.$MyDomain:8080/#scale=1.5"
    
    # AWS LB Controller
    helm repo add eks <https://aws.github.io/eks-charts>
    helm repo update
    helm install aws-load-balancer-controller eks/aws-load-balancer-controller -n kube-system --set clusterName=$CLUSTER_NAME \\
      --set serviceAccount.create=false --set serviceAccount.name=aws-load-balancer-controller
    
    # 노드 보안그룹 ID 확인
    NGSGID=$(aws ec2 describe-security-groups --filters Name=group-name,Values='*ng1*' --query "SecurityGroups[*].[GroupId]" --output text)
    aws ec2 authorize-security-group-ingress --group-id $NGSGID --protocol '-1' --cidr 192.168.1.100/32
    
    • 프로메테우스 & 그라파나(admin / prom-operator) 설치 : 대시보드 추천 15757 17900 15172
    # 사용 리전의 인증서 ARN 확인
    CERT_ARN=`aws acm list-certificates --query 'CertificateSummaryList[].CertificateArn[]' --output text`
    echo $CERT_ARN
    
    # repo 추가
    helm repo add prometheus-community <https://prometheus-community.github.io/helm-charts>
    
    # 파라미터 파일 생성
    cat < monitor-values.yaml
    **prometheus**:
      prometheusSpec:
        podMonitorSelectorNilUsesHelmValues: false
        serviceMonitorSelectorNilUsesHelmValues: false
        retention: 5d
        retentionSize: "10GiB"
    
      verticalPodAutoscaler:
        enabled: true
    
      ingress:
        enabled: true
        ingressClassName: alb
        hosts: 
          - prometheus.$MyDomain
        paths: 
          - /*
        annotations:
          alb.ingress.kubernetes.io/scheme: internet-facing
          alb.ingress.kubernetes.io/target-type: ip
          alb.ingress.kubernetes.io/listen-ports: '[{"HTTPS":443}, {"HTTP":80}]'
          alb.ingress.kubernetes.io/certificate-arn: $CERT_ARN
          alb.ingress.kubernetes.io/success-codes: 200-399
          alb.ingress.kubernetes.io/load-balancer-name: myeks-ingress-alb
          alb.ingress.kubernetes.io/group.name: study
          alb.ingress.kubernetes.io/ssl-redirect: '443'
    
    grafana:
      defaultDashboardsTimezone: Asia/Seoul
      adminPassword: prom-operator
    
      ingress:
        enabled: true
        ingressClassName: alb
        hosts: 
          - grafana.$MyDomain
        paths: 
          - /*
        annotations:
          alb.ingress.kubernetes.io/scheme: internet-facing
          alb.ingress.kubernetes.io/target-type: ip
          alb.ingress.kubernetes.io/listen-ports: '[{"HTTPS":443}, {"HTTP":80}]'
          alb.ingress.kubernetes.io/certificate-arn: $CERT_ARN
          alb.ingress.kubernetes.io/success-codes: 200-399
          alb.ingress.kubernetes.io/load-balancer-name: myeks-ingress-alb
          alb.ingress.kubernetes.io/group.name: study
          alb.ingress.kubernetes.io/ssl-redirect: '443'
    
    defaultRules:
      create: false
    kubeControllerManager:
      enabled: false
    kubeEtcd:
      enabled: false
    kubeScheduler:
      enabled: false
    alertmanager:
      enabled: false
    EOT
    
    # 배포
    kubectl create ns monitoring
    helm install kube-prometheus-stack prometheus-community/kube-prometheus-stack --version 45.27.2 \\
    --set prometheus.prometheusSpec.scrapeInterval='15s' --set prometheus.prometheusSpec.evaluationInterval='15s' \\
    -f monitor-values.yaml --namespace monitoring
    
    # Metrics-server 배포
    **kubectl apply -f <https://github.com/kubernetes-sigs/metrics-server/releases/latest/download/components.yaml**>
    
  • EKS Node Viewer 설치 : 노드 할당 가능 용량요청 request 리소스 표시, 실제 파드 리소스 사용량 X - 링크
    • It displays the scheduled pod resource requests vs the allocatable capacity on the node. It does not look at the actual pod resource usage.
    # go 설치
    yum install -y go
    
    # EKS Node Viewer 설치 : 현재 ec2 spec에서는 **설치에 다소 시간이 소요됨** = **2분 이상**
    go install github.com/awslabs/eks-node-viewer/cmd/eks-node-viewer@latest
    
    # bin 확인 및 사용 
    tree ~/go/bin
    **cd ~/go/bin**
    **./eks-node-viewer**
    3 nodes (875m/5790m) 15.1% cpu ██████░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ $0.156/hour | $113.880/month
    20 pods (0 pending 20 running 20 bound)
    
    ip-192-168-3-196.ap-northeast-2.compute.internal cpu ████████░░░░░░░░░░░░░░░░░░░░░░░░░░░  22% (7 pods) t3.medium/$0.0520 On-Demand
    ip-192-168-1-91.ap-northeast-2.compute.internal  cpu ████░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░  12% (6 pods) t3.medium/$0.0520 On-Demand
    ip-192-168-2-185.ap-northeast-2.compute.internal cpu ████░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░  12% (7 pods) t3.medium/$0.0520 On-Demand
    Press any key to quit
    
    **명령 샘플**
    # Standard usage
    **./eks-node-viewer**
    
    # Display both CPU and Memory Usage
    **./eks-node-viewer --resources cpu,memory**
    
    # Karenter nodes only
    **./eks-node-viewer --node-selector "karpenter.sh/provisioner-name"**
    
    # Display extra labels, i.e. AZ
    **./eks-node-viewer --extra-labels topology.kubernetes.io/zone**
    
    # Specify a particular AWS profile and region
    AWS_PROFILE=myprofile AWS_REGION=us-west-2
    
    **기본 옵션**
    # select only Karpenter managed nodes
    node-selector=karpenter.sh/provisioner-name
    
    # display both CPU and memory
    resources=cpu,memory
    

 

HPA, VPA, CA에 대한 설명
HPA - 스케일인/아웃, KEDA
VPA - 스케일업/다운(사용 잘 안함)
CA - 노드 확장/축소

 

HPA 실습 시작

# Run and expose php-apache server
curl -s -O <https://raw.githubusercontent.com/kubernetes/website/main/content/en/examples/application/php-apache.yaml>
cat php-apache.yaml | yh
**kubectl apply -f php-apache.yaml**

# 확인
kubectl exec -it deploy/php-apache -- **cat /var/www/html/index.php**
...

# 모니터링 : 터미널2개 사용
watch -d 'kubectl get hpa,pod;echo;kubectl top pod;echo;kubectl top node'
kubectl exec -it deploy/php-apache -- top

# 접속
PODIP=$(kubectl get pod -l run=php-apache -o jsonpath={.items[0].status.podIP})
curl -s $PODIP; echo

현재 노드 사용량은 위와같다.

 

# Create the HorizontalPodAutoscaler : requests.cpu=200m - [알고리즘](<https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/#algorithm-details>)
# Since each pod requests **200 milli-cores** by kubectl run, this means an average CPU usage of **100 milli-cores**.
kubectl autoscale deployment php-apache **--cpu-percent=50** --min=1 --max=10
**kubectl describe hpa**
...
Metrics:                                               ( current / target )
  resource cpu on pods  (as a percentage of request):  0% (1m) / 50%
Min replicas:                                          1
Max replicas:                                          10
Deployment pods:                                       1 current / 1 desired
...

# HPA 설정 확인
kubectl krew install neat
kubectl get hpa php-apache -o yaml
**kubectl get hpa php-apache -o yaml | kubectl neat | yh**
spec: 
  minReplicas: 1               # [4] 또는 최소 1개까지 줄어들 수도 있습니다
  maxReplicas: 10              # [3] 포드를 최대 5개까지 늘립니다
  **scaleTargetRef**: 
    apiVersion: apps/v1
    kind: **Deployment**
    name: **php-apache**           # [1] php-apache 의 자원 사용량에서
  **metrics**: 
  - type: **Resource**
    resource: 
      name: **cpu**
      target: 
        type: **Utilization**
        **averageUtilization**: 50  # [2] CPU 활용률이 50% 이상인 경우

# 반복 접속 1 (파드1 IP로 접속) >> 증가 확인 후 중지
while true;do curl -s $PODIP; sleep 0.5; done

# 반복 접속 2 (서비스명 도메인으로 접속) >> 증가 확인(몇개까지 증가되는가? 그 이유는?) 후 중지 >> **중지 5분 후** 파드 갯수 감소 확인
# Run this in a separate terminal
# so that the load generation continues and you can carry on with the rest of the steps
kubectl run -i --tty load-generator --rm --image=**busybox**:1.28 --restart=Never -- /bin/sh -c "while sleep 0.01; do wget -q -O- ; done"

사용량 올라가는중

위와같이 파드가 오토스케일링된것을 확인할수있다.

 

이번엔 KEDA

HPA는 CPU, Memory 메트릭을 기반으로 스케일 인/아웃을 하는데 KEDA는 특정 이벤트를 기반으로 스케일여부를 결정한다.

지원 메트릭이 무지하게 많다.

 

  • KEDA with Helm : 특정 **이벤트(cron 등)**기반의 파드 오토 스케일링 - Chart Grafana Cron
    # KEDA 설치
    cat < keda-values.yaml
    metricsServer:
      useHostNetwork: true
    
    **prometheus**:
      metricServer:
        enabled: true
        port: 9022
        portName: metrics
        path: /metrics
        serviceMonitor:
          # Enables ServiceMonitor creation for the Prometheus Operator
          enabled: true
        podMonitor:
          # Enables PodMonitor creation for the Prometheus Operator
          enabled: true
      operator:
        enabled: true
        port: 8080
        serviceMonitor:
          # Enables ServiceMonitor creation for the Prometheus Operator
          enabled: true
        podMonitor:
          # Enables PodMonitor creation for the Prometheus Operator
          enabled: true
    
      webhooks:
        enabled: true
        port: 8080
        serviceMonitor:
          # Enables ServiceMonitor creation for the Prometheus webhooks
          enabled: true
    EOT
    
    kubectl create namespace **keda**
    helm repo add kedacore <https://kedacore.github.io/charts>
    helm install **keda** kedacore/keda --version 2.10.2 --namespace **keda -f** keda-values.yaml
    
    # KEDA 설치 확인
    kubectl **get-all** -n keda
    kubectl get **all** -n keda
    **kubectl get crd | grep keda**
    
    # keda 네임스페이스에 디플로이먼트 생성
    **kubectl apply -f php-apache.yaml -n keda
    kubectl get pod -n keda**
    
    # ScaledObject ****정책 생성 : cron
    cat < keda-cron.yaml
    apiVersion: keda.sh/v1alpha1
    kind: **ScaledObject**
    metadata:
      name: php-apache-cron-scaled
    spec:
      minReplicaCount: 0
      maxReplicaCount: 2
      pollingInterval: 30
      cooldownPeriod: 300
      **scaleTargetRef**:
        apiVersion: apps/v1
        kind: Deployment
        name: php-apache
      **triggers**:
      - type: **cron**
        metadata:
          timezone: Asia/Seoul
          **start**: 00,15,30,45 * * * *
          **end**: 05,20,35,50 * * * *
          **desiredReplicas**: "1"
    EOT
    **kubectl apply -f keda-cron.yaml -n keda**
    
    # 그라파나 대시보드 추가
    # 모니터링
    watch -d 'kubectl get ScaledObject,hpa,pod -n keda'
    kubectl get ScaledObject -w
    
    # 확인
    kubectl get ScaledObject,hpa,pod -n keda
    **kubectl get hpa -o jsonpath={.items[0].spec} -n keda | jq**
    ...
    "metrics": [
        {
          "**external**": {
            "metric": {
              "name": "s0-cron-Asia-Seoul-**00,15,30,45**xxxx-**05,20,35,50**xxxx",
              "selector": {
                "matchLabels": {
                  "scaledobject.keda.sh/name": "php-apache-cron-scaled"
                }
              }
            },
            "**target**": {
              "**averageValue**": "1",
              "type": "AverageValue"
            }
          },
          "type": "**External**"
        }
    
    # KEDA 및 deployment 등 삭제
    kubectl delete -f keda-cron.yaml -n keda && kubectl delete deploy php-apache -n keda && helm uninstall keda -n keda
    kubectl delete namespace keda
    

설치 완료, 30분에 생성되고 35분에 삭제될것이다.

29분의 상태

 

3

30분이되니 php-apache 파드가 생성된것을 확인할수있다.

 

CA 실습 시작

 

HPA는 파드확장, CA는 노드 확장개념이다. 

 

# 현재 autoscaling(ASG) 정보 확인
# aws autoscaling describe-auto-scaling-groups --query "AutoScalingGroups[? Tags[? (Key=='eks:cluster-name') && Value=='**클러스터이름**']].[AutoScalingGroupName, MinSize, MaxSize,DesiredCapacity]" --output table
**aws autoscaling describe-auto-scaling-groups \\
    --query "AutoScalingGroups[? Tags[? (Key=='eks:cluster-name') && Value=='myeks']].[AutoScalingGroupName, MinSize, MaxSize,DesiredCapacity]" \\
    --output table**
-----------------------------------------------------------------
|                   DescribeAutoScalingGroups                   |
+------------------------------------------------+----+----+----+
|  eks-ng1-44c41109-daa3-134c-df0e-0f28c823cb47  |  3 |  3 |  3 |
+------------------------------------------------+----+----+----+

# MaxSize 6개로 수정
**export ASG_NAME=$(aws autoscaling describe-auto-scaling-groups --query "AutoScalingGroups[? Tags[? (Key=='eks:cluster-name') && Value=='myeks']].AutoScalingGroupName" --output text)
aws autoscaling update-auto-scaling-group --auto-scaling-group-name ${ASG_NAME} --min-size 3 --desired-capacity 3 --max-size 6**

# 확인
**aws autoscaling describe-auto-scaling-groups --query "AutoScalingGroups[? Tags[? (Key=='eks:cluster-name') && Value=='myeks']].[AutoScalingGroupName, MinSize, MaxSize,DesiredCapacity]" --output table**
-----------------------------------------------------------------
|                   DescribeAutoScalingGroups                   |
+------------------------------------------------+----+----+----+
|  eks-ng1-c2c41e26-6213-a429-9a58-02374389d5c3  |  3 |  6 |  3 |
+------------------------------------------------+----+----+----+

# 배포 : Deploy the Cluster Autoscaler (CA)
curl -s -O <https://raw.githubusercontent.com/kubernetes/autoscaler/master/cluster-autoscaler/cloudprovider/aws/examples/cluster-autoscaler-autodiscover.yaml>
sed -i "s//$CLUSTER_NAME/g" cluster-autoscaler-autodiscover.yaml
kubectl apply -f cluster-autoscaler-autodiscover.yaml

# 확인
kubectl get pod -n kube-system | grep cluster-autoscaler
kubectl describe deployments.apps -n kube-system cluster-autoscaler

# (옵션) cluster-autoscaler 파드가 동작하는 워커 노드가 퇴출(evict) 되지 않게 설정
kubectl -n kube-system annotate deployment.apps/cluster-autoscaler cluster-autoscaler.kubernetes.io/safe-to-evict="false"

 

현재 ec2 3대임을 확인할수있다.

# 모니터링 
kubectl get nodes -w
while true; do kubectl get node; echo "------------------------------" ; date ; sleep 1; done
while true; do aws ec2 describe-instances --query "Reservations[*].Instances[*].{PrivateIPAdd:PrivateIpAddress,InstanceName:Tags[?Key=='Name']|[0].Value,Status:State.Name}" --filters Name=instance-state-name,Values=running --output text ; echo "------------------------------"; date; sleep 1; done

# Deploy a Sample App
# We will deploy an sample nginx application as a ReplicaSet of 1 Pod
cat <<EoF> nginx.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: nginx-to-scaleout
spec:
  replicas: 1
  selector:
    matchLabels:
      app: nginx
  template:
    metadata:
      labels:
        service: nginx
        app: nginx
    spec:
      containers:
      - image: nginx
        name: nginx-to-scaleout
        resources:
          limits:
            cpu: 500m
            memory: 512Mi
          requests:
            cpu: 500m
            memory: 512Mi
EoF

kubectl apply -f nginx.yaml
kubectl get deployment/nginx-to-scaleout

# Scale our ReplicaSet
# Let’s scale out the replicaset to 15
kubectl scale --replicas=15 deployment/nginx-to-scaleout && date

# 확인
kubectl get pods -l app=nginx -o wide --watch
kubectl -n kube-system logs -f deployment/cluster-autoscaler

# 노드 자동 증가 확인
kubectl get nodes
aws autoscaling describe-auto-scaling-groups \\
    --query "AutoScalingGroups[? Tags[? (Key=='eks:cluster-name') && Value=='**myeks**']].[AutoScalingGroupName, MinSize, MaxSize,DesiredCapacity]" \\
    --output table

**./eks-node-viewer**
42 pods (0 pending 42 running 42 bound)
ip-192-168-3-196.ap-northeast-2.compute.internal cpu ███████████████████████████████████ 100% (10 pods) t3.medium/$0.0520 On-Demand
ip-192-168-1-91.ap-northeast-2.compute.internal  cpu ███████████████████████████████░░░░  89% (9 pods)  t3.medium/$0.0520 On-Demand
ip-192-168-2-185.ap-northeast-2.compute.internal cpu █████████████████████████████████░░  95% (11 pods) t3.medium/$0.0520 On-Demand
ip-192-168-2-87.ap-northeast-2.compute.internal  cpu █████████████████████████████░░░░░░  84% (6 pods)  t3.medium/$0.0520 On-Demand
ip-192-168-3-15.ap-northeast-2.compute.internal  cpu █████████████████████████████░░░░░░  84% (6 pods)  t3.medium/$0.0520 On-Demand

# 디플로이먼트 삭제
kubectl delete -f nginx.yaml && date

# **노드 갯수 축소** : 기본은 10분 후 scale down 됨, 물론 아래 flag 로 시간 수정 가능 >> 그러니 **디플로이먼트 삭제 후 10분 기다리고 나서 보자!**
# By default, cluster autoscaler will wait 10 minutes between scale down operations, 
# you can adjust this using the --scale-down-delay-after-add, --scale-down-delay-after-delete, 
# and --scale-down-delay-after-failure flag. 
# E.g. --scale-down-delay-after-add=5m to decrease the scale down delay to 5 minutes after a node has been added.

# 터미널1
watch -d kubectl get node

위와같이 배포를 진행하면

인스턴스가 생성된다. 즉 노드가 확장되는것이다.

 

이번엔 CPA

노드 수 증가에 비례하여 성능 처리가 필요한 애플리케이션(컨테이너/파드)를 수평으로 자동 확장하는 개념

 

#
helm repo add cluster-proportional-autoscaler <https://kubernetes-sigs.github.io/cluster-proportional-autoscaler>

# CPA규칙을 설정하고 helm차트를 릴리즈 필요
helm upgrade --install cluster-proportional-autoscaler cluster-proportional-autoscaler/cluster-proportional-autoscaler

# nginx 디플로이먼트 배포
cat < cpa-nginx.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: nginx-deployment
spec:
  replicas: 1
  selector:
    matchLabels:
      app: nginx
  template:
    metadata:
      labels:
        app: nginx
    spec:
      containers:
      - name: nginx
        image: nginx:latest
        resources:
          limits:
            cpu: "100m"
            memory: "64Mi"
          requests:
            cpu: "100m"
            memory: "64Mi"
        ports:
        - containerPort: 80
EOT
kubectl apply -f cpa-nginx.yaml

# CPA 규칙 설정
cat < cpa-values.yaml
config:
  ladder:
    nodesToReplicas:
      - [1, 1]
      - [2, 2]
      - [3, 3]
      - [4, 3]
      - [5, 5]
options:
  namespace: default
  target: "deployment/nginx-deployment"
EOF

# 모니터링
**watch -d kubectl get pod**

# helm 업그레이드
helm upgrade --install cluster-proportional-autoscaler -f cpa-values.yaml cluster-proportional-autoscaler/cluster-proportional-autoscaler

# 노드 5개로 증가
export ASG_NAME=$(aws autoscaling describe-auto-scaling-groups --query "AutoScalingGroups[? Tags[? (Key=='eks:cluster-name') && Value=='myeks']].AutoScalingGroupName" --output text)
aws autoscaling update-auto-scaling-group --auto-scaling-group-name ${ASG_NAME} --min-size 5 --desired-capacity 5 --max-size 5
aws autoscaling describe-auto-scaling-groups --query "AutoScalingGroups[? Tags[? (Key=='eks:cluster-name') && Value=='myeks']].[AutoScalingGroupName, MinSize, MaxSize,DesiredCapacity]" --output table

# 노드 4개로 축소
aws autoscaling update-auto-scaling-group --auto-scaling-group-name ${ASG_NAME} --min-size 4 --desired-capacity 4 --max-size 4
aws autoscaling describe-auto-scaling-groups --query "AutoScalingGroups[? Tags[? (Key=='eks:cluster-name') && Value=='myeks']].[AutoScalingGroupName, MinSize, MaxSize,DesiredCapacity]" --output table

 

마지막으로 karpenter 이다.

 

karpenter는 CA보다 더 똑똑하게 작동한다.

 

  • Kubernetes 스케줄러가 예약할 수 없다고 표시한 파드들을 감시합니다.
  • 파드가 요청한 자원 요구사항, 노드 선택자, 어피니티, 허용 조건, 그리고 토폴로지 분산 제약조건을 평가합니다.
  • 파드 요구사항을 충족하는 노드프로비저닝합니다.
  • 파드를 새로운 노드에 예약합니다.
  • 더 이상 필요하지 않은 경우 노드제거합니다.

karpenter 실습은 새로 배포하여 실습한다.

# YAML 파일 다운로드
curl -O <https://s3.ap-northeast-2.amazonaws.com/cloudformation.cloudneta.net/K8S/**karpenter-preconfig.yaml**>

# CloudFormation 스택 배포
예시) aws cloudformation deploy --template-file **karpenter-preconfig.yaml** --stack-name **myeks2** --parameter-overrides KeyName=**kp-gasida** SgIngressSshCidr=$(curl -s ipinfo.io/ip)/32  MyIamUserAccessKeyID=**AKIA5...** MyIamUserSecretAccessKey=**'CVNa2...'** ClusterBaseName=**myeks2** --region ap-northeast-2

# CloudFormation 스택 배포 완료 후 작업용 EC2 IP 출력
aws cloudformation describe-stacks --stack-name **myeks2** --query 'Stacks[*].**Outputs[0]**.OutputValue' --output text

# 작업용 EC2 SSH 접속
ssh -i **~/.ssh/kp-gasida.pem** ec2-user@$(aws cloudformation describe-stacks --stack-name **myeks2** --query 'Stacks[*].Outputs[0].OutputValue' --output text)
# IP 주소 확인 : 172.30.0.0/16 VPC 대역에서 172.30.1.0/24 대역을 사용 중
ip -br -c addr

# EKS Node Viewer 설치 : 현재 ec2 spec에서는 **설치에 다소 시간이 소요됨** = **2분 이상**
go install github.com/awslabs/eks-node-viewer/cmd/eks-node-viewer@latest

# [터미널1] bin 확인 및 사용
tree ~/go/bin
cd ~/go/bin
**./eks-node-viewer -h**
./eks-node-viewer  # EKS 배포 완료 후 실행 하자
```bash
# 환경변수 정보 확인
export | egrep 'ACCOUNT|AWS_|CLUSTER' | egrep -v 'SECRET|KEY'

# 환경변수 설정
export KARPENTER_VERSION=v0.27.5
export TEMPOUT=$(mktemp)
**echo $KARPENTER_VERSION $CLUSTER_NAME $AWS_DEFAULT_REGION $AWS_ACCOUNT_ID $TEMPOUT**

# CloudFormation 스택으로 IAM Policy, Role, EC2 Instance Profile 생성 : **3분 정도 소요**
curl -fsSL https://karpenter.sh/"${KARPENTER_VERSION}"/getting-started/getting-started-with-karpenter/cloudformation.yaml  > $TEMPOUT \
&& aws cloudformation deploy \
  --stack-name "Karpenter-${CLUSTER_NAME}" \
  --template-file "${TEMPOUT}" \
  --capabilities CAPABILITY_NAMED_IAM \
  --parameter-overrides "ClusterName=${CLUSTER_NAME}"

# 클러스터 생성 : myeks2 EKS 클러스터 생성 **19분 정도** 소요
**eksctl create cluster** -f - <<EOF
---
apiVersion: eksctl.io/v1alpha5
kind: ClusterConfig
metadata:
  name: ${CLUSTER_NAME}
  region: ${AWS_DEFAULT_REGION}
  version: "1.24"
  tags:
    karpenter.sh/discovery: ${CLUSTER_NAME}

iam:
  withOIDC: true
  serviceAccounts:
  - metadata:
      name: karpenter
      namespace: karpenter
    roleName: ${CLUSTER_NAME}-karpenter
    attachPolicyARNs:
    - arn:aws:iam::${AWS_ACCOUNT_ID}:policy/KarpenterControllerPolicy-${CLUSTER_NAME}
    roleOnly: true

iamIdentityMappings:
- arn: "arn:aws:iam::${AWS_ACCOUNT_ID}:role/KarpenterNodeRole-${CLUSTER_NAME}"
  username: system:node:{{EC2PrivateDNSName}}
  groups:
  - system:bootstrappers
  - system:nodes

managedNodeGroups:
- instanceType: m5.large
  amiFamily: AmazonLinux2
  name: ${CLUSTER_NAME}-ng
  desiredCapacity: 2
  minSize: 1
  maxSize: 10
  iam:
    withAddonPolicies:
      externalDNS: true

*## Optionally run on fargate
# fargateProfiles:
# - name: karpenter
#  selectors:
#  - namespace: karpenter*
EOF

**# eks 배포 확인**
eksctl get cluster
eksctl get nodegroup --cluster $CLUSTER_NAME
eksctl get **iamidentitymapping** --cluster $CLUSTER_NAME
eksctl get **iamserviceaccount** --cluster $CLUSTER_NAME
eksctl get addon --cluster $CLUSTER_NAME

# [터미널1] eks-node-viewer
cd ~/go/bin && ./eks-node-viewer

**# k8s 확인**
kubectl cluster-info
kubectl get node --label-columns=node.kubernetes.io/instance-type,eks.amazonaws.com/capacityType,topology.kubernetes.io/zone
kubectl get pod -n kube-system -owide
**kubectl describe cm -n kube-system aws-auth**
...
mapRoles:
----
- groups:
  - system:bootstrappers
  - system:**nodes**
  **rolearn**: arn:aws:iam::911283464785:role/**KarpenterNodeRole-myeks2**
  username: system:node:{{EC2PrivateDNSName}}
- groups:
  - system:bootstrappers
  - system:nodes
  rolearn: arn:aws:iam::911283464785:role/eksctl-myeks2-nodegroup-myeks2-ng-NodeInstanceRole-1KDXF4FLKKX1B
  username: system:node:{{EC2PrivateDNSName}}
...

# 카펜터 설치를 위한 환경 변수 설정 및 확인
export CLUSTER_ENDPOINT="$(aws eks describe-cluster --name ${CLUSTER_NAME} --query "cluster.endpoint" --output text)"
export KARPENTER_IAM_ROLE_ARN="arn:aws:iam::${AWS_ACCOUNT_ID}:role/${CLUSTER_NAME}-karpenter"
echo $CLUSTER_ENDPOINT $KARPENTER_IAM_ROLE_ARN

# EC2 Spot Fleet 사용을 위한 service-linked-role 생성 확인 : 만들어있는것을 확인하는 거라 아래 에러 출력이 정상!
# If the role has already been successfully created, you will see:
# An error occurred (InvalidInput) when calling the CreateServiceLinkedRole operation: Service role name AWSServiceRoleForEC2Spot has been taken in this account, please try a different suffix.
aws iam create-service-linked-role --aws-service-name spot.amazonaws.com || true

# docker logout : Logout of docker to perform an unauthenticated pull against the public ECR
docker logout public.ecr.aws

# karpenter 설치
helm upgrade --install karpenter oci://public.ecr.aws/karpenter/karpenter --version ${KARPENTER_VERSION} --namespace karpenter --create-namespace \
  --set serviceAccount.annotations."eks\.amazonaws\.com/role-arn"=${KARPENTER_IAM_ROLE_ARN} \
  --set settings.aws.clusterName=${CLUSTER_NAME} \
  --set settings.aws.defaultInstanceProfile=KarpenterNodeInstanceProfile-${CLUSTER_NAME} \
  --set settings.aws.interruptionQueueName=${CLUSTER_NAME} \
  --set controller.resources.requests.cpu=1 \
  --set controller.resources.requests.memory=1Gi \
  --set controller.resources.limits.cpu=1 \
  --set controller.resources.limits.memory=1Gi \
  --wait

# 확인
kubectl get-all -n karpenter
kubectl get all -n karpenter
kubectl get cm -n karpenter karpenter-global-settings -o jsonpath={.data} | jq
kubectl get crd | grep karpenter
```

위와같이 설치를 진행해준다.

 

그다음 진행

#
cat <<EOF | kubectl apply -f -
apiVersion: karpenter.sh/v1alpha5
kind: **Provisioner**
metadata:
  name: default
**spec**:
  requirements:
    - key: karpenter.sh/capacity-type
      operator: In
      values: ["**spot**"]
  limits:
    resources:
      cpu: 1000
  providerRef:
    name: default
  **ttlSecondsAfterEmpty**: 30
---
apiVersion: karpenter.k8s.aws/v1alpha1
kind: **AWSNodeTemplate**
metadata:
  name: default
spec:
  subnetSelector:
    karpenter.sh/discovery: ${CLUSTER_NAME}
  securityGroupSelector:
    karpenter.sh/discovery: ${CLUSTER_NAME}
EOF

# 확인
kubectl get awsnodetemplates,provisioners

위와같이   Provisioner  설치를 진행한다.

# pause 파드 1개에 CPU 1개 최소 보장 할당
cat <<EOF | kubectl apply -f -
apiVersion: apps/v1
kind: Deployment
metadata:
  name: inflate
spec:
  replicas: 0
  selector:
    matchLabels:
      app: inflate
  template:
    metadata:
      labels:
        app: inflate
    spec:
      terminationGracePeriodSeconds: 0
      containers:
        - name: inflate
          image: public.ecr.aws/eks-distro/kubernetes/pause:3.7
          resources:
            **requests:
              cpu: 1**
EOF
kubectl scale deployment inflate --replicas 5
kubectl logs -f -n karpenter -l app.kubernetes.io/name=karpenter -c controller

# 스팟 인스턴스 확인!
aws ec2 describe-spot-instance-requests --filters "Name=state,Values=active" --output table
kubectl get node -l karpenter.sh/capacity-type=spot -o jsonpath='{.items[0].metadata.labels}' | jq
**kubectl get node --label-columns=eks.amazonaws.com/capacityType,karpenter.sh/capacity-type,node.kubernetes.io/instance-type**
NAME                                                 STATUS   ROLES    AGE     VERSION                CAPACITYTYPE   CAPACITY-TYPE   INSTANCE-TYPE
ip-192-168-165-220.ap-northeast-2.compute.internal   Ready    <none>   2m37s   v1.24.13-eks-0a21954                  spot            c5a.2xlarge
ip-192-168-57-91.ap-northeast-2.compute.internal     Ready    <none>   13m     v1.24.13-eks-0a21954   ON_DEMAND                      m5.large
ip-192-168-75-253.ap-northeast-2.compute.internal    Ready    <none>   13m     v1.24.13-eks-0a21954   ON_DEMAND                      m5.large

 

실습을 진행하는 과정에서

 

위와같은 에러가 발생했다.

https://github.com/aws/karpenter/issues/2902

와 동일한 에러인지... 추후 확인이 필요하다.

 

 

#
kubectl delete provisioners default
cat <<EOF | kubectl apply -f -
apiVersion: karpenter.sh/v1alpha5
kind: **Provisioner**
metadata:
  name: default
spec:
  **consolidation:
    enabled: true**
  labels:
    type: karpenter
  limits:
    resources:
      cpu: 1000
      memory: 1000Gi
  providerRef:
    name: default
  requirements:
    - key: karpenter.sh/capacity-type
      operator: In
      values:
        - on-demand
    - key: node.kubernetes.io/instance-type
      operator: In
      values:
        - c5.large
        - m5.large
        - m5.xlarge
EOF

#
cat <<EOF | kubectl apply -f -
apiVersion: apps/v1
kind: Deployment
metadata:
  name: inflate
spec:
  replicas: 0
  selector:
    matchLabels:
      app: inflate
  template:
    metadata:
      labels:
        app: inflate
    spec:
      terminationGracePeriodSeconds: 0
      containers:
        - name: inflate
          image: public.ecr.aws/eks-distro/kubernetes/pause:3.7
          resources:
            requests:
              cpu: 1
EOF
kubectl scale deployment inflate --replicas 12
kubectl logs -f -n karpenter -l app.kubernetes.io/name=karpenter -c controller

# 인스턴스 확인
# This changes the total memory request for this deployment to around 12Gi, 
# which when adjusted to account for the roughly 600Mi reserved for the kubelet on each node means that this will fit on 2 instances of type m5.large:
kubectl get node -l type=karpenter
**kubectl get node --label-columns=eks.amazonaws.com/capacityType,karpenter.sh/capacity-type**
**kubectl get node --label-columns=node.kubernetes.io/instance-type,topology.kubernetes.io/zone**

# Next, scale the number of replicas back down to 5:
kubectl scale deployment inflate --replicas 5

# The output will show Karpenter identifying specific nodes to cordon, drain and then terminate:
**kubectl logs -f -n karpenter -l app.kubernetes.io/name=karpenter -c controller**
2023-05-17T07:02:00.768Z	INFO	controller.deprovisioning	deprovisioning via **consolidation delete**, terminating 1 machines ip-192-168-14-81.ap-northeast-2.compute.internal/m5.xlarge/on-demand	{"commit": "d7e22b1-dirty"}
2023-05-17T07:02:00.803Z	INFO	controller.termination	**cordoned** node	{"commit": "d7e22b1-dirty", "node": "ip-192-168-14-81.ap-northeast-2.compute.internal"}
2023-05-17T07:02:01.320Z	INFO	controller.termination	**deleted** node	{"commit": "d7e22b1-dirty", "node": "ip-192-168-14-81.ap-northeast-2.compute.internal"}
2023-05-17T07:02:39.283Z	DEBUG	controller	**deleted** launch template	{"commit": "d7e22b1-dirty", "launch-template": "karpenter.k8s.aws/9547068762493117560"}

# Next, scale the number of replicas back down to 1
kubectl scale deployment inflate --replicas 1
**kubectl logs -f -n karpenter -l app.kubernetes.io/name=karpenter -c controller**
2023-05-17T07:05:08.877Z	INFO	controller.deprovisioning	deprovisioning via consolidation delete, terminating 1 machines ip-192-168-145-253.ap-northeast-2.compute.internal/m5.xlarge/on-demand	{"commit": "d7e22b1-dirty"}
2023-05-17T07:05:08.914Z	INFO	controller.termination	cordoned node	{"commit": "d7e22b1-dirty", "node": "ip-192-168-145-253.ap-northeast-2.compute.internal"}
2023-05-17T07:05:09.316Z	INFO	controller.termination	deleted node	{"commit": "d7e22b1-dirty", "node": "ip-192-168-145-253.ap-northeast-2.compute.internal"}
2023-05-17T07:05:25.923Z	INFO	controller.deprovisioning	deprovisioning via consolidation replace, terminating 1 machines ip-192-168-48-2.ap-northeast-2.compute.internal/m5.xlarge/on-demand and replacing with on-demand machine from types m5.large, c5.large	{"commit": "d7e22b1-dirty"}
2023-05-17T07:05:25.940Z	INFO	controller.deprovisioning	launching machine with 1 pods requesting {"cpu":"1125m","pods":"4"} from types m5.large, c5.large	{"commit": "d7e22b1-dirty", "provisioner": "default"}
2023-05-17T07:05:26.341Z	DEBUG	controller.deprovisioning.cloudprovider	created launch template	{"commit": "d7e22b1-dirty", "provisioner": "default", "launch-template-name": "karpenter.k8s.aws/9547068762493117560", "launch-template-id": "lt-036151ea9df7d309f"}
2023-05-17T07:05:28.182Z	INFO	controller.deprovisioning.cloudprovider	launched instance	{"commit": "d7e22b1-dirty", "provisioner": "default", "id": "i-0eb3c8ff63724dc95", "hostname": "ip-192-168-144-98.ap-northeast-2.compute.internal", "instance-type": "c5.large", "zone": "ap-northeast-2b", "capacity-type": "on-demand", "capacity": {"cpu":"2","ephemeral-storage":"20Gi","memory":"3788Mi","pods":"29"}}
2023-05-17T07:06:12.307Z	INFO	controller.termination	cordoned node	{"commit": "d7e22b1-dirty", "node": "ip-192-168-48-2.ap-northeast-2.compute.internal"}
2023-05-17T07:06:12.856Z	INFO	controller.termination	deleted node	{"commit": "d7e22b1-dirty", "node": "ip-192-168-48-2.ap-northeast-2.compute.internal"}

# 인스턴스 확인
kubectl get node -l type=karpenter
kubectl get node --label-columns=eks.amazonaws.com/capacityType,karpenter.sh/capacity-type
kubectl get node --label-columns=node.kubernetes.io/instance-type,topology.kubernetes.io/zone

# 삭제
kubectl delete deployment inflate

 

마지막은 Consolidation 이다.

 

위와같이 새로 provisioners 새로 생성해주고

거의 실시간으로 확장이된다. !!! 굿!!!!

이번에는 위와같이 줄여보면 바로바로 줄어든다. !

 

'job > eks' 카테고리의 다른 글

7주차 eks 스터디  (0) 2023.06.08
eks 6주차  (0) 2023.05.31
4주차 eks 스터디  (0) 2023.05.16
3주차 eks 스터디  (0) 2023.05.13
eks 2주차  (0) 2023.05.02

+ Recent posts