From 2480c829440d446e432ac6059d12bd7f8abbeeca Mon Sep 17 00:00:00 2001 From: ArgoCD Setup Date: Fri, 23 Jan 2026 18:45:00 -0300 Subject: [PATCH] fix(aula-08): prevenir volume stalling com CSI tolerations e PDB - Adicionar hcloud-csi-values.yaml com tolerations para node failures - Configurar 2 replicas do CSI controller para HA - Criar statefulset-pdb.yaml para proteger StatefulSets durante drain - Documentar troubleshooting de volumes stuck no README --- aula-08/README.md | 46 +++++++++++++++++++++++++++++++--- aula-08/hcloud-csi-values.yaml | 13 ++++++++++ aula-08/setup.sh | 6 +++++ aula-08/statefulset-pdb.yaml | 12 +++++++++ 4 files changed, 74 insertions(+), 3 deletions(-) create mode 100644 aula-08/hcloud-csi-values.yaml create mode 100644 aula-08/statefulset-pdb.yaml diff --git a/aula-08/README.md b/aula-08/README.md index 834234d..5e29036 100644 --- a/aula-08/README.md +++ b/aula-08/README.md @@ -202,7 +202,47 @@ aula-08/ ├── install-nginx-ingress.sh # Instala NGINX Ingress com LB ├── install-metrics-server.sh # Instala Metrics Server (kubectl top, HPA) ├── nginx-ingress-values.yaml # Configuracao do NGINX Ingress -└── talos-patches/ # Patches de configuracao Talos - ├── control-plane.yaml - └── worker.yaml +├── talos-patches/ # Patches de configuracao Talos +│ ├── control-plane.yaml +│ └── worker.yaml +├── hcloud-csi-values.yaml # Configuracao do CSI Driver +└── statefulset-pdb.yaml # PDB para proteger StatefulSets ``` + +## Troubleshooting: Volume Stuck + +Se um pod ficar `Pending` aguardando volume: + +### 1. Verificar VolumeAttachment + +```bash +kubectl get volumeattachments +kubectl describe volumeattachment <nome> +``` + +### 2. Se o node de origem nao existe mais + +```bash +# Deletar o VolumeAttachment orfao (seguro pois node nao existe) +kubectl delete volumeattachment <nome> +``` + +### 3. Se o node existe mas pod morreu + +```bash +# Aguardar - Kubernetes vai liberar automaticamente +# Timeout padrao: 6 minutos +``` + +### 4. 
Verificar no Hetzner + +```bash +hcloud volume list +# Se volume mostra attached a server que nao existe, abrir ticket +``` + +### Limitacoes do Block Storage + +- Volumes Hetzner sao **RWO** (ReadWriteOnce) - single-attach por design +- Podem ficar stuck por ate 6 min (timeout do Kubernetes) +- Se node morrer abruptamente, recuperacao pode ser manual (deletar VolumeAttachment) diff --git a/aula-08/hcloud-csi-values.yaml b/aula-08/hcloud-csi-values.yaml new file mode 100644 index 0000000..0e00b31 --- /dev/null +++ b/aula-08/hcloud-csi-values.yaml @@ -0,0 +1,13 @@ +# Configuracoes para graceful handling de node failures +controller: + tolerations: + - key: "node.kubernetes.io/unreachable" + operator: "Exists" + effect: "NoExecute" + tolerationSeconds: 60 + - key: "node.kubernetes.io/not-ready" + operator: "Exists" + effect: "NoExecute" + tolerationSeconds: 60 + # Mais replicas para HA do controller + replicaCount: 2 diff --git a/aula-08/setup.sh b/aula-08/setup.sh index 676eef7..46528b3 100755 --- a/aula-08/setup.sh +++ b/aula-08/setup.sh @@ -534,6 +534,7 @@ log_info "Instalando CSI Driver via Helm..." helm upgrade --install hcloud-csi hcloud/hcloud-csi \ -n kube-system \ + -f "$SCRIPT_DIR/hcloud-csi-values.yaml" \ --wait \ --timeout 5m @@ -543,6 +544,11 @@ log_success "Hetzner CSI Driver instalado!" log_info "Verificando StorageClass..." kubectl get storageclass hcloud-volumes +# Configurar PDB para StatefulSets (protecao durante drain) +log_info "Criando PodDisruptionBudget para StatefulSets..." 
+kubectl apply -f "$SCRIPT_DIR/statefulset-pdb.yaml" +log_success "PDB criado" + echo "" ############################################################ diff --git a/aula-08/statefulset-pdb.yaml b/aula-08/statefulset-pdb.yaml new file mode 100644 index 0000000..337b7ac --- /dev/null +++ b/aula-08/statefulset-pdb.yaml @@ -0,0 +1,12 @@ +# PodDisruptionBudget that protects StatefulSet pods during node drain. +# maxUnavailable: 1 limits voluntary evictions to one matching pod at a time (minAvailable: 0 would not limit anything). +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: statefulset-pdb + namespace: default +spec: + maxUnavailable: 1 + selector: + matchLabels: + app.kubernetes.io/component: primary