Complete workshop: lessons 08-10 with Talos, n8n, and GitLab on Hetzner

Lesson 08 - HA Kubernetes cluster:
- Interactive OpenTofu setup for Talos on Hetzner
- CCM, CSI Driver, Cluster Autoscaler, Metrics Server
- NGINX Ingress behind a LoadBalancer (HTTP/HTTPS/SSH)

Lesson 09 - n8n on Hetzner:
- Deploy via Helm with PostgreSQL and Redis
- Multi-tenant support via add-client.sh
- Hetzner CSI integration for persistent volumes

Lesson 10 - GitLab on Hetzner:
- Provider-agnostic setup: CloudFlare (trusted proxies) or Let's Encrypt
- Anti-affinity to spread webservice/sidekiq across different nodes
- Container Registry and SSH via TCP passthrough
- Documented the 422 error and its fix with trustedCIDRsForXForwardedFor

General improvements:
- READMEs updated with architecture and troubleshooting notes
- cleanup.sh scripts for all lessons
- CLAUDE.md updated with project context
Author: Allyson de Paula
Date: 2025-12-31 17:57:02 -03:00
Parent: 50dc74c1d8
Commit: 07b7ee62d3
35 changed files with 4665 additions and 311 deletions

aula-08/README.md (new file, 208 lines)

@@ -0,0 +1,208 @@
# Lesson 08 - HA Kubernetes Cluster on Hetzner Cloud
## Goal
Provision a production-grade Talos Kubernetes cluster on Hetzner Cloud using OpenTofu, with optional High Availability (HA) and LoadBalancer.
## Architecture
### HA mode with LoadBalancer (recommended)
```
Internet
┌───────────────────────┐
│ Hetzner Load │
│ Balancer (LB11) │
│ IP: 1.2.3.4 │
└───────────────────────┘
┌───────────────┼───────────────┐
│ │ │
▼ ▼ ▼
┌─────────┐ ┌─────────┐ ┌─────────┐
│ CP-0 │ │ CP-1 │ │ CP-2 │
│ CAX11 │ │ CAX11 │ │ CAX11 │
│10.0.1.10│ │10.0.1.11│ │10.0.1.12│
└─────────┘ └─────────┘ └─────────┘
│ │ │
└───────────────┼───────────────┘
┌──────────┐
│ Worker-0 │
│ CAX11 │
│10.0.1.20 │
└──────────┘
```
### LoadBalancer services
The LoadBalancer centralizes all external traffic:
| Port | Target | Purpose |
|:-----:|---------|-----|
| 6443 | Control planes | Kubernetes API |
| 50000 | Control planes | Talos API |
| 80 | Workers | HTTP (NGINX Ingress) |
| 443 | Workers | HTTPS (NGINX Ingress) |
| 22 | Workers | SSH (GitLab) |
### L7 routing (by domain)
The LoadBalancer only does L4 routing (by port). Routing by domain is handled by the NGINX Ingress:
```
LB :443 → NGINX Ingress → n8n.kube.quest → n8n pods
→ git.kube.quest → gitlab pods
→ argocd.kube.quest → argocd pods
→ registry.git... → registry pods
```
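As a sketch of that host-based routing, a minimal Ingress resource might look like the following (the `n8n` names are placeholders for the services actually deployed in lessons 09-10):

```yaml
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: n8n                      # hypothetical Ingress name
  namespace: n8n
spec:
  ingressClassName: nginx
  rules:
    - host: n8n.kube.quest       # NGINX matches this against the Host header
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: n8n        # hypothetical Service name
                port:
                  number: 80
```

Each application gets its own Ingress with its own `host:` rule; the single LoadBalancer IP serves them all.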
## Estimated Costs
Prices based on [Hetzner Cloud](https://www.hetzner.com/cloud/) (NBG1 - Nuremberg):
| Configuration | CPs | LB | Cost/month |
|--------------|:---:|:--:|----------:|
| Single | 1 | Floating IP | ~$13 |
| HA | 3 | Floating IP | ~$22 |
| HA + LB | 3 | LB11 | ~$24 |
| HA + LB + Autoscaler (max 5 workers) | 3 | LB11 | ~$43 |
Resources: CAX11 $4.59, LB11 $5.99, Floating IP $3.29
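The "HA + LB" row follows directly from the per-resource prices listed above (a sketch of the arithmetic, using `awk` since bash has no floating point):

```shell
# Recompute the "HA + LB" estimate: 3 control planes + 1 worker (CAX11) + LB11.
awk 'BEGIN {
  cax11 = 4.59; lb11 = 5.99
  total = 4 * cax11 + lb11      # 3 CPs + 1 worker, plus the LoadBalancer
  printf "HA + LB: $%.2f/month\n", total
}'
# → HA + LB: $24.35/month
```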
## Prerequisites
```bash
# macOS
brew install opentofu
brew install siderolabs/tap/talosctl
brew install kubectl
brew install helm
# Create the Talos image (lesson 07)
# Get its ID: hcloud image list --type snapshot
```
## Commands
```bash
# Provision the cluster (interactive)
./setup.sh
# Destroy the infrastructure
./cleanup.sh
```
## Setup Flow
```
1. Checks prerequisites
2. Collects credentials:
   - Hetzner Cloud token
   - SSH key
   - Talos image ID
3. Asks for configuration:
   - HA cluster? (Y/n)
   - LoadBalancer? (Y/n) [if HA]
4. Creates terraform.tfvars
5. Runs tofu init/plan/apply
6. Waits for the cluster to become ready
7. Installs the CCM (Cloud Controller Manager)
8. Installs the Cluster Autoscaler
9. Installs the Hetzner CSI Driver
10. Installs the Metrics Server
11. Installs the NGINX Ingress Controller
```
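Step 4 of the flow can be sketched as a heredoc. The variable names below mirror the ones referenced elsewhere in this lesson (`hcloud_token`, `enable_ha`, `enable_loadbalancer`); treat this as an illustration with placeholder values, not the exact output of setup.sh:

```shell
# Illustrative sketch of how setup.sh might generate terraform.tfvars.
HCLOUD_TOKEN="hcloud-token-placeholder"
TALOS_IMAGE_ID="123456789"   # hypothetical snapshot ID from lesson 07
ENABLE_HA="true"
ENABLE_LB="true"

cat > terraform.tfvars <<EOF
hcloud_token        = "${HCLOUD_TOKEN}"
talos_image_id      = "${TALOS_IMAGE_ID}"
enable_ha           = ${ENABLE_HA}
enable_loadbalancer = ${ENABLE_LB}
EOF

grep -q 'hcloud_token' terraform.tfvars && echo "terraform.tfvars written"
```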
## Installed Components
| Component | Role |
|------------|--------|
| Hetzner CCM | Removes taints from workers, provisions LoadBalancers |
| Cluster Autoscaler | Automatically scales workers from 1 to 5 |
| Hetzner CSI Driver | Provisions persistent volumes (StorageClass: hcloud-volumes) |
| Metrics Server | Enables kubectl top and CPU/memory-based HPA |
| NGINX Ingress | Exposes HTTP/HTTPS/SSH services through the LoadBalancer |
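For example, once the Metrics Server is running, a HorizontalPodAutoscaler can scale a Deployment on CPU utilization (the `myapp` name is a placeholder):

```yaml
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: myapp                  # hypothetical Deployment to scale
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: myapp
  minReplicas: 1
  maxReplicas: 5
  metrics:
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70   # scale up above 70% average CPU
```

When HPA-created pods no longer fit on the existing workers, the Cluster Autoscaler takes over and adds nodes.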
## After Setup
### Configure kubectl
```bash
export KUBECONFIG=$PWD/kubeconfig
kubectl get nodes
```
### Configure talosctl
```bash
export TALOSCONFIG=$PWD/talosconfig
talosctl -n <IP> health
```
### Watch the Autoscaler logs
```bash
kubectl logs -n cluster-autoscaler -l app=cluster-autoscaler -f
```
### Get the LoadBalancer IP
```bash
kubectl get svc -n ingress-nginx ingress-nginx-controller -o jsonpath='{.status.loadBalancer.ingress[0].ip}'
```
### Check the StorageClass
```bash
kubectl get storageclass
# Expected: hcloud-volumes (default)
```
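With `hcloud-volumes` as the default StorageClass, a PersistentVolumeClaim like this one (claim name is a placeholder) gets a Hetzner Cloud volume provisioned and attached automatically by the CSI driver:

```yaml
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: data-test              # hypothetical claim name
spec:
  accessModes:
    - ReadWriteOnce            # hcloud volumes attach to a single node
  storageClassName: hcloud-volumes
  resources:
    requests:
      storage: 10Gi            # Hetzner's minimum block volume size is 10 GB
```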
### Check resource usage
```bash
kubectl top nodes    # CPU/memory usage per node
kubectl top pods -A  # CPU/memory usage per pod
```
### Test the Autoscaler
```bash
# Create pending pods (this scales up the workers)
kubectl create deployment test --image=nginx --replicas=20
# Watch
kubectl get nodes -w
# Clean up
kubectl delete deployment test
```
## Next Steps
1. **Configure DNS** - Point your domain at the LoadBalancer IP
2. **Deploy n8n** (lesson 09) - Workflow automation with PostgreSQL and Redis
3. **Deploy GitLab** (lesson 10) - Git + Container Registry + SSH
## Files
```
aula-08/
├── main.tf                    # Main resources (servers, LB, Talos)
├── variables.tf               # Input variables
├── outputs.tf                 # Cluster outputs
├── versions.tf                # Provider versions
├── setup.sh                   # Interactive setup script
├── cleanup.sh                 # Teardown script
├── cluster-autoscaler.yaml    # Autoscaler manifest
├── install-nginx-ingress.sh   # Installs NGINX Ingress with the LB
├── install-metrics-server.sh  # Installs the Metrics Server (kubectl top, HPA)
├── nginx-ingress-values.yaml  # NGINX Ingress configuration
└── talos-patches/             # Talos configuration patches
    ├── control-plane.yaml
    └── worker.yaml
```

aula-08/cleanup.sh

@@ -20,7 +20,7 @@ cd "$SCRIPT_DIR"
log_info() { echo -e "${BLUE}[INFO]${NC} $1"; }
log_success() { echo -e "${GREEN}[OK]${NC} $1"; }
log_warn() { echo -e "${YELLOW}[WARN]${NC} $1"; }
log_error() { echo -e "${RED}[ERROR]${NC} $1"; }
log_error() { echo -e "${RED}[ERRO]${NC} $1"; }
echo ""
echo "============================================"
@@ -57,6 +57,9 @@ fi
log_warn "WARNING: this operation will DESTROY all resources!"
echo ""
echo "Resources that will be removed:"
echo "  - NGINX Ingress Controller"
echo "  - Hetzner CSI Driver"
echo "  - LoadBalancer (Hetzner LB)"
echo "  - 3x control plane nodes"
echo "  - Workers (including those created by the autoscaler)"
echo "  - Private network"
@@ -99,6 +102,76 @@ if [ -f "terraform.tfvars" ]; then
fi
fi
# ==========================================================================
# Remove the LoadBalancer created by the CCM (not managed by OpenTofu)
# ==========================================================================
# The LoadBalancer is created by the Hetzner CCM when the NGINX Ingress is
# installed. It is attached to the subnet and blocks destruction of the
# network unless removed first.
log_info "Checking for the Hetzner CCM LoadBalancer..."
# Try kubectl first (if the cluster is still reachable)
if [ -f "kubeconfig" ]; then
  export KUBECONFIG="$SCRIPT_DIR/kubeconfig"
  if kubectl cluster-info &>/dev/null; then
    if kubectl get svc -n ingress-nginx nginx-ingress-ingress-nginx-controller &>/dev/null; then
      log_info "Removing the LoadBalancer Service via kubectl..."
      kubectl delete svc nginx-ingress-ingress-nginx-controller -n ingress-nginx --timeout=60s 2>/dev/null || true
      sleep 10 # Give the CCM time to process the removal
    fi
  fi
fi
# Fallback: delete directly via the hcloud CLI
if [ -f "terraform.tfvars" ]; then
  HCLOUD_TOKEN=$(grep 'hcloud_token' terraform.tfvars | cut -d'"' -f2)
  if [ -n "$HCLOUD_TOKEN" ]; then
    LB_NAME="k8s-ingress" # Name defined in nginx-ingress-values.yaml
    if HCLOUD_TOKEN="$HCLOUD_TOKEN" hcloud load-balancer describe "$LB_NAME" &>/dev/null; then
      log_info "Removing LoadBalancer '$LB_NAME' via the hcloud CLI..."
      HCLOUD_TOKEN="$HCLOUD_TOKEN" hcloud load-balancer delete "$LB_NAME" --quiet 2>/dev/null || true
      log_success "LoadBalancer removed"
      sleep 5 # Give Hetzner time to process
    else
      log_info "LoadBalancer '$LB_NAME' not found (OK)"
    fi
  fi
fi
# ==========================================================================
# Remove the NGINX Ingress Controller
# ==========================================================================
# Installed by setup.sh to expose HTTP/HTTPS/SSH services
if [ -f "kubeconfig" ]; then
  export KUBECONFIG="$SCRIPT_DIR/kubeconfig"
  if kubectl cluster-info &>/dev/null; then
    if helm status ingress-nginx -n ingress-nginx &>/dev/null; then
      log_info "Removing the NGINX Ingress Controller..."
      helm uninstall ingress-nginx -n ingress-nginx --wait 2>/dev/null || true
      kubectl delete namespace ingress-nginx --wait=false 2>/dev/null || true
      log_success "NGINX Ingress removed"
      sleep 5 # Give it time to process
    fi
  fi
fi
# ==========================================================================
# Remove the Hetzner CSI Driver
# ==========================================================================
# Installed by setup.sh to provision persistent volumes
if [ -f "kubeconfig" ]; then
  export KUBECONFIG="$SCRIPT_DIR/kubeconfig"
  if kubectl cluster-info &>/dev/null; then
    if helm status hcloud-csi -n kube-system &>/dev/null; then
      log_info "Removing the Hetzner CSI Driver..."
      helm uninstall hcloud-csi -n kube-system --wait 2>/dev/null || true
      log_success "Hetzner CSI Driver removed"
    fi
  fi
fi
echo ""
log_info "Destroying infrastructure via OpenTofu..."
echo ""
echo ""

aula-08/cluster-autoscaler.yaml

@@ -106,17 +106,17 @@ spec:
        app: cluster-autoscaler
    spec:
      serviceAccountName: cluster-autoscaler
      # Use host network to access external APIs (Hetzner)
      hostNetwork: true
      dnsPolicy: ClusterFirstWithHostNet
      # Workaround: Talos DNS proxy doesn't forward to upstream correctly
      hostAliases:
        - ip: "213.239.246.73"
          hostnames:
            - "api.hetzner.cloud"
      containers:
        - name: cluster-autoscaler
          image: registry.k8s.io/autoscaling/cluster-autoscaler:v1.31.0
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop: ["ALL"]
            runAsNonRoot: true
            runAsUser: 65532
            seccompProfile:
              type: RuntimeDefault
          command:
            - ./cluster-autoscaler
            - --cloud-provider=hetzner


@@ -1,139 +0,0 @@
#!/bin/bash
############################################################
# Installs the Cluster Autoscaler on the Talos cluster
# Requires: cluster provisioned via setup.sh
############################################################
set -e
# Colors
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$SCRIPT_DIR"
log_info() { echo -e "${BLUE}[INFO]${NC} $1"; }
log_success() { echo -e "${GREEN}[OK]${NC} $1"; }
log_warn() { echo -e "${YELLOW}[WARN]${NC} $1"; }
log_error() { echo -e "${RED}[ERROR]${NC} $1"; }
echo ""
echo "============================================"
echo "  Installing the Cluster Autoscaler"
echo "============================================"
echo ""
# Check prerequisites
if [ ! -f "kubeconfig" ]; then
  log_error "kubeconfig not found! Run setup.sh first."
  exit 1
fi
if [ ! -f "terraform.tfvars" ]; then
  log_error "terraform.tfvars not found!"
  exit 1
fi
export KUBECONFIG="$SCRIPT_DIR/kubeconfig"
# Check the cluster connection
log_info "Checking the cluster connection..."
if ! kubectl get nodes &>/dev/null; then
  log_error "Could not connect to the cluster!"
  exit 1
fi
log_success "Connected to the cluster"
# Read values from OpenTofu
log_info "Reading settings from OpenTofu..."
WORKER_CONFIG_BASE64=$(tofu output -raw autoscaler_worker_config 2>/dev/null)
TALOS_IMAGE_ID=$(tofu output -raw autoscaler_image_id 2>/dev/null)
CLUSTER_NAME=$(tofu output -raw cluster_name 2>/dev/null)
NETWORK_ID=$(tofu output -raw network_id 2>/dev/null)
FIREWALL_ID=$(tofu output -raw firewall_id 2>/dev/null)
SSH_KEY_NAME=$(tofu output -raw ssh_key_name 2>/dev/null)
# Read the token from terraform.tfvars
HCLOUD_TOKEN=$(grep 'hcloud_token' terraform.tfvars | cut -d'"' -f2)
if [ -z "$WORKER_CONFIG_BASE64" ] || [ -z "$HCLOUD_TOKEN" ]; then
  log_error "Could not read the required settings!"
  exit 1
fi
log_success "Settings loaded"
echo "  - Cluster: $CLUSTER_NAME"
echo "  - Image ID: $TALOS_IMAGE_ID"
echo "  - Network ID: $NETWORK_ID"
echo "  - SSH Key: $SSH_KEY_NAME"
echo ""
# Create the namespace with the privileged policy (required for hostNetwork)
log_info "Creating the cluster-autoscaler namespace..."
kubectl create namespace cluster-autoscaler --dry-run=client -o yaml | kubectl apply -f -
kubectl label namespace cluster-autoscaler pod-security.kubernetes.io/enforce=privileged --overwrite
# Create the credentials secret
log_info "Creating the credentials secret..."
kubectl create secret generic hcloud-autoscaler \
  --namespace cluster-autoscaler \
  --from-literal=token="$HCLOUD_TOKEN" \
  --from-literal=cloud-init="$WORKER_CONFIG_BASE64" \
  --dry-run=client -o yaml | kubectl apply -f -
log_success "Secret created"
# Apply the RBAC and Deployment
log_info "Applying the cluster-autoscaler manifest..."
# Substitute variables in the template and apply
cat cluster-autoscaler.yaml | \
  sed "s|\${TALOS_IMAGE_ID}|$TALOS_IMAGE_ID|g" | \
  sed "s|\${NETWORK_NAME}|$CLUSTER_NAME-network|g" | \
  sed "s|\${FIREWALL_NAME}|$CLUSTER_NAME-firewall|g" | \
  sed "s|\${SSH_KEY_NAME}|$SSH_KEY_NAME|g" | \
  kubectl apply -f -
log_success "Cluster Autoscaler installed!"
# Wait for the pod to become ready
log_info "Waiting for the autoscaler pod..."
kubectl wait --for=condition=ready pod \
  -l app=cluster-autoscaler \
  -n cluster-autoscaler \
  --timeout=120s
echo ""
log_success "Cluster Autoscaler ready!"
echo ""
echo "============================================"
echo "  Autoscaler Configuration"
echo "============================================"
echo ""
echo "  Pool: worker-pool"
echo "  Type: CAX11 (ARM64)"
echo "  Region: nbg1 (Nuremberg)"
echo "  Min nodes: 1"
echo "  Max nodes: 5"
echo ""
echo "  Scale down after: 5 minutes"
echo "  Minimum utilization: 50%"
echo ""
echo "Useful commands:"
echo ""
echo "  # Watch the autoscaler logs"
echo "  kubectl logs -n cluster-autoscaler -l app=cluster-autoscaler -f"
echo ""
echo "  # Check node status"
echo "  kubectl get nodes"
echo ""
echo "  # Test scale up (create pending pods)"
echo "  kubectl create deployment test --image=nginx --replicas=10"
echo ""

aula-08/install-metrics-server.sh (new executable file, 121 lines)

@@ -0,0 +1,121 @@
#!/bin/bash
# =============================================================================
# Installs the Metrics Server for HPA and kubectl top support
# =============================================================================
#
# The Metrics Server is required for:
# - CPU/memory-based Horizontal Pod Autoscaler (HPA)
# - kubectl top pods/nodes
# - Metrics-based scheduling decisions
#
# Prerequisites:
# - A running Kubernetes cluster
# - Helm 3.x installed
#
# Usage:
#   ./install-metrics-server.sh
#
# =============================================================================
set -e
# Output colors
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'
log_info() { echo -e "${BLUE}[INFO]${NC} $1"; }
log_success() { echo -e "${GREEN}[OK]${NC} $1"; }
log_warn() { echo -e "${YELLOW}[WARN]${NC} $1"; }
log_error() { echo -e "${RED}[ERRO]${NC} $1"; }
# =============================================================================
# CHECKS
# =============================================================================
log_info "Checking prerequisites..."
if ! command -v kubectl &> /dev/null; then
  log_error "kubectl not found"
  exit 1
fi
if ! command -v helm &> /dev/null; then
  log_error "Helm not found"
  exit 1
fi
# Check the cluster connection
if ! kubectl cluster-info &> /dev/null; then
  log_error "Could not connect to the cluster"
  log_error "Check that KUBECONFIG is set correctly"
  exit 1
fi
log_success "Prerequisites OK"
# =============================================================================
# INSTALL THE METRICS SERVER
# =============================================================================
log_info "Adding the Helm repository..."
helm repo add metrics-server https://kubernetes-sigs.github.io/metrics-server/ 2>/dev/null || true
helm repo update
log_info "Installing the Metrics Server..."
# Note: --kubelet-insecure-tls is required on Talos Linux because
# the kubelet certificates are not signed by the cluster CA
helm upgrade --install metrics-server metrics-server/metrics-server \
  -n kube-system \
  --set 'args[0]=--kubelet-insecure-tls' \
  --wait \
  --timeout 2m
log_success "Metrics Server installed!"
# =============================================================================
# VERIFY THE INSTALLATION
# =============================================================================
log_info "Waiting for the Metrics Server to become ready..."
for i in {1..30}; do
  if kubectl get --raw "/apis/metrics.k8s.io/v1beta1/nodes" &> /dev/null; then
    break
  fi
  echo -n "."
  sleep 2
done
echo ""
# Check whether metrics are available
if kubectl get --raw "/apis/metrics.k8s.io/v1beta1/nodes" &> /dev/null; then
  log_success "Metrics API available!"
  echo ""
  log_info "Testing kubectl top nodes..."
  kubectl top nodes 2>/dev/null || log_warn "Metrics are still being collected, wait a few seconds..."
else
  log_warn "Metrics API not available yet"
  log_warn "Wait a few seconds and run: kubectl top nodes"
fi
# =============================================================================
# SUMMARY
# =============================================================================
echo ""
echo "=============================================="
echo -e "${GREEN}  Metrics Server Installed!${NC}"
echo "=============================================="
echo ""
echo "You can now use:"
echo "  kubectl top nodes   # Node resource usage"
echo "  kubectl top pods    # Pod resource usage"
echo ""
echo "HPAs can now scale on CPU/memory!"
echo ""
echo "=============================================="

aula-08/install-nginx-ingress.sh (new executable file, 139 lines)

@@ -0,0 +1,139 @@
#!/bin/bash
# =============================================================================
# Installs/updates the NGINX Ingress Controller with a Hetzner LoadBalancer
# =============================================================================
#
# This script configures the NGINX Ingress to:
# - Use a Hetzner LoadBalancer (requires the CCM to be installed)
# - Support TCP passthrough for GitLab SSH
# - Communicate over the private network
#
# Prerequisites:
# - A running Kubernetes cluster
# - Hetzner CCM installed (./install-ccm.sh)
# - Helm 3.x installed
#
# Usage:
#   ./install-nginx-ingress.sh
#
# =============================================================================
set -e
# Output colors
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'
log_info() { echo -e "${BLUE}[INFO]${NC} $1"; }
log_success() { echo -e "${GREEN}[OK]${NC} $1"; }
log_warn() { echo -e "${YELLOW}[WARN]${NC} $1"; }
log_error() { echo -e "${RED}[ERRO]${NC} $1"; }
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# =============================================================================
# CHECKS
# =============================================================================
log_info "Checking prerequisites..."
if ! command -v kubectl &> /dev/null; then
  log_error "kubectl not found"
  exit 1
fi
if ! command -v helm &> /dev/null; then
  log_error "Helm not found"
  exit 1
fi
# Check that the CCM is installed
if ! kubectl get deployment hccm-hcloud-cloud-controller-manager -n kube-system &> /dev/null; then
  log_warn "The Hetzner CCM does not appear to be installed"
  log_warn "Run ./install-ccm.sh first for automatic LoadBalancer provisioning"
  read -p "Continue anyway? (y/N) " -n 1 -r
  echo
  if [[ ! $REPLY =~ ^[Yy]$ ]]; then
    exit 1
  fi
fi
log_success "Prerequisites OK"
# =============================================================================
# INSTALL THE NGINX INGRESS
# =============================================================================
log_info "Adding the Helm repository..."
helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx 2>/dev/null || true
helm repo update
log_info "Installing/updating the NGINX Ingress Controller..."
helm upgrade --install nginx-ingress ingress-nginx/ingress-nginx \
  -n ingress-nginx \
  --create-namespace \
  -f "$SCRIPT_DIR/nginx-ingress-values.yaml" \
  --wait \
  --timeout 5m
log_success "NGINX Ingress installed!"
# =============================================================================
# WAIT FOR THE LOADBALANCER
# =============================================================================
log_info "Waiting for the LoadBalancer to receive an external IP..."
echo "(this can take 1-2 minutes)"
for i in {1..60}; do
  EXTERNAL_IP=$(kubectl get svc nginx-ingress-ingress-nginx-controller \
    -n ingress-nginx \
    -o jsonpath='{.status.loadBalancer.ingress[0].ip}' 2>/dev/null || echo "")
  if [ -n "$EXTERNAL_IP" ] && [ "$EXTERNAL_IP" != "<pending>" ]; then
    break
  fi
  echo -n "."
  sleep 2
done
echo ""
if [ -n "$EXTERNAL_IP" ] && [ "$EXTERNAL_IP" != "<pending>" ]; then
  log_success "LoadBalancer IP: $EXTERNAL_IP"
else
  log_warn "The LoadBalancer does not have an IP yet. Check with:"
  echo "  kubectl get svc -n ingress-nginx"
fi
# =============================================================================
# SUMMARY
# =============================================================================
echo ""
echo "=============================================="
echo -e "${GREEN}  NGINX Ingress Configured!${NC}"
echo "=============================================="
echo ""
echo "LoadBalancer IP: ${EXTERNAL_IP:-<pending>}"
echo ""
echo "Exposed ports:"
echo "  - 80 (HTTP)"
echo "  - 443 (HTTPS)"
echo "  - 22 (SSH - for GitLab)"
echo ""
echo "Next steps:"
echo "  1. Configure DNS pointing at the IP above:"
echo "     - n8n.kube.quest → $EXTERNAL_IP"
echo "     - git.kube.quest → $EXTERNAL_IP"
echo "     - registry.git.kube.quest → $EXTERNAL_IP"
echo ""
echo "  2. Install GitLab:"
echo "     cd ../aula-09 && ./setup.sh"
echo ""
echo "=============================================="

aula-08/main.tf

@@ -32,7 +32,12 @@ resource "random_string" "cluster_id" {
}
locals {
cluster_name = "talos-${random_string.cluster_id.result}"
cluster_name = "talos-${random_string.cluster_id.result}"
control_plane_count = var.enable_ha ? 3 : 1
# Endpoint: LoadBalancer IP if enabled, otherwise Floating IP
cluster_endpoint_ip = var.enable_loadbalancer ? hcloud_load_balancer.cluster[0].ipv4 : hcloud_floating_ip.control_plane[0].ip_address
common_labels = {
cluster = local.cluster_name
environment = var.environment
@@ -191,7 +196,7 @@ resource "hcloud_placement_group" "cluster" {
############################################################
resource "hcloud_server" "control_plane" {
count = 3
count = local.control_plane_count
name = "${local.cluster_name}-cp-${count.index}"
server_type = "cax11"
image = data.hcloud_image.talos.id
@@ -218,14 +223,15 @@ resource "hcloud_server" "control_plane" {
}
resource "hcloud_server_network" "control_plane" {
count = 3
count = local.control_plane_count
server_id = hcloud_server.control_plane[count.index].id
network_id = hcloud_network.cluster.id
ip = "10.0.1.${10 + count.index}"
}
# Floating IP for stable control plane access
# Floating IP for stable control plane access (only if LoadBalancer is disabled)
resource "hcloud_floating_ip" "control_plane" {
count = var.enable_loadbalancer ? 0 : 1
type = "ipv4"
name = "${local.cluster_name}-cp-ip"
home_location = "nbg1"
@@ -233,10 +239,139 @@ resource "hcloud_floating_ip" "control_plane" {
}
resource "hcloud_floating_ip_assignment" "control_plane" {
floating_ip_id = hcloud_floating_ip.control_plane.id
count = var.enable_loadbalancer ? 0 : 1
floating_ip_id = hcloud_floating_ip.control_plane[0].id
server_id = hcloud_server.control_plane[0].id
}
############################################################
# LOAD BALANCER (for HA access to control plane and ingress)
############################################################
resource "hcloud_load_balancer" "cluster" {
count = var.enable_loadbalancer ? 1 : 0
name = "${local.cluster_name}-lb"
load_balancer_type = "lb11"
location = "nbg1"
labels = local.common_labels
}
resource "hcloud_load_balancer_network" "cluster" {
count = var.enable_loadbalancer ? 1 : 0
load_balancer_id = hcloud_load_balancer.cluster[0].id
network_id = hcloud_network.cluster.id
ip = "10.0.1.2"
depends_on = [hcloud_network_subnet.cluster]
}
# Kubernetes API (6443) -> Control Planes
resource "hcloud_load_balancer_service" "kubernetes_api" {
count = var.enable_loadbalancer ? 1 : 0
load_balancer_id = hcloud_load_balancer.cluster[0].id
protocol = "tcp"
listen_port = 6443
destination_port = 6443
health_check {
protocol = "tcp"
port = 6443
interval = 10
timeout = 5
retries = 3
}
}
# Talos API (50000) -> Control Planes
resource "hcloud_load_balancer_service" "talos_api" {
count = var.enable_loadbalancer ? 1 : 0
load_balancer_id = hcloud_load_balancer.cluster[0].id
protocol = "tcp"
listen_port = 50000
destination_port = 50000
health_check {
protocol = "tcp"
port = 50000
interval = 10
timeout = 5
retries = 3
}
}
# HTTP (80) -> Workers (NGINX Ingress)
resource "hcloud_load_balancer_service" "http" {
count = var.enable_loadbalancer ? 1 : 0
load_balancer_id = hcloud_load_balancer.cluster[0].id
protocol = "tcp"
listen_port = 80
destination_port = 80
health_check {
protocol = "tcp"
port = 80
interval = 10
timeout = 5
retries = 3
}
}
# HTTPS (443) -> Workers (NGINX Ingress)
resource "hcloud_load_balancer_service" "https" {
count = var.enable_loadbalancer ? 1 : 0
load_balancer_id = hcloud_load_balancer.cluster[0].id
protocol = "tcp"
listen_port = 443
destination_port = 443
health_check {
protocol = "tcp"
port = 443
interval = 10
timeout = 5
retries = 3
}
}
# SSH (22) -> Workers (GitLab SSH)
resource "hcloud_load_balancer_service" "ssh" {
count = var.enable_loadbalancer ? 1 : 0
load_balancer_id = hcloud_load_balancer.cluster[0].id
protocol = "tcp"
listen_port = 22
destination_port = 22
health_check {
protocol = "tcp"
port = 22
interval = 10
timeout = 5
retries = 3
}
}
# LB Targets: Control Planes (for 6443 and 50000)
resource "hcloud_load_balancer_target" "control_plane" {
count = var.enable_loadbalancer ? local.control_plane_count : 0
type = "server"
load_balancer_id = hcloud_load_balancer.cluster[0].id
server_id = hcloud_server.control_plane[count.index].id
use_private_ip = true
depends_on = [hcloud_load_balancer_network.cluster]
}
# LB Targets: Workers (for 80, 443, and 22)
resource "hcloud_load_balancer_target" "worker" {
count = var.enable_loadbalancer ? 1 : 0
type = "server"
load_balancer_id = hcloud_load_balancer.cluster[0].id
server_id = hcloud_server.worker[count.index].id
use_private_ip = true
depends_on = [hcloud_load_balancer_network.cluster]
}
############################################################
# WORKER NODE (Single CAX11)
############################################################
@@ -288,15 +423,15 @@ resource "talos_machine_secrets" "this" {
data "talos_client_configuration" "this" {
cluster_name = local.cluster_name
client_configuration = talos_machine_secrets.this.client_configuration
endpoints = [hcloud_floating_ip.control_plane.ip_address]
endpoints = [local.cluster_endpoint_ip]
}
# Control plane configuration
data "talos_machine_configuration" "control_plane" {
count = 3
count = local.control_plane_count
cluster_name = local.cluster_name
machine_type = "controlplane"
cluster_endpoint = "https://${hcloud_floating_ip.control_plane.ip_address}:6443"
cluster_endpoint = "https://${local.cluster_endpoint_ip}:6443"
machine_secrets = talos_machine_secrets.this.machine_secrets
talos_version = var.talos_version
@@ -304,15 +439,16 @@ data "talos_machine_configuration" "control_plane" {
templatefile("${path.module}/talos-patches/control-plane.yaml", {
cluster_name = local.cluster_name
node_name = hcloud_server.control_plane[count.index].name
is_ha = true
is_ha = var.enable_ha
is_first_cp = count.index == 0
etcd_peers = [for i in range(3) : "10.0.1.${10 + i}"]
floating_ip = hcloud_floating_ip.control_plane.ip_address
etcd_peers = [for i in range(local.control_plane_count) : "10.0.1.${10 + i}"]
floating_ip = local.cluster_endpoint_ip
})
]
depends_on = [
hcloud_server.control_plane,
hcloud_load_balancer.cluster,
hcloud_floating_ip_assignment.control_plane
]
}
@@ -322,7 +458,7 @@ data "talos_machine_configuration" "worker" {
count = 1
cluster_name = local.cluster_name
machine_type = "worker"
cluster_endpoint = "https://${hcloud_floating_ip.control_plane.ip_address}:6443"
cluster_endpoint = "https://${local.cluster_endpoint_ip}:6443"
machine_secrets = talos_machine_secrets.this.machine_secrets
talos_version = var.talos_version
@@ -335,6 +471,7 @@ data "talos_machine_configuration" "worker" {
depends_on = [
hcloud_server.worker,
hcloud_load_balancer.cluster,
hcloud_floating_ip_assignment.control_plane
]
}
@@ -344,7 +481,7 @@ data "talos_machine_configuration" "worker" {
############################################################
resource "talos_machine_configuration_apply" "control_plane" {
count = 3
count = local.control_plane_count
client_configuration = talos_machine_secrets.this.client_configuration
machine_configuration_input = data.talos_machine_configuration.control_plane[count.index].machine_configuration
endpoint = hcloud_server.control_plane[count.index].ipv4_address
@@ -400,11 +537,11 @@ resource "talos_cluster_kubeconfig" "this" {
############################################################
resource "local_sensitive_file" "kubeconfig" {
# Replace the internal hostname with the floating IP for external access
# Replace the internal hostname with the LB/Floating IP for external access
content = replace(
talos_cluster_kubeconfig.this.kubeconfig_raw,
"https://${local.cluster_name}.local:6443",
"https://${hcloud_floating_ip.control_plane.ip_address}:6443"
"https://${local.cluster_endpoint_ip}:6443"
)
filename = "${path.root}/kubeconfig"
}

aula-08/nginx-ingress-values.yaml (new file, 88 lines)

@@ -0,0 +1,88 @@
# =============================================================================
# NGINX Ingress Controller - Configuração para Hetzner Cloud
# =============================================================================
#
# Este values configura o NGINX Ingress com:
# - LoadBalancer da Hetzner (provisionado automaticamente pelo CCM)
# - Suporte a TCP para SSH do GitLab (porta 22)
# - Uso de rede privada para comunicação com os nodes
#
# Uso:
# helm upgrade --install nginx-ingress ingress-nginx/ingress-nginx \
# -n ingress-nginx --create-namespace \
# -f nginx-ingress-values.yaml
#
# =============================================================================
controller:
# Configuração do Service LoadBalancer
service:
type: LoadBalancer
# Annotations específicas para Hetzner Cloud
annotations:
# Nome do LoadBalancer no painel Hetzner
load-balancer.hetzner.cloud/name: "k8s-ingress"
# Localização do LoadBalancer (mesmo datacenter do cluster)
load-balancer.hetzner.cloud/location: "nbg1"
# Usar rede privada para comunicação com nodes
# Mais seguro e sem custo de tráfego
load-balancer.hetzner.cloud/use-private-ip: "true"
# Tipo do LoadBalancer (lb11 é o menor/mais barato)
load-balancer.hetzner.cloud/type: "lb11"
# Health check
load-balancer.hetzner.cloud/health-check-interval: "5s"
load-balancer.hetzner.cloud/health-check-timeout: "3s"
load-balancer.hetzner.cloud/health-check-retries: "3"
# ==========================================================================
# TCP Services - Para SSH do GitLab
# ==========================================================================
# Mapeia porta externa -> namespace/service:porta
# O GitLab Shell roda no namespace gitlab, service gitlab-gitlab-shell
tcp:
22: "gitlab/gitlab-gitlab-shell:22"
# Configuração do controller
config:
# Habilitar proxy protocol se necessário
# use-proxy-protocol: "true"
# Timeouts
proxy-connect-timeout: "10"
proxy-read-timeout: "120"
proxy-send-timeout: "120"
# Body size para uploads grandes (GitLab, n8n)
proxy-body-size: "0"
# Keepalive
keep-alive: "75"
keep-alive-requests: "1000"
# Recursos do controller
resources:
requests:
cpu: 100m
memory: 128Mi
limits:
cpu: 500m
memory: 256Mi
# Métricas para monitoramento
metrics:
enabled: true
serviceMonitor:
enabled: false # Habilitar se usar Prometheus Operator
# Admission webhook
admissionWebhooks:
enabled: true
# Default backend (opcional)
defaultBackend:
enabled: false
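Para referência, o bloco `tcp:` acima é renderizado pelo chart como um ConfigMap de TCP services consumido pelo controller. Esboço ilustrativo do objeto resultante (o nome do release e o namespace são suposições):

```yaml
apiVersion: v1
kind: ConfigMap
metadata:
  # Nome gerado pelo chart, assumindo release "nginx-ingress" no namespace ingress-nginx
  name: nginx-ingress-ingress-nginx-tcp
  namespace: ingress-nginx
data:
  # porta externa -> namespace/service:porta (mesmo formato do values)
  "22": gitlab/gitlab-gitlab-shell:22
```

Com esse mapeamento presente, o Service do tipo LoadBalancer do controller passa a expor a porta 22 além de 80/443.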
@@ -26,8 +26,13 @@ output "network_cidr" {
# Control Plane Information
 output "control_plane_ip" {
-  description = "Public IP address of the control plane"
-  value       = hcloud_floating_ip.control_plane.ip_address
+  description = "Public IP address of the control plane (LB or Floating IP)"
+  value       = local.cluster_endpoint_ip
 }
+
+output "load_balancer_ip" {
+  description = "Public IP of the Load Balancer (if enabled)"
+  value       = var.enable_loadbalancer ? hcloud_load_balancer.cluster[0].ipv4 : null
+}
output "control_plane_private_ips" {
@@ -70,22 +75,23 @@ output "talosconfig_path" {
 # API Endpoints
 output "kubernetes_api_endpoint" {
   description = "Kubernetes API server endpoint"
-  value       = "https://${hcloud_floating_ip.control_plane.ip_address}:6443"
+  value       = "https://${local.cluster_endpoint_ip}:6443"
 }

 output "talos_api_endpoint" {
   description = "Talos API endpoint for management"
-  value       = "https://${hcloud_floating_ip.control_plane.ip_address}:50000"
+  value       = "https://${local.cluster_endpoint_ip}:50000"
 }

 # Cost Information
 output "estimated_monthly_cost" {
-  description = "Estimated monthly cost for the infrastructure (EUR)"
+  description = "Estimated monthly cost for the infrastructure (USD)"
   value = {
-    control_plane = 3 * 3.79 # 3x CAX11
-    worker        = 1 * 3.79 # 1x CAX11
-    floating_ip   = 3.00     # Floating IPv4
-    total         = (4 * 3.79) + 3.00 # ~€18.16
+    control_plane = local.control_plane_count * 4.59
+    worker        = 1 * 4.59
+    load_balancer = var.enable_loadbalancer ? 5.99 : 0
+    floating_ip   = var.enable_loadbalancer ? 0 : 3.29
+    total         = (local.control_plane_count + 1) * 4.59 + (var.enable_loadbalancer ? 5.99 : 3.29)
   }
 }
@@ -104,16 +110,15 @@ output "connection_instructions" {
   2. Configure talosctl:
      export TALOSCONFIG=${local_sensitive_file.talosconfig.filename}
-     talosctl --nodes ${hcloud_floating_ip.control_plane.ip_address} health
+     talosctl --nodes ${local.cluster_endpoint_ip} health

   3. Access Kubernetes API:
-     ${"https://${hcloud_floating_ip.control_plane.ip_address}:6443"}
+     https://${local.cluster_endpoint_ip}:6443

   4. Nodes:
-     Control Plane: 3x CAX11 (ARM64)
+     Control Plane: ${local.control_plane_count}x CAX11 (ARM64)
      Workers: 1x CAX11 (ARM64)

-  5. Total Monthly Cost: ~€18/month
+  5. ${var.enable_loadbalancer ? "Load Balancer: LB11" : "Floating IP: IPv4"}

   ====================================
   EOT

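A aritmética de custo do output `estimated_monthly_cost` acima pode ser conferida localmente. Esboço em shell, assumindo os mesmos preços usados no HCL (CAX11 a \$4.59, LB11 a \$5.99):

```shell
# Esboço: reproduz o cálculo total = (control_planes + 1 worker) * preço_nó + LB
CP_COUNT=3        # control planes em modo HA
NODE_PRICE=4.59   # CAX11
LB_PRICE=5.99     # LB11
TOTAL=$(awk -v cp="$CP_COUNT" -v node="$NODE_PRICE" -v lb="$LB_PRICE" \
  'BEGIN { printf "%.2f", (cp + 1) * node + lb }')
echo "$TOTAL"   # imprime 24.35
```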
@@ -32,7 +32,7 @@ log_warn() {
}
 log_error() {
-    echo -e "${RED}[ERROR]${NC} $1"
+    echo -e "${RED}[ERRO]${NC} $1"
 }
############################################################
@@ -84,6 +84,18 @@ if ! command -v kubectl &> /dev/null; then
fi
log_success "kubectl $(kubectl version --client -o yaml 2>/dev/null | grep gitVersion | awk '{print $2}' || echo 'instalado')"
# Verificar Helm
if ! command -v helm &> /dev/null; then
log_error "Helm não encontrado!"
echo ""
echo "Instale o Helm:"
echo " brew install helm # macOS"
echo " snap install helm --classic # Linux"
echo ""
exit 1
fi
log_success "Helm $(helm version --short 2>/dev/null | head -1)"
# Verificar hcloud CLI (opcional, mas útil)
if command -v hcloud &> /dev/null; then
log_success "hcloud CLI instalado"
@@ -170,6 +182,52 @@ if [ "$SKIP_CREDENTIALS" != "true" ]; then
log_success "Image ID: $TALOS_IMAGE_ID"
echo ""
# Configuração do Cluster
echo "============================================"
echo " Configuração do Cluster"
echo "============================================"
echo ""
# Cluster HA?
echo "4. Modo de Alta Disponibilidade (HA)"
echo ""
echo " HA = 3 Control Planes (tolerância a falhas)"
echo " Single = 1 Control Plane (menor custo)"
echo ""
read -p " Cluster HA? (S/n): " enable_ha
if [[ "$enable_ha" =~ ^[Nn]$ ]]; then
ENABLE_HA="false"
ENABLE_LB="false"
log_info "Modo Single: 1 Control Plane"
else
ENABLE_HA="true"
log_success "Modo HA: 3 Control Planes"
echo ""
# LoadBalancer?
echo "5. LoadBalancer para o Control Plane"
echo ""
echo " Com LB: HA real (qualquer CP pode cair)"
echo " Sem LB: Floating IP (se CP-0 cair, cluster inacessível)"
echo ""
echo " O LoadBalancer também serve para:"
echo " - HTTP/HTTPS (NGINX Ingress)"
echo " - SSH (GitLab)"
echo " - Talos API"
echo ""
echo " Custo adicional: ~\$6/mes"
echo ""
read -p " Usar LoadBalancer? (S/n): " enable_lb
if [[ "$enable_lb" =~ ^[Nn]$ ]]; then
ENABLE_LB="false"
log_info "Usando Floating IP (sem HA real do CP)"
else
ENABLE_LB="true"
log_success "LoadBalancer habilitado"
fi
fi
echo ""
# Criar terraform.tfvars
log_info "Criando terraform.tfvars..."
cat > terraform.tfvars << EOF
@@ -180,8 +238,9 @@ hcloud_token = "$HCLOUD_TOKEN"
ssh_public_key = "$SSH_PUBLIC_KEY"
talos_image_id = $TALOS_IMAGE_ID
-environment = "workshop"
-enable_monitoring = true
+environment = "prod"
+enable_ha = $ENABLE_HA
+enable_loadbalancer = $ENABLE_LB
EOF
log_success "terraform.tfvars criado"
fi
@@ -219,16 +278,39 @@ echo ""
log_success "Plano criado!"
echo ""
-# Mostrar resumo
+# Mostrar resumo baseado na configuração
echo "============================================"
echo " Recursos a serem criados:"
echo "============================================"
echo ""
-echo "  - 4x CAX11 (3 CP + 1 Worker) = 4 x €3.79 = €15.16"
-echo "  - 1x Floating IPv4           = €3.00"
-echo "  - Rede/Firewall/Placement    = Grátis"
+# Ler configuração do tfvars
+ENABLE_HA_CONFIG=$(grep 'enable_ha' terraform.tfvars 2>/dev/null | grep -o 'true\|false' || echo "true")
+ENABLE_LB_CONFIG=$(grep 'enable_loadbalancer' terraform.tfvars 2>/dev/null | grep -o 'true\|false' || echo "true")
+
+if [ "$ENABLE_HA_CONFIG" = "true" ]; then
+    CP_COUNT=3
+    echo "  - 3x CAX11 Control Planes = 3 x \$4.59 = \$13.77"
+else
+    CP_COUNT=1
+    echo "  - 1x CAX11 Control Plane  = 1 x \$4.59 = \$4.59"
+fi
+echo "  - 1x CAX11 Worker         = 1 x \$4.59 = \$4.59"
+
+if [ "$ENABLE_LB_CONFIG" = "true" ]; then
+    echo "  - 1x Load Balancer LB11   = \$5.99"
+    echo "  - Rede/Firewall/Placement = Gratis"
+    LB_COST=5.99
+else
+    echo "  - 1x Floating IPv4        = \$3.29"
+    echo "  - Rede/Firewall/Placement = Gratis"
+    LB_COST=3.29
+fi
+
+TOTAL_COST=$(echo "scale=2; ($CP_COUNT + 1) * 4.59 + $LB_COST" | bc)
 echo ""
-echo "  Custo estimado: ~€18.16/mês (sem VAT)"
+echo "  Custo estimado: ~\$${TOTAL_COST}/mes"
echo ""
############################################################
@@ -312,6 +394,203 @@ fi
echo ""
############################################################
# INSTALAÇÃO DO CCM (Cloud Controller Manager)
############################################################
echo "============================================"
echo " Instalando Hetzner Cloud Controller Manager"
echo "============================================"
echo ""
# Obter token do terraform.tfvars
HCLOUD_TOKEN=$(grep 'hcloud_token' terraform.tfvars | cut -d'"' -f2)
NETWORK_ID=$(tofu output -raw network_id 2>/dev/null || echo "")
if [ -z "$HCLOUD_TOKEN" ]; then
log_error "Não foi possível obter HCLOUD_TOKEN!"
exit 1
fi
# Criar secret para o CCM
log_info "Criando secret hcloud..."
SECRET_DATA="--from-literal=token=$HCLOUD_TOKEN"
if [ -n "$NETWORK_ID" ]; then
SECRET_DATA="$SECRET_DATA --from-literal=network=$NETWORK_ID"
fi
kubectl create secret generic hcloud \
$SECRET_DATA \
-n kube-system \
--dry-run=client -o yaml | kubectl apply -f -
log_success "Secret criado"
# Instalar CCM via Helm
log_info "Instalando CCM via Helm..."
helm repo add hcloud https://charts.hetzner.cloud 2>/dev/null || true
helm repo update hcloud
HELM_ARGS="--set networking.enabled=true"
HELM_ARGS="$HELM_ARGS --set networking.clusterCIDR=10.244.0.0/16"
if [ -n "$NETWORK_ID" ]; then
HELM_ARGS="$HELM_ARGS --set networking.network.id=$NETWORK_ID"
fi
helm upgrade --install hccm hcloud/hcloud-cloud-controller-manager \
-n kube-system \
$HELM_ARGS \
--wait
log_success "CCM instalado!"
# Aguardar taint ser removido dos workers
log_info "Aguardando CCM inicializar workers..."
for i in {1..30}; do
if ! kubectl get nodes -o jsonpath='{.items[*].spec.taints[*].key}' 2>/dev/null | grep -q "node.cloudprovider.kubernetes.io/uninitialized"; then
log_success "Workers inicializados!"
break
fi
echo -n "."
sleep 5
done
echo ""
############################################################
# INSTALAÇÃO DO CLUSTER AUTOSCALER
############################################################
echo ""
echo "============================================"
echo " Instalando Cluster Autoscaler"
echo "============================================"
echo ""
# Obter configurações do OpenTofu
log_info "Obtendo configurações do OpenTofu..."
WORKER_CONFIG_BASE64=$(tofu output -raw autoscaler_worker_config 2>/dev/null)
TALOS_IMAGE_ID=$(tofu output -raw autoscaler_image_id 2>/dev/null)
CLUSTER_NAME=$(tofu output -raw cluster_name 2>/dev/null)
FIREWALL_ID=$(tofu output -raw firewall_id 2>/dev/null)
SSH_KEY_NAME=$(tofu output -raw ssh_key_name 2>/dev/null)
if [ -z "$WORKER_CONFIG_BASE64" ]; then
log_error "Não foi possível obter configuração do worker!"
exit 1
fi
log_success "Configurações obtidas"
echo " - Cluster: $CLUSTER_NAME"
echo " - Image ID: $TALOS_IMAGE_ID"
echo " - Network ID: $NETWORK_ID"
echo ""
# Criar namespace
log_info "Criando namespace cluster-autoscaler..."
kubectl create namespace cluster-autoscaler --dry-run=client -o yaml | kubectl apply -f -
kubectl label namespace cluster-autoscaler pod-security.kubernetes.io/enforce=privileged --overwrite
# Criar secret
log_info "Criando secret do autoscaler..."
kubectl create secret generic hcloud-autoscaler \
--namespace cluster-autoscaler \
--from-literal=token="$HCLOUD_TOKEN" \
--from-literal=cloud-init="$WORKER_CONFIG_BASE64" \
--dry-run=client -o yaml | kubectl apply -f -
log_success "Secret criado"
# Aplicar manifesto
log_info "Aplicando manifesto do cluster-autoscaler..."
cat cluster-autoscaler.yaml | \
sed "s|\${TALOS_IMAGE_ID}|$TALOS_IMAGE_ID|g" | \
sed "s|\${NETWORK_NAME}|$CLUSTER_NAME-network|g" | \
sed "s|\${FIREWALL_NAME}|$CLUSTER_NAME-firewall|g" | \
sed "s|\${SSH_KEY_NAME}|$SSH_KEY_NAME|g" | \
kubectl apply -f -
# Aguardar pod ficar pronto
log_info "Aguardando pod do autoscaler..."
kubectl wait --for=condition=ready pod \
-l app=cluster-autoscaler \
-n cluster-autoscaler \
--timeout=120s
log_success "Cluster Autoscaler instalado!"
echo ""
############################################################
# INSTALAÇÃO DO HETZNER CSI DRIVER
############################################################
echo "============================================"
echo " Instalando Hetzner CSI Driver"
echo "============================================"
echo ""
log_info "Instalando CSI Driver via Helm..."
helm upgrade --install hcloud-csi hcloud/hcloud-csi \
-n kube-system \
--wait \
--timeout 5m
log_success "Hetzner CSI Driver instalado!"
# Verificar StorageClass
log_info "Verificando StorageClass..."
kubectl get storageclass hcloud-volumes
echo ""
############################################################
# INSTALAÇÃO DO NGINX INGRESS CONTROLLER
############################################################
echo "============================================"
echo " Instalando NGINX Ingress Controller"
echo "============================================"
echo ""
# Detectar localização do cluster para o LoadBalancer
CLUSTER_LOCATION=$(kubectl get nodes -o jsonpath='{.items[0].metadata.labels.topology\.kubernetes\.io/zone}' 2>/dev/null | cut -d'-' -f1)
if [ -z "$CLUSTER_LOCATION" ]; then
CLUSTER_LOCATION="nbg1" # Default para Nuremberg
fi
log_info "Localização do cluster: $CLUSTER_LOCATION"
log_info "Instalando NGINX Ingress via Helm..."
helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx 2>/dev/null || true
helm repo update ingress-nginx
helm upgrade --install ingress-nginx ingress-nginx/ingress-nginx \
--namespace ingress-nginx \
--create-namespace \
--set controller.allowSnippetAnnotations=true \
--set controller.config.annotations-risk-level=Critical \
--set controller.admissionWebhooks.enabled=false \
--set "controller.service.annotations.load-balancer\.hetzner\.cloud/location=${CLUSTER_LOCATION}" \
--set "controller.service.annotations.load-balancer\.hetzner\.cloud/use-private-ip=true" \
--wait --timeout 5m
log_success "NGINX Ingress Controller instalado!"
# Aguardar LoadBalancer obter IP
log_info "Aguardando LoadBalancer obter IP externo..."
for i in {1..30}; do
LB_IP=$(kubectl get svc -n ingress-nginx ingress-nginx-controller \
-o jsonpath='{.status.loadBalancer.ingress[0].ip}' 2>/dev/null)
if [ -n "$LB_IP" ]; then
log_success "LoadBalancer IP: $LB_IP"
break
fi
echo -n "."
sleep 5
done
echo ""
############################################################
# RESUMO FINAL
############################################################
@@ -327,6 +606,19 @@ tofu output -raw kubernetes_api_endpoint 2>/dev/null && echo "" || true
tofu output -raw talos_api_endpoint 2>/dev/null && echo "" || true
echo ""
echo "Componentes instalados:"
echo " - Hetzner Cloud Controller Manager (CCM)"
echo " - Cluster Autoscaler (1-5 workers)"
echo " - Hetzner CSI Driver (StorageClass: hcloud-volumes)"
echo " - NGINX Ingress Controller + LoadBalancer"
echo ""
# Mostrar IP do LoadBalancer
LB_IP=$(kubectl get svc -n ingress-nginx ingress-nginx-controller \
-o jsonpath='{.status.loadBalancer.ingress[0].ip}' 2>/dev/null || echo "pendente")
echo "LoadBalancer IP: $LB_IP"
echo ""
echo "Arquivos gerados:"
echo " - kubeconfig : Configuração do kubectl"
echo " - talosconfig : Configuração do talosctl"
@@ -338,24 +630,12 @@ echo " # Usar kubectl com este cluster"
echo " export KUBECONFIG=$SCRIPT_DIR/kubeconfig"
echo " kubectl get nodes"
echo ""
 echo "  # Usar talosctl com este cluster"
 echo "  export TALOSCONFIG=$SCRIPT_DIR/talosconfig"
 echo "  talosctl -n <IP> health"
 echo ""
-echo "  # Ver outputs do OpenTofu"
-echo "  tofu output"
+echo "  # Ver logs do autoscaler"
+echo "  kubectl logs -n cluster-autoscaler -l app=cluster-autoscaler -f"
 echo ""
 echo "  # Destruir infraestrutura (CUIDADO!)"
 echo "  ./cleanup.sh"
 echo ""
 log_success "Setup concluído!"
-echo ""
-echo "============================================"
-echo "  Próximo passo (opcional)"
-echo "============================================"
-echo ""
-echo "  Para habilitar autoscaling de 1-5 workers:"
-echo "  ./install-autoscaler.sh"
-echo ""
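O parsing de `terraform.tfvars` feito pelo script (`grep`/`cut` para strings, `grep -o 'true\|false'` para booleanos) pode ser verificado isoladamente. Esboço com um tfvars de exemplo (nomes e valores hipotéticos):

```shell
# Esboço: extrai valores de um tfvars mínimo, como faz o setup.sh
TFVARS=$(mktemp)
cat > "$TFVARS" <<'EOF'
hcloud_token        = "tok-exemplo"
enable_ha           = true
enable_loadbalancer = false
EOF

# String entre aspas: o segundo campo delimitado por "
TOKEN=$(grep 'hcloud_token' "$TFVARS" | cut -d'"' -f2)
# Booleanos: apenas o literal true/false da linha
HA=$(grep 'enable_ha' "$TFVARS" | grep -o 'true\|false')
LB=$(grep 'enable_loadbalancer' "$TFVARS" | grep -o 'true\|false')
echo "$TOKEN $HA $LB"   # imprime: tok-exemplo true false
rm -f "$TFVARS"
```

Note que esse parsing é posicional e frágil de propósito (assume uma chave por linha); para algo mais robusto seria preciso um parser de HCL.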

@@ -26,6 +26,7 @@ machine:
 # Kubelet configuration
 kubelet:
   extraArgs:
+    cloud-provider: external
     max-pods: "110"
     kube-reserved: "cpu=200m,memory=300Mi"
     system-reserved: "cpu=200m,memory=200Mi"

@@ -16,6 +16,7 @@ machine:
 # Kubelet configuration
 kubelet:
   extraArgs:
+    cloud-provider: external
     max-pods: "110"
     kube-reserved: "cpu=100m,memory=200Mi"
     system-reserved: "cpu=100m,memory=100Mi"

@@ -2,28 +2,56 @@
# Variables for Hetzner Talos Kubernetes Cluster
############################################################
-# Authentication
+# ==========================================================
+# AUTENTICAÇÃO
+# ==========================================================
 variable "hcloud_token" {
   type        = string
   description = "Hetzner Cloud API token"
   sensitive   = true
 }

-# Cluster Configuration
+# ==========================================================
+# CONFIGURAÇÃO DO CLUSTER
+# ==========================================================
+variable "enable_ha" {
+  type        = bool
+  description = "Enable HA mode with 3 control plane nodes"
+  default     = true
+}
+
+variable "enable_loadbalancer" {
+  type        = bool
+  description = "Enable Hetzner Load Balancer for HA access to control plane and ingress"
+  default     = true
+}
+
 variable "environment" {
   type        = string
   description = "Environment name (prod, staging, dev)"
   default     = "prod"

   validation {
     condition     = contains(["prod", "staging", "dev"], var.environment)
     error_message = "Environment deve ser: prod, staging ou dev."
   }
 }

-# SSH Configuration
+# ==========================================================
+# SSH
+# ==========================================================
 variable "ssh_public_key" {
   type        = string
   description = "Public SSH key for emergency access to nodes"
 }

-# Talos Configuration
+# ==========================================================
+# TALOS
+# ==========================================================
 variable "talos_image_id" {
   type        = number
   description = "ID da imagem Talos customizada na Hetzner (criada na aula-07). Obtenha com: hcloud image list --type snapshot"
@@ -32,30 +60,18 @@ variable "talos_image_id" {
 variable "talos_version" {
   type        = string
   description = "Talos version to use"
-  default     = "v1.11.2" # Match the official image version
+  default     = "v1.11.2"

   validation {
     condition     = can(regex("^v[0-9]+\\.[0-9]+\\.[0-9]+$", var.talos_version))
     error_message = "talos_version deve seguir o formato semântico: v1.2.3"
   }
 }

-# Monitoring Configuration
-variable "enable_monitoring" {
-  type        = bool
-  description = "Enable Victoria Metrics monitoring stack"
-  default     = true
-}
+# ==========================================================
+# LABELS CUSTOMIZADAS
+# ==========================================================

-# Auto-scaling Configuration
-variable "scale_up_threshold" {
-  type        = number
-  description = "CPU percentage to trigger scale up"
-  default     = 70
-}
-
-variable "scale_down_threshold" {
-  type        = number
-  description = "CPU percentage to trigger scale down"
-  default     = 30
-}
-
-# Tags for resource management
 variable "custom_labels" {
   type        = map(string)
   description = "Custom labels to add to all resources"