#!/bin/bash
# =============================================================================
# Lesson 15 - APM with Grafana Tempo + OpenTelemetry
# =============================================================================
#
# This script installs:
#   1. Grafana Tempo (distributed tracing backend)
#   2. OpenTelemetry Collector (receives and forwards telemetry)
#   3. Tempo datasource in Grafana
#   4. Instrumented demo app (Express + PostgreSQL + OTel)
#   5. Latency alerts (Doherty threshold)
#
# Prerequisites:
#   - Kubernetes cluster (aula-08)
#   - kubectl and helm installed
#   - "monitoring" namespace with Victoria Metrics + Grafana (aula-12)
#
# =============================================================================

# Abort on the first unhandled failure, including failures in the middle of a
# pipeline (e.g. "kubectl create ... | kubectl apply -f -").
set -e
set -o pipefail

# ANSI colour escapes; interpreted by "echo -e" in the log helpers below.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
NC='\033[0m'

# Logging helpers. Each takes the message as $1. Errors go to stderr so they
# remain visible when stdout is redirected.
log_info() { echo -e "${BLUE}[INFO]${NC} $1"; }
log_success() { echo -e "${GREEN}[OK]${NC} $1"; }
log_warn() { echo -e "${YELLOW}[WARN]${NC} $1"; }
log_error() { echo -e "${RED}[ERROR]${NC} $1" >&2; }

# Directory containing this script; every manifest path is resolved from it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
ENV_FILE="${SCRIPT_DIR}/.env"

# =============================================================================
# CHECK PREREQUISITES
# =============================================================================

log_info "Verificando pre-requisitos..."

for REQUIRED_TOOL in kubectl helm; do
  if ! command -v "$REQUIRED_TOOL" &> /dev/null; then
    log_error "${REQUIRED_TOOL} nao encontrado. Instale com: brew install ${REQUIRED_TOOL}"
    exit 1
  fi
done

if ! kubectl cluster-info &> /dev/null; then
  log_error "Nao foi possivel conectar ao cluster Kubernetes"
  log_info "Verifique se KUBECONFIG esta configurado corretamente"
  exit 1
fi

if ! kubectl get namespace monitoring &> /dev/null; then
  log_error "Namespace 'monitoring' nao encontrado"
  log_info "Execute primeiro a aula-12 para instalar Victoria Metrics + Grafana"
  exit 1
fi

log_success "Pre-requisitos verificados"

# =============================================================================
# LOAD CONFIGURATION
# =============================================================================

if [[ -f "$ENV_FILE" ]]; then
  log_info "Carregando configuracao local..."
  # shellcheck disable=SC1090 -- path is only known at runtime
  source "$ENV_FILE"
fi

# Inherit configuration from aula-12 or aula-10, but only while DOMAIN is
# still unset: the first file that provides it wins.
for ENV_SRC in "${SCRIPT_DIR}/../aula-12/.env" "${SCRIPT_DIR}/../aula-10/.env"; do
  if [[ -f "$ENV_SRC" && -z "$DOMAIN" ]]; then
    log_info "Herdando configuracao de $(basename "$(dirname "$ENV_SRC")")..."
    # shellcheck disable=SC1090 -- path is only known at runtime
    source "$ENV_SRC"
  fi
done

# =============================================================================
# COLLECT CONFIGURATION
# =============================================================================

echo ""
echo -e "${CYAN}═══════════════════════════════════════════════════${NC}"
echo -e "${CYAN} APM - Grafana Tempo + OpenTelemetry${NC}"
echo -e "${CYAN}═══════════════════════════════════════════════════${NC}"
echo ""

# Base domain
if [[ -z "$DOMAIN" ]]; then
  read -r -p "Dominio base (ex: kube.quest): " DOMAIN
fi
if [[ -z "$DOMAIN" ]]; then
  # Without a domain every derived hostname ("demo.", "grafana.") is invalid.
  log_error "Dominio base nao pode ser vazio"
  exit 1
fi
log_info "Dominio: ${DOMAIN}"

# Demo app hostname (defaults to demo.<domain>)
if [[ -z "$DEMO_HOST" ]]; then
  DEFAULT_DEMO="demo.${DOMAIN}"
  read -r -p "Hostname da demo app [${DEFAULT_DEMO}]: " DEMO_HOST
  DEMO_HOST="${DEMO_HOST:-$DEFAULT_DEMO}"
fi
log_info "Demo app: ${DEMO_HOST}"

# TLS mode: inherited from a sourced .env when present, otherwise asked here.
if [[ -z "$USE_CLOUDFLARE" && -z "$USE_LETSENCRYPT" ]]; then
  echo ""
  echo "Configuracao de TLS:"
  echo " 1) Let's Encrypt (recomendado)"
  echo " 2) CloudFlare (proxy)"
  echo " 3) HTTP apenas"
  read -r -p "Escolha [1-3]: " TLS_CHOICE
  case "$TLS_CHOICE" in
    1)
      USE_CLOUDFLARE=false
      USE_LETSENCRYPT=true
      ;;
    2)
      USE_CLOUDFLARE=true
      USE_LETSENCRYPT=false
      ;;
    *)
      # Anything else (including "3" and empty input) means plain HTTP.
      USE_CLOUDFLARE=false
      USE_LETSENCRYPT=false
      ;;
  esac
fi

# PostgreSQL password for the demo: generated once, then reused from .env.
if [[ -z "$DEMO_DB_PASSWORD" ]]; then
  # "|| ..." keeps set -e from aborting silently so the check below can
  # print an actionable message (e.g. when openssl is not installed).
  DEMO_DB_PASSWORD=$(openssl rand -base64 24 | tr -dc 'a-zA-Z0-9' | head -c 24) \
    || DEMO_DB_PASSWORD=""
  if [[ -z "$DEMO_DB_PASSWORD" ]]; then
    log_error "Falha ao gerar a senha do PostgreSQL (openssl esta instalado?)"
    exit 1
  fi
  log_info "Senha PostgreSQL da demo gerada automaticamente"
fi

# Persist the configuration. The file holds the database password, so it is
# created/truncated and restricted to the owner before the secret is written.
: > "$ENV_FILE"
chmod 600 "$ENV_FILE"
cat > "$ENV_FILE" << EOF
# Configuracao gerada pelo setup.sh - $(date)
DOMAIN=${DOMAIN}
DEMO_HOST=${DEMO_HOST}
DEMO_DB_PASSWORD=${DEMO_DB_PASSWORD}
USE_CLOUDFLARE=${USE_CLOUDFLARE}
USE_LETSENCRYPT=${USE_LETSENCRYPT}
EOF

log_success "Configuracao salva"

# =============================================================================
# INSTALL GRAFANA TEMPO
# =============================================================================

echo ""
log_info "=== Instalando Grafana Tempo ==="

# Best-effort: the output of "repo add" is discarded; a genuinely missing
# repo makes the following "repo update" fail and abort the script.
helm repo add grafana https://grafana.github.io/helm-charts 2> /dev/null || true
helm repo update grafana

# The status probe only drives the log message; "upgrade --install" handles
# both the first install and later upgrades with a single command.
if helm status tempo -n monitoring &> /dev/null; then
  log_warn "Tempo ja instalado. Atualizando..."
fi

helm upgrade --install tempo grafana/tempo \
  --namespace monitoring \
  -f "${SCRIPT_DIR}/tempo-values.yaml" \
  --wait \
  --timeout 5m

log_success "Grafana Tempo instalado!"

# =============================================================================
# INSTALL OPENTELEMETRY COLLECTOR
# =============================================================================

echo ""
log_info "=== Instalando OpenTelemetry Collector ==="

# Best-effort, same reasoning as the grafana repo above.
helm repo add open-telemetry https://open-telemetry.github.io/opentelemetry-helm-charts 2> /dev/null || true
helm repo update open-telemetry

if helm status otel-collector -n monitoring &> /dev/null; then
  log_warn "OTel Collector ja instalado. Atualizando..."
fi

helm upgrade --install otel-collector open-telemetry/opentelemetry-collector \
  --namespace monitoring \
  -f "${SCRIPT_DIR}/otel-collector-values.yaml" \
  --wait \
  --timeout 5m

log_success "OpenTelemetry Collector instalado!"
# =============================================================================
# CONFIGURE THE TEMPO DATASOURCE IN GRAFANA
# =============================================================================

echo ""
log_info "=== Configurando datasource do Tempo no Grafana ==="

# ConfigMap labelled grafana_datasource=1 carrying the Tempo datasource
# definition. The heredoc delimiter is quoted, so nothing inside is expanded
# by the shell.
kubectl apply -f - << 'DATASOURCE_EOF'
apiVersion: v1
kind: ConfigMap
metadata:
  name: grafana-datasource-tempo
  namespace: monitoring
  labels:
    grafana_datasource: "1"
data:
  tempo-datasource.yaml: |
    apiVersion: 1
    datasources:
      - name: Tempo
        type: tempo
        url: http://tempo.monitoring:3100
        access: proxy
        isDefault: false
        jsonData:
          tracesToMetrics:
            datasourceUid: victoriametrics
            tags: [{key: "http.route"}]
          serviceMap:
            datasourceUid: victoriametrics
          nodeGraph:
            enabled: true
          tracesToLogs:
            datasourceUid: ""
DATASOURCE_EOF

log_success "Datasource configurado"

# Restart Grafana so it loads the new datasource. This stays best-effort (a
# failure does not abort the script), but the outcome is reported honestly
# instead of always claiming success.
log_info "Reiniciando Grafana para carregar datasource..."
if kubectl rollout restart deployment -n monitoring -l app.kubernetes.io/name=grafana \
  && kubectl rollout status deployment -n monitoring -l app.kubernetes.io/name=grafana --timeout=120s; then
  log_success "Grafana reiniciado"
else
  log_warn "Nao foi possivel reiniciar o Grafana; reinicie manualmente para carregar o datasource"
fi

# =============================================================================
# DEPLOY THE DEMO APP
# =============================================================================

echo ""
log_info "=== Deploy da Demo App ==="

# Create the namespace
kubectl apply -f "${SCRIPT_DIR}/demo-app/k8s/namespace.yaml"
log_success "Namespace demo criado"

# Create the PostgreSQL secret from the generated password. Rendering with
# --dry-run=client and piping into "kubectl apply" keeps re-runs working when
# the secret already exists.
kubectl create secret generic demo-postgresql \
  --namespace demo \
  --from-literal=POSTGRES_PASSWORD="${DEMO_DB_PASSWORD}" \
  --dry-run=client -o yaml | kubectl apply -f -
log_success "Secret demo-postgresql criado"

# Deploy PostgreSQL
kubectl apply -f "${SCRIPT_DIR}/demo-app/k8s/postgresql.yaml"
log_info "Aguardando PostgreSQL ficar pronto..."
# Wait for PostgreSQL. A timeout does not abort the script, but success is
# only reported when the deployment really became available.
if kubectl wait --for=condition=available deployment/demo-postgresql -n demo --timeout=120s; then
  log_success "PostgreSQL pronto"
else
  log_warn "PostgreSQL ainda nao esta disponivel apos 120s; continuando mesmo assim"
fi

# ConfigMap holding the application source. Rendered client-side and applied
# so that re-running the script updates it instead of failing.
kubectl create configmap demo-app-code \
  --from-file=app.js="${SCRIPT_DIR}/demo-app/app.js" \
  --from-file=tracing.js="${SCRIPT_DIR}/demo-app/tracing.js" \
  --from-file=package.json="${SCRIPT_DIR}/demo-app/package.json" \
  --namespace demo \
  --dry-run=client -o yaml | kubectl apply -f -
log_success "ConfigMap demo-app-code criado"

# Deploy the app: node:20-alpine, with an init container that copies the code
# from the ConfigMap into a shared emptyDir and runs "npm install" there.
kubectl apply -f - << 'APP_DEPLOY_EOF'
apiVersion: apps/v1
kind: Deployment
metadata:
  name: demo-app
  namespace: demo
  labels:
    app: demo-app
spec:
  replicas: 1
  selector:
    matchLabels:
      app: demo-app
  template:
    metadata:
      labels:
        app: demo-app
    spec:
      initContainers:
        - name: install-deps
          image: node:20-alpine
          command: ["sh", "-c", "cp /code/* /app/ && cd /app && npm install --production"]
          volumeMounts:
            - name: code
              mountPath: /code
              readOnly: true
            - name: app
              mountPath: /app
      containers:
        - name: demo-app
          image: node:20-alpine
          command: ["node", "--require", "./tracing.js", "app.js"]
          workingDir: /app
          ports:
            - containerPort: 3000
          env:
            - name: PG_HOST
              value: demo-postgresql
            - name: PG_PORT
              value: "5432"
            - name: PG_USER
              value: demo
            - name: PG_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: demo-postgresql
                  key: POSTGRES_PASSWORD
            - name: PG_DATABASE
              value: demo
            - name: OTEL_EXPORTER_OTLP_ENDPOINT
              value: http://otel-collector-opentelemetry-collector.monitoring:4317
            - name: OTEL_SERVICE_NAME
              value: demo-app
          resources:
            requests:
              memory: "64Mi"
              cpu: "50m"
            limits:
              memory: "256Mi"
          livenessProbe:
            httpGet:
              path: /health
              port: 3000
            initialDelaySeconds: 30
            periodSeconds: 10
            timeoutSeconds: 3
            failureThreshold: 3
          readinessProbe:
            httpGet:
              path: /health
              port: 3000
            initialDelaySeconds: 15
            periodSeconds: 5
            timeoutSeconds: 3
            failureThreshold: 3
          volumeMounts:
            - name: app
              mountPath: /app
      volumes:
        - name: code
          configMap:
            name: demo-app-code
        - name: app
          emptyDir: {}
APP_DEPLOY_EOF
log_success "Deployment demo-app criado"

# Deploy the Service
kubectl apply -f "${SCRIPT_DIR}/demo-app/k8s/service.yaml"
log_success "Service demo-app criado"

# Pause so the user can create the DNS record. The LoadBalancer IP lookup is
# best-effort: the service may not exist or may not have an IP assigned yet.
LB_IP=$(kubectl get svc -n ingress-nginx ingress-nginx-controller \
  -o jsonpath='{.status.loadBalancer.ingress[0].ip}' 2> /dev/null || true)
if [[ -z "$LB_IP" ]]; then
  log_warn "IP do LoadBalancer nao encontrado. Verifique com: kubectl get svc -n ingress-nginx"
  LB_IP="<IP-DO-LOAD-BALANCER>"
fi
# First DNS label of the host (everything before the first dot).
DEMO_NAME="${DEMO_HOST%%.*}"

echo ""
echo -e "${CYAN}═══════════════════════════════════════════════════${NC}"
echo -e "${CYAN} Configure o DNS${NC}"
echo -e "${CYAN}═══════════════════════════════════════════════════${NC}"
echo ""
echo "No seu provedor DNS:"
echo ""
echo -e " ${YELLOW}Tipo:${NC} A"
echo -e " ${YELLOW}Nome:${NC} ${DEMO_NAME}"
echo -e " ${YELLOW}Valor:${NC} ${GREEN}${LB_IP}${NC}"
echo ""
if [[ "$USE_LETSENCRYPT" == "true" ]]; then
  echo -e "${YELLOW}⚠ O Let's Encrypt precisa do DNS configurado para emitir o certificado.${NC}"
else
  echo -e "${YELLOW}⚠ Configure o DNS agora antes de continuar.${NC}"
fi
echo ""
echo -n "Pressione ENTER quando o DNS estiver configurado..."
read -r _
echo ""

# Render the Ingress from its template in one sed pass and apply it from
# memory, so no temporary file is left behind if a step fails.
# The host is escaped because it is interpolated into a sed replacement,
# where "/", "&" and "\" are special.
ESCAPED_DEMO_HOST=$(printf '%s' "$DEMO_HOST" | sed -e 's/[\/&]/\\&/g')
INGRESS_FILTERS=(-e "s/DEMO_HOST_PLACEHOLDER/${ESCAPED_DEMO_HOST}/g")

if [[ "$USE_LETSENCRYPT" == "true" ]]; then
  INGRESS_FILTERS+=(-e 's/CLUSTER_ISSUER_PLACEHOLDER/letsencrypt-prod/g')
else
  # Drop the cert-manager annotation
  INGRESS_FILTERS+=(-e '/cert-manager.io\/cluster-issuer/d')
fi

if [[ "$USE_CLOUDFLARE" != "true" && "$USE_LETSENCRYPT" != "true" ]]; then
  # Plain HTTP: drop the TLS block (from "tls:" through "secretName:")
  INGRESS_FILTERS+=(-e '/tls:/,/secretName:/d')
fi

# A failing sed (e.g. missing template) aborts here under set -e instead of
# feeding an empty manifest to kubectl.
INGRESS_MANIFEST=$(sed "${INGRESS_FILTERS[@]}" "${SCRIPT_DIR}/demo-app/k8s/ingress.yaml")
kubectl apply -f - <<< "$INGRESS_MANIFEST"
log_success "Ingress demo-app criado"

# Wait for the demo app
log_info "Aguardando demo-app ficar pronta..."
# Best-effort wait for the demo app: a timeout does not abort the script, but
# the user is told instead of the failure being silently discarded.
if ! kubectl wait --for=condition=available deployment/demo-app -n demo --timeout=180s; then
  log_warn "demo-app ainda nao esta disponivel apos 180s; verifique com: kubectl get pods -n demo"
fi

# =============================================================================
# APPLY ALERTS
# =============================================================================

echo ""
log_info "=== Aplicando alertas de latencia ==="

# Still non-fatal, but success is only reported when the apply succeeded.
if kubectl apply -f "${SCRIPT_DIR}/alerts/latency-alerts.yaml" -n monitoring; then
  log_success "Alertas aplicados"
else
  log_warn "Nao foi possivel aplicar os alertas de latencia; verifique alerts/latency-alerts.yaml"
fi

# =============================================================================
# FINAL SUMMARY
# =============================================================================

# HTTPS only when a TLS mode is explicitly enabled. This mirrors the Ingress
# rendering, which strips the TLS block unless one of the flags is "true"
# (so an unset or empty flag means plain HTTP).
PROTOCOL="http"
if [[ "$USE_CLOUDFLARE" == "true" || "$USE_LETSENCRYPT" == "true" ]]; then
  PROTOCOL="https"
fi

GRAFANA_HOST="${GRAFANA_HOST:-grafana.${DOMAIN}}"

echo ""
echo -e "${CYAN}═══════════════════════════════════════════════════${NC}"
echo -e "${GREEN} APM Instalado com Sucesso!${NC}"
echo -e "${CYAN}═══════════════════════════════════════════════════${NC}"
cat << EOF

Demo App:
 URL: ${PROTOCOL}://${DEMO_HOST}
 Rotas:
 /health - Health check
 /fast - Query rapida (1 query)
 /slow - Query lenta (N+1 pattern - 51 queries)
 /fixed - Query otimizada (1 JOIN)

Grafana (Traces):
 URL: ${PROTOCOL}://${GRAFANA_HOST}
 Datasource: Tempo (ja configurado)
 Explore: ${PROTOCOL}://${GRAFANA_HOST}/explore

EOF
echo -e "${CYAN}--- Como testar ---${NC}"
cat << EOF

 # Gerar traces (executar varias vezes)
 curl ${PROTOCOL}://${DEMO_HOST}/fast
 curl ${PROTOCOL}://${DEMO_HOST}/slow
 curl ${PROTOCOL}://${DEMO_HOST}/fixed

 # Ou via port-forward:
 kubectl port-forward -n demo svc/demo-app 3000:3000
 curl http://localhost:3000/slow

EOF
echo -e "${CYAN}--- O que observar no Grafana ---${NC}"
cat << EOF

 1. Abrir Explore > Selecionar datasource 'Tempo'
 2. Search > Service Name = demo-app
 3. Comparar traces de /fast vs /slow
 - /fast: 1 span de query (~5ms)
 - /slow: 51 spans de query (~200ms+)
 - /fixed: 1 span de query com JOIN (~10ms)

Verificar pods:
 kubectl get pods -n monitoring -l app.kubernetes.io/name=tempo
 kubectl get pods -n monitoring -l app.kubernetes.io/name=opentelemetry-collector
 kubectl get pods -n demo

Desinstalar:
 ./cleanup.sh

EOF
echo -e "${CYAN}═══════════════════════════════════════════════════${NC}"
echo ""

# Informational pod listing; failures here are deliberately ignored because
# the installation itself has already finished.
log_info "Status dos pods:"
echo ""
echo "--- monitoring ---"
kubectl get pods -n monitoring -l "app.kubernetes.io/name in (tempo, opentelemetry-collector)" 2> /dev/null || true
echo ""
echo "--- demo ---"
kubectl get pods -n demo 2> /dev/null || true
echo ""