SRE 话题文档:CI/CD 流水线运维

本文档面向生产环境,涵盖 CI/CD 流水线架构、Jenkins/GitLab CI/ArgoCD 部署、自动化构建发布等核心运维场景。


1. 生产环境部署架构

1.1 架构图(ASCII)

┌─────────────────────────────────────────────────────────────────────────────┐
│                        CI/CD 生产环境架构                                    │
└─────────────────────────────────────────────────────────────────────────────┘

  开发阶段              持续集成              持续部署              运行环境
  ─────────            ─────────            ─────────            ─────────

┌───────────┐     ┌───────────────┐     ┌───────────────┐     ┌───────────┐
│   IDE     │     │   CI Server   │     │   CD Server   │     │  Cluster  │
│  (本地)   │────▶│  (Jenkins/    │────▶│  (ArgoCD/     │────▶│ (K8s/     │
│           │     │   GitLab CI)  │     │   Spinnaker)  │     │  Docker)  │
└───────────┘     └───────┬───────┘     └───────┬───────┘     └───────────┘
                          │                     │
                    ┌─────┴─────┐         ┌─────┴─────┐
                    │           │         │           │
                    ▼           ▼         ▼           ▼
              ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐
              │  Build   │ │  Test    │ │ Registry │ │  Env     │
              │  Agent   │ │  Runner  │ │ (Harbor) │ │ (Dev/    │
              │          │ │          │ │          │ │  Staging/│
              │          │ │          │ │          │ │  Prod)   │
              └──────────┘ └──────────┘ └──────────┘ └──────────┘

┌─────────────────────────────────────────────────────────────────────────────┐
│  代码管理 & 质量控制                                                         │
│  ┌─────────────┐  ┌─────────────┐  ┌─────────────┐  ┌─────────────┐         │
│  │   GitLab    │  │   SonarQube │  │   Nexus/    │  │   Trivy     │         │
│  │  (代码库)   │  │ (代码质量)  │  │ Artifactory │  │ (安全扫描)  │         │
│  └─────────────┘  └─────────────┘  └─────────────┘  └─────────────┘         │
└─────────────────────────────────────────────────────────────────────────────┘

1.2 Kubernetes 部署配置

# jenkins-deployment.yaml
# Single-replica Jenkins controller in the ci-cd namespace.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: jenkins
  namespace: ci-cd
spec:
  replicas: 1
  # Recreate stops the old pod before the new one starts, so a rollout never
  # runs two controllers against the same jenkins-home claim at once.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: jenkins
  template:
    metadata:
      labels:
        app: jenkins
    spec:
      serviceAccountName: jenkins
      containers:
      - name: jenkins
        image: jenkins/jenkins:lts-jdk17
        ports:
        - containerPort: 8080
          name: web
        - containerPort: 50000
          name: agent
        env:
        # Serve the UI under /jenkins; the probe paths below rely on this prefix.
        - name: JENKINS_OPTS
          value: "--prefix=/jenkins"
        # Max heap stays below the 4Gi container limit to leave headroom for
        # non-heap JVM memory (metaspace, thread stacks, direct buffers).
        - name: JAVA_OPTS
          value: "-Xmx3g -Xms2g -XX:+UseG1GC"
        volumeMounts:
        - name: jenkins-home
          mountPath: /var/jenkins_home
        # NOTE(review): the host Docker socket gives this pod control over the
        # node's Docker daemon - confirm this is acceptable for production.
        - name: docker-sock
          mountPath: /var/run/docker.sock
        resources:
          requests:
            cpu: "1"
            memory: "2Gi"
          limits:
            cpu: "2"
            memory: "4Gi"
        # Liveness starts later than readiness (120s vs 60s) to give the
        # controller time to boot before restarts are considered.
        livenessProbe:
          httpGet:
            path: /jenkins/login
            port: 8080
          initialDelaySeconds: 120
          periodSeconds: 10
        readinessProbe:
          httpGet:
            path: /jenkins/login
            port: 8080
          initialDelaySeconds: 60
          periodSeconds: 5
      volumes:
      - name: jenkins-home
        persistentVolumeClaim:
          claimName: jenkins-pvc
      - name: docker-sock
        hostPath:
          path: /var/run/docker.sock
---
# Service exposing the Jenkins pods inside the cluster (no `type` is set, so
# the Kubernetes default Service type applies).
apiVersion: v1
kind: Service
metadata:
  namespace: ci-cd
  name: jenkins
spec:
  # Routes to every pod labelled app=jenkins.
  selector:
    app: jenkins
  ports:
  # Web UI / API
  - name: web
    port: 8080
    targetPort: 8080
  # Inbound build-agent connections
  - name: agent
    port: 50000
    targetPort: 50000
---
# Argo CD deployment, declared as an ArgoCD custom resource
# (presumably reconciled by the Argo CD operator - verify it is installed).
apiVersion: argoproj.io/v1alpha1
kind: ArgoCD
metadata:
  name: argocd
  namespace: argocd
spec:
  server:
    host: argocd.example.com
    # Expose the API/UI through an nginx Ingress with a cert-manager certificate.
    ingress:
      enabled: true
      annotations:
        kubernetes.io/ingress.class: nginx
        cert-manager.io/cluster-issuer: letsencrypt-prod
      tls:
      - hosts:
        - argocd.example.com
        secretName: argocd-tls
  # NOTE(review): `repository.enableOauth` does not look like a field of the
  # operator's ArgoCD spec - confirm against the installed CRD or remove it.
  repository:
    enableOauth: true
  dex:
    # The operator takes the Dex configuration as one YAML *string*, so it is
    # passed as a block scalar rather than as a nested mapping.
    # NOTE(review): the $GITHUB_* values are assumed to be resolved from a
    # secret at runtime - verify how they are supplied.
    config: |
      connectors:
      - type: github
        id: github
        name: GitHub
        config:
          clientID: $GITHUB_CLIENT_ID
          clientSecret: $GITHUB_CLIENT_SECRET
          orgs:
          - name: my-org

1.3 Docker Compose 部署

# docker-compose.yml - CI/CD development environment
version: "3.8"

services:
  # GitLab (source repository + CI)
  # NOTE(review): `latest` is a moving tag; consider pinning a GitLab release.
  gitlab:
    image: gitlab/gitlab-ce:latest
    container_name: gitlab
    hostname: gitlab.example.com
    environment:
      # The initial root password comes from GITLAB_ROOT_PASSWORD (shell or .env)
      # instead of being committed here; Compose refuses to start when the
      # variable is unset or empty.
      # NOTE(review): confirm the grafana[...] setting is still accepted by the
      # GitLab version behind `latest`.
      GITLAB_OMNIBUS_CONFIG: |
        external_url 'http://gitlab.example.com'
        gitlab_rails['gitlab_shell_ssh_port'] = 2222
        gitlab_rails['initial_root_password'] = '${GITLAB_ROOT_PASSWORD:?set GITLAB_ROOT_PASSWORD before starting gitlab}'
        prometheus_monitoring['enable'] = false
        grafana['enable'] = false
    ports:
      - "80:80"
      - "443:443"
      # Host port 2222 -> container sshd; matches gitlab_shell_ssh_port above.
      - "2222:22"
    volumes:
      - gitlab-config:/etc/gitlab
      - gitlab-logs:/var/log/gitlab
      - gitlab-data:/var/opt/gitlab
    networks:
      - ci-cd-net
    restart: unless-stopped

  # Jenkins (CI server)
  jenkins:
    container_name: jenkins
    image: jenkins/jenkins:lts-jdk17
    restart: unless-stopped
    networks:
      - ci-cd-net
    ports:
      # Web UI
      - "8080:8080"
      # Inbound agent port
      - "50000:50000"
    volumes:
      - jenkins-home:/var/jenkins_home
      # Host Docker socket, shared with the container
      - /var/run/docker.sock:/var/run/docker.sock

  # SonarQube (code quality)
  sonarqube:
    image: sonarqube:community
    container_name: sonarqube
    environment:
      - SONAR_ES_BOOTSTRAP_CHECKS_DISABLE=true
      # Current SonarQube images read SONAR_JDBC_*; the legacy SONARQUBE_JDBC_*
      # names are ignored, which would leave the server on its embedded database.
      - SONAR_JDBC_URL=jdbc:postgresql://postgres:5432/sonar
      - SONAR_JDBC_USERNAME=sonar
      - SONAR_JDBC_PASSWORD=sonar
    ports:
      - "9000:9000"
    volumes:
      - sonarqube-data:/opt/sonarqube/data
      - sonarqube-logs:/opt/sonarqube/logs
    networks:
      - ci-cd-net
    # Only orders container start-up; it does not wait for PostgreSQL to accept
    # connections.
    depends_on:
      - postgres
    restart: unless-stopped

  # PostgreSQL (database backing SonarQube)
  postgres:
    container_name: postgres
    image: postgres:15-alpine
    restart: unless-stopped
    networks:
      - ci-cd-net
    # Same user / password / database that the sonarqube service connects with.
    environment:
      POSTGRES_USER: sonar
      POSTGRES_PASSWORD: sonar
      POSTGRES_DB: sonar
    volumes:
      - postgres-data:/var/lib/postgresql/data

  # Image registry for the development environment.
  # Harbor is installed as a set of cooperating containers through its own
  # installer / Helm chart and cannot be started from one image, so this dev
  # stack runs the plain Docker registry on port 5000 instead.
  # NOTE(review): use the Harbor installer if Harbor-specific features
  # (projects, replication, built-in scanning) are needed here.
  registry:
    image: registry:2
    container_name: registry
    ports:
      - "5000:5000"
    volumes:
      # Storage path used by the registry image.
      - registry-data:/var/lib/registry
    networks:
      - ci-cd-net
    restart: unless-stopped

  # Nexus (artifact repository)
  nexus:
    container_name: nexus
    image: sonatype/nexus3:latest
    restart: unless-stopped
    networks:
      - ci-cd-net
    # 8081-8083 are published one-to-one on the host.
    ports:
      - "8081:8081"
      - "8082:8082"
      - "8083:8083"
    volumes:
      - nexus-data:/nexus-data

# Bridge network shared by every service in this file.
networks:
  ci-cd-net:
    driver: bridge

# Named volumes referenced by the services above (no driver options set).
volumes:
  # GitLab
  gitlab-config:
  gitlab-data:
  gitlab-logs:
  # Jenkins
  jenkins-home:
  # Nexus
  nexus-data:
  # PostgreSQL
  postgres-data:
  # Registry
  registry-data:
  # SonarQube
  sonarqube-data:
  sonarqube-logs:

2. Jenkins Pipeline 配置

2.1 声明式 Pipeline

// Jenkinsfile - complete CI/CD pipeline
//
// Flow: checkout -> Maven build -> unit / integration tests -> SonarQube ->
// security scans -> image build / scan / push -> deployment to the
// environment chosen with the ENVIRONMENT parameter.
//
// DOCKER_REGISTRY and SONAR_URL are referenced but not defined in this file;
// presumably they are global Jenkins environment variables - verify.
pipeline {
    agent {
        kubernetes {
            // One build pod with a container per toolchain (maven, docker,
            // kubectl). Each container runs `cat` with a TTY so it stays alive
            // for the container(...) steps below.
            yaml '''
            apiVersion: v1
            kind: Pod
            metadata:
              labels:
                app: ci-pipeline
            spec:
              containers:
              - name: maven
                image: maven:3.9-eclipse-temurin-17
                command: ['cat']
                tty: true
                volumeMounts:
                - name: maven-cache
                  mountPath: /root/.m2
              - name: docker
                image: docker:24-cli
                command: ['cat']
                tty: true
                volumeMounts:
                - name: docker-sock
                  mountPath: /var/run/docker.sock
              - name: kubectl
                image: bitnami/kubectl:latest
                command: ['cat']
                tty: true
              volumes:
              - name: maven-cache
                persistentVolumeClaim:
                  claimName: maven-cache-pvc
              - name: docker-sock
                hostPath:
                  path: /var/run/docker.sock
            '''
        }
    }

    environment {
        APP_NAME = 'myapp'
        IMAGE_NAME = "${DOCKER_REGISTRY}/${APP_NAME}"
        IMAGE_TAG = "${BUILD_NUMBER}"
        DEPLOY_ENV = "${params.ENVIRONMENT}"
        SONAR_TOKEN = credentials('sonar-token')
        // Username/password credential: also exposes DOCKER_CREDS_USR and
        // DOCKER_CREDS_PSW, which the Push Image stage uses.
        DOCKER_CREDS = credentials('docker-registry')
        KUBECONFIG = credentials('kubeconfig')
    }

    parameters {
        choice(
            name: 'ENVIRONMENT',
            choices: ['dev', 'staging', 'production'],
            description: 'Deployment environment'
        )
        booleanParam(
            name: 'SKIP_TESTS',
            defaultValue: false,
            description: 'Skip unit and integration tests'
        )
        booleanParam(
            name: 'FORCE_DEPLOY',
            defaultValue: false,
            description: 'Force deployment without approval'
        )
    }

    options {
        timeout(time: 30, unit: 'MINUTES')
        buildDiscarder(logRotator(numToKeepStr: '50'))
        disableConcurrentBuilds()
        // NOTE(review): at pipeline level this re-runs the whole pipeline,
        // deployment stages included, on failure - confirm that is intended.
        retry(3)
        timestamps()
        ansiColor('xterm')
    }

    stages {
        stage('Checkout') {
            steps {
                script {
                    def scmVars = checkout scm

                    env.GIT_COMMIT_SHORT = sh(
                        script: 'git rev-parse --short HEAD',
                        returnStdout: true
                    ).trim()

                    // The Git plugin normally checks out a detached HEAD, where
                    // `git rev-parse --abbrev-ref HEAD` only prints "HEAD".
                    // Prefer the branch name Jenkins already knows and fall
                    // back to git only when neither source is available.
                    def branch = env.BRANCH_NAME ?: scmVars?.GIT_BRANCH
                    if (!branch) {
                        branch = sh(
                            script: 'git rev-parse --abbrev-ref HEAD',
                            returnStdout: true
                        ).trim()
                    }
                    // GIT_BRANCH is reported as "origin/<branch>"; strip the remote.
                    env.GIT_BRANCH_NAME = branch.replaceFirst('^origin/', '')
                }
                sh 'echo "Commit: ${GIT_COMMIT_SHORT}, Branch: ${GIT_BRANCH_NAME}"'
            }
        }

        stage('Build') {
            steps {
                container('maven') {
                    sh '''
                        mvn clean package \
                            -DskipTests=${SKIP_TESTS} \
                            -Dbuild.number=${BUILD_NUMBER} \
                            -Dgit.commit=${GIT_COMMIT_SHORT}
                    '''
                }
            }
            post {
                success {
                    archiveArtifacts artifacts: 'target/*.jar', fingerprint: true
                }
            }
        }

        stage('Unit Tests') {
            when {
                expression { return !params.SKIP_TESTS }
            }
            steps {
                container('maven') {
                    sh 'mvn test'
                }
            }
            post {
                // Publish results even when the tests fail.
                always {
                    junit 'target/surefire-reports/*.xml'
                }
            }
        }

        stage('Integration Tests') {
            when {
                expression { return !params.SKIP_TESTS }
            }
            steps {
                container('maven') {
                    sh 'mvn verify -DskipUnitTests'
                }
            }
            post {
                always {
                    junit 'target/failsafe-reports/*.xml'
                }
            }
        }

        stage('Code Quality') {
            steps {
                container('maven') {
                    sh '''
                        mvn sonar:sonar \
                            -Dsonar.host.url=${SONAR_URL} \
                            -Dsonar.token=${SONAR_TOKEN} \
                            -Dsonar.projectKey=${APP_NAME} \
                            -Dsonar.branch.name=${GIT_BRANCH_NAME}
                    '''
                }
            }
        }

        stage('Security Scan') {
            parallel {
                stage('SAST') {
                    steps {
                        container('maven') {
                            sh 'mvn dependency-check:check'
                        }
                    }
                }
                stage('Secrets Scan') {
                    steps {
                        // `docker` is only available in the docker container of
                        // the build pod, so the step must run there.
                        // NOTE(review): the bind mount is resolved by the host
                        // Docker daemon; confirm the workspace path is visible
                        // on the node, otherwise /app will be empty.
                        container('docker') {
                            sh '''
                                docker run --rm -v "$(pwd)":/app \
                                    trufflesecurity/trufflehog:latest \
                                    filesystem /app --fail
                            '''
                        }
                    }
                }
            }
        }

        stage('Build Image') {
            steps {
                container('docker') {
                    script {
                        sh '''
                            docker build \
                                --build-arg BUILD_NUMBER=${BUILD_NUMBER} \
                                --build-arg GIT_COMMIT=${GIT_COMMIT_SHORT} \
                                -t ${IMAGE_NAME}:${IMAGE_TAG} \
                                -t ${IMAGE_NAME}:latest \
                                .
                        '''
                    }
                }
            }
        }

        stage('Image Security Scan') {
            steps {
                container('docker') {
                    // The docker:24-cli image does not ship trivy, so run the
                    // scanner from its own image and give it the Docker socket
                    // to read the image that was just built.
                    sh '''
                        docker run --rm \
                            -v /var/run/docker.sock:/var/run/docker.sock \
                            aquasec/trivy:latest image \
                            --severity HIGH,CRITICAL \
                            --exit-code 1 \
                            ${IMAGE_NAME}:${IMAGE_TAG}
                    '''
                }
            }
        }

        stage('Push Image') {
            steps {
                container('docker') {
                    script {
                        // printf with quoted variables keeps the password intact
                        // (no word splitting, globbing or echo escape handling).
                        sh '''
                            printf '%s' "${DOCKER_CREDS_PSW}" | docker login \
                                -u "${DOCKER_CREDS_USR}" \
                                --password-stdin \
                                "${DOCKER_REGISTRY}"

                            docker push ${IMAGE_NAME}:${IMAGE_TAG}
                            docker push ${IMAGE_NAME}:latest
                        '''
                    }
                }
            }
        }

        stage('Deploy to Dev') {
            when {
                expression { return params.ENVIRONMENT == 'dev' }
            }
            steps {
                container('kubectl') {
                    // Wait for the rollout so a broken deployment fails the
                    // build, as the production path already does.
                    sh '''
                        kubectl set image deployment/${APP_NAME} \
                            ${APP_NAME}=${IMAGE_NAME}:${IMAGE_TAG} \
                            -n dev
                        kubectl rollout status deployment/${APP_NAME} \
                            -n dev \
                            --timeout=3m
                    '''
                }
            }
        }

        stage('Deploy to Staging') {
            when {
                expression { return params.ENVIRONMENT == 'staging' }
            }
            steps {
                container('kubectl') {
                    sh '''
                        kubectl set image deployment/${APP_NAME} \
                            ${APP_NAME}=${IMAGE_NAME}:${IMAGE_TAG} \
                            -n staging
                        kubectl rollout status deployment/${APP_NAME} \
                            -n staging \
                            --timeout=5m
                    '''
                }
            }
        }

        stage('Deploy to Production') {
            when {
                expression { return params.ENVIRONMENT == 'production' }
            }
            stages {
                stage('Approval') {
                    // Skipped when FORCE_DEPLOY is set, i.e. that parameter
                    // bypasses the manual gate.
                    when {
                        expression { return !params.FORCE_DEPLOY }
                    }
                    steps {
                        input message: 'Deploy to Production?',
                            ok: 'Deploy',
                            submitter: 'admin,release-team'
                    }
                }

                stage('Deploy') {
                    steps {
                        container('kubectl') {
                            sh '''
                                kubectl set image deployment/${APP_NAME} \
                                    ${APP_NAME}=${IMAGE_NAME}:${IMAGE_TAG} \
                                    -n production
                            '''
                        }
                    }
                }

                stage('Verify Deployment') {
                    steps {
                        container('kubectl') {
                            sh '''
                                kubectl rollout status deployment/${APP_NAME} \
                                    -n production \
                                    --timeout=5m
                            '''
                        }
                    }
                }
            }
        }
    }

    post {
        always {
            cleanWs()
        }
        success {
            slackSend(
                color: 'good',
                message: "✅ Build Success: ${env.JOB_NAME} #${env.BUILD_NUMBER}\n${env.BUILD_URL}"
            )
        }
        failure {
            slackSend(
                color: 'danger',
                message: "❌ Build Failed: ${env.JOB_NAME} #${env.BUILD_NUMBER}\n${env.BUILD_URL}"
            )
        }
    }
}

2.2 GitLab CI 配置

# .gitlab-ci.yml - GitLab CI/CD pipeline
stages:
  - build
  - test
  - quality
  - security
  - package
  - deploy

variables:
  # Keep the Maven repository inside the project dir so `cache` can pick it up.
  MAVEN_OPTS: "-Dmaven.repo.local=$CI_PROJECT_DIR/.m2/repository"
  # Docker-in-Docker without TLS: the empty DOCKER_TLS_CERTDIR turns TLS off in
  # the dind service, and DOCKER_HOST points the docker CLI at its plain TCP port.
  DOCKER_HOST: "tcp://docker:2375"
  DOCKER_TLS_CERTDIR: ""
  IMAGE_TAG: $CI_REGISTRY_IMAGE:$CI_COMMIT_SHORT_SHA
  SONAR_USER_HOME: "$CI_PROJECT_DIR/.sonar"

cache:
  paths:
    - .m2/repository/
    - target/

# Build stage: package the application without running tests.
build:
  image: maven:3.9-eclipse-temurin-17
  stage: build
  script:
    - mvn clean package -DskipTests
  # The packaged jar is kept for one hour.
  artifacts:
    expire_in: 1 hour
    paths:
      - target/*.jar

# Unit tests
unit-test:
  image: maven:3.9-eclipse-temurin-17
  stage: test
  script:
    - mvn test
  # Coverage percentage is extracted from the job log with this pattern.
  coverage: '/Total.*?([0-9]{1,3})%/'
  # Upload the Surefire reports even when the job fails.
  artifacts:
    reports:
      junit:
        - target/surefire-reports/TEST-*.xml
    when: always

# Integration tests, run against throw-away PostgreSQL and Redis services.
integration-test:
  image: maven:3.9-eclipse-temurin-17
  stage: test
  services:
    - alias: postgres
      name: postgres:15
    - alias: redis
      name: redis:7
  variables:
    # Database, user and password for the postgres service
    POSTGRES_DB: testdb
    POSTGRES_USER: test
    POSTGRES_PASSWORD: test
    # Application settings pointing at the service aliases above
    SPRING_DATASOURCE_URL: jdbc:postgresql://postgres:5432/testdb
    SPRING_REDIS_HOST: redis
  script:
    - mvn verify -DskipUnitTests
  # Upload the Failsafe reports even when the job fails.
  artifacts:
    reports:
      junit:
        - target/failsafe-reports/TEST-*.xml
    when: always

# Code quality check (SonarQube)
sonarqube-check:
  stage: quality
  image: maven:3.9-eclipse-temurin-17
  variables:
    # Full clone instead of a shallow one.
    GIT_DEPTH: "0"
  script:
    # The token is passed with sonar.token, the same property the Jenkinsfile
    # uses; sonar.login is the deprecated spelling.
    # NOTE(review): sonar.token requires SonarQube 10+ - confirm the server version.
    - >-
      mvn sonar:sonar
      -Dsonar.projectKey=$CI_PROJECT_NAME
      -Dsonar.host.url=$SONAR_URL
      -Dsonar.token=$SONAR_TOKEN
  allow_failure: true

# Security scan of the checked-out sources
security-scan:
  image: docker:24-cli
  stage: security
  services:
    - docker:24-dind
  # Findings are reported but do not block the pipeline.
  allow_failure: true
  script:
    # Trivy filesystem scan; exits non-zero on HIGH or CRITICAL findings.
    - >-
      docker run --rm -v $PWD:/app
      aquasec/trivy:latest fs /app
      --severity HIGH,CRITICAL
      --exit-code 1

# Secret scan
secret-scan:
  stage: security
  image:
    name: trufflesecurity/trufflehog:latest
    # The image's entrypoint is the trufflehog binary itself; clear it so the
    # runner can start a shell and execute `script`.
    entrypoint: [""]
  script:
    - trufflehog filesystem . --fail
  allow_failure: true

# Build and push the Docker image
build-image:
  stage: package
  image: docker:24-cli
  services:
    - docker:24-dind
  before_script:
    # Feed the password through stdin so it does not show up in the process
    # list or in Docker's "insecure -p" warning.
    - echo "$CI_REGISTRY_PASSWORD" | docker login -u "$CI_REGISTRY_USER" --password-stdin "$CI_REGISTRY"
  script:
    # Tag the image both with the commit-specific tag and with `latest`.
    - >-
      docker build
      --build-arg BUILD_NUMBER=$CI_PIPELINE_ID
      --build-arg GIT_COMMIT=$CI_COMMIT_SHORT_SHA
      -t $IMAGE_TAG
      -t $CI_REGISTRY_IMAGE:latest
      .
    - docker push $IMAGE_TAG
    - docker push $CI_REGISTRY_IMAGE:latest
  only:
    - main
    - develop

# Image security scan
image-scan:
  stage: package
  image: docker:24-cli
  services:
    - docker:24-dind
  script:
    # The image is pulled from the project registry, so Trivy gets the job's
    # registry credentials through its TRIVY_USERNAME / TRIVY_PASSWORD variables.
    - >-
      docker run --rm
      -e TRIVY_USERNAME=$CI_REGISTRY_USER
      -e TRIVY_PASSWORD=$CI_REGISTRY_PASSWORD
      aquasec/trivy:latest image
      --severity HIGH,CRITICAL
      --exit-code 1
      $IMAGE_TAG
  allow_failure: true
  needs:
    - build-image
  # Same refs as build-image: on any other ref build-image is not part of the
  # pipeline and the `needs` entry above could not be resolved.
  only:
    - main
    - develop

# Deploy to the development environment (runs automatically on develop)
deploy-dev:
  stage: deploy
  image:
    name: bitnami/kubectl:latest
    # The image's entrypoint is kubectl; clear it so the runner can start a
    # shell and execute `script`.
    entrypoint: [""]
  environment:
    name: development
    url: https://dev.example.com
  script:
    - kubectl config use-context dev-cluster
    - >-
      kubectl set image deployment/myapp
      myapp=$IMAGE_TAG
      -n dev
    # Fail the job if the rollout does not complete within 3 minutes.
    - kubectl rollout status deployment/myapp -n dev --timeout=3m
  only:
    - develop

# Deploy to the staging environment (manual, main only)
deploy-staging:
  stage: deploy
  image:
    name: bitnami/kubectl:latest
    # The image's entrypoint is kubectl; clear it so the runner can start a
    # shell and execute `script`.
    entrypoint: [""]
  environment:
    name: staging
    url: https://staging.example.com
  script:
    - kubectl config use-context staging-cluster
    - >-
      kubectl set image deployment/myapp
      myapp=$IMAGE_TAG
      -n staging
    # Fail the job if the rollout does not complete within 5 minutes.
    - kubectl rollout status deployment/myapp -n staging --timeout=5m
  only:
    - main
  when: manual

# Deploy to the production environment (manual, main only)
deploy-prod:
  stage: deploy
  image:
    name: bitnami/kubectl:latest
    # The image's entrypoint is kubectl; clear it so the runner can start a
    # shell and execute `script`.
    entrypoint: [""]
  environment:
    name: production
    url: https://www.example.com
  script:
    - kubectl config use-context prod-cluster
    - >-
      kubectl set image deployment/myapp
      myapp=$IMAGE_TAG
      -n production
    # Fail the job if the rollout does not complete within 5 minutes.
    - kubectl rollout status deployment/myapp -n production --timeout=5m
  # Exactly one `only` key: a mapping cannot hold the same key twice (most
  # parsers silently keep the last one). The job is limited to main because
  # build-image publishes $IMAGE_TAG only for main and develop.
  # NOTE(review): if production must be released from tags, build-image has to
  # run for tags as well.
  only:
    - main
  when: manual

# Rollback template. The leading dot makes this a hidden job: it never runs by
# itself and is meant to be reused from a concrete job (e.g. via `extends`),
# which must also provide DEPLOY_ENV.
.rollback:
  stage: deploy
  image:
    name: bitnami/kubectl:latest
    # The image's entrypoint is kubectl; clear it so the runner can start a
    # shell and execute `script`.
    entrypoint: [""]
  script:
    # Revert the deployment to its previous revision.
    - kubectl rollout undo deployment/myapp -n $DEPLOY_ENV
  when: manual

3. ArgoCD GitOps 配置

3.1 Application 配置

# application.yaml - Argo CD Application definition
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: myapp
  namespace: argocd
  # Deleting the Application also deletes the resources it manages.
  finalizers:
    - resources-finalizer.argocd.argoproj.io
spec:
  project: default

  source:
    repoURL: https://gitlab.example.com/devops/myapp-gitops.git
    targetRevision: main
    path: overlays/production

    # Kustomize settings
    kustomize:
      namePrefix: prod-
      # Production is pinned to an immutable tag, the same one the production
      # overlay declares, instead of the mutable `latest`.
      # NOTE(review): this override and the overlay's `images` entry must be
      # bumped together; dropping the override makes Git the only source.
      images:
        - myregistry.example.com/myapp:v1.2.3

    # Or Helm settings
    # helm:
    #   valueFiles:
    #     - values-production.yaml
    #   parameters:
    #     - name: image.tag
    #       value: v1.2.3

  destination:
    server: https://kubernetes.default.svc
    namespace: production

  syncPolicy:
    # Sync automatically, delete resources removed from Git, and revert manual
    # changes made in the cluster.
    automated:
      prune: true
      selfHeal: true
      allowEmpty: false
    syncOptions:
      - CreateNamespace=true
      - PrunePropagationPolicy=foreground
      - PruneLast=true
      - ApplyOutOfSyncOnly=true
    # Up to 5 retries with exponential backoff: 5s, doubling, capped at 3m.
    retry:
      limit: 5
      backoff:
        duration: 5s
        factor: 2
        maxDuration: 3m

  # Replica-count differences on Deployments are not reported as drift.
  ignoreDifferences:
    - group: apps
      kind: Deployment
      jsonPointers:
        - /spec/replicas

  info:
    - name: Description
      value: Production deployment for myapp
---
# ApplicationSet - 多环境部署
# Generates one Application per entry of the list generator below; the
# {{env}}, {{namespace}} and {{revision}} placeholders are filled from it.
apiVersion: argoproj.io/v1alpha1
kind: ApplicationSet
metadata:
  namespace: argocd
  name: myapp-environments
spec:
  generators:
    - list:
        elements:
          # dev follows the develop branch; staging and production follow main.
          - {env: dev, namespace: dev, revision: develop}
          - {env: staging, namespace: staging, revision: main}
          - {env: production, namespace: production, revision: main}
  template:
    metadata:
      name: 'myapp-{{env}}'
    spec:
      project: default
      destination:
        namespace: '{{namespace}}'
        server: https://kubernetes.default.svc
      source:
        path: 'overlays/{{env}}'
        repoURL: https://gitlab.example.com/devops/myapp-gitops.git
        targetRevision: '{{revision}}'
      # Automatic sync with pruning and self-healing.
      syncPolicy:
        automated:
          selfHeal: true
          prune: true

3.2 Kustomize 项目结构

myapp-gitops/
├── base/
│   ├── deployment.yaml
│   ├── service.yaml
│   ├── configmap.yaml
│   ├── ingress.yaml
│   └── kustomization.yaml
├── overlays/
│   ├── dev/
│   │   ├── kustomization.yaml
│   │   └── patches/
│   │       └── deployment-patch.yaml
│   ├── staging/
│   │   ├── kustomization.yaml
│   │   └── patches/
│   └── production/
│       ├── kustomization.yaml
│       └── patches/
└── README.md
# base/kustomization.yaml
# Shared base that every environment overlay builds on.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

resources:
  - deployment.yaml
  - service.yaml
  - configmap.yaml
  - ingress.yaml

# `labels` replaces the deprecated `commonLabels`; includeSelectors keeps the
# old behavior of also adding the labels to selectors.
labels:
  - pairs:
      app: myapp
      team: platform
    includeSelectors: true

# Default image tag; overlays override newTag per environment.
images:
  - name: myregistry.example.com/myapp
    newTag: latest

# NOTE(review): config.yaml must exist next to this file for the generator to
# work - confirm it is present in base/.
configMapGenerator:
  - name: app-config
    files:
      - config.yaml

# Placeholder value only; the real secret must not be committed here.
secretGenerator:
  - name: app-secrets
    type: Opaque
    literals:
      - DB_PASSWORD=placeholder
---
# overlays/production/kustomization.yaml
# Production overlay on top of ../../base.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

namespace: production

resources:
  - ../../base

# `patches` replaces the deprecated `patchesStrategicMerge`; a path entry is
# applied as a strategic-merge patch to the resource named in the patch file.
patches:
  - path: patches/deployment-patch.yaml

# Pin the production image to a fixed tag.
images:
  - name: myregistry.example.com/myapp
    newTag: v1.2.3

replicas:
  - name: myapp
    count: 3

# Merge production-specific keys into the base's app-config ConfigMap.
configMapGenerator:
  - name: app-config
    behavior: merge
    literals:
      - LOG_LEVEL=info
      - ENV=production
---
# overlays/production/patches/deployment-patch.yaml
# Production sizing and JVM settings for the myapp container.
kind: Deployment
apiVersion: apps/v1
metadata:
  name: myapp
spec:
  template:
    spec:
      containers:
        - name: myapp
          # JVM heap: 512m initial, 1g maximum.
          env:
            - name: JAVA_OPTS
              value: '-Xmx1g -Xms512m'
          resources:
            limits:
              cpu: '2'
              memory: '2Gi'
            requests:
              cpu: '500m'
              memory: '512Mi'

4. 监控与告警

4.1 流水线指标

# prometheus-ci-cd.yml
# Scrape jobs for the CI/CD stack.
scrape_configs:
  # Jenkins metrics endpoint, under the /jenkins URL prefix.
  - job_name: 'jenkins'
    metrics_path: /jenkins/prometheus
    static_configs:
      - targets: ['jenkins:8080']

  # GitLab serves its Rails metrics at /-/metrics, not at the default /metrics.
  # NOTE(review): the scraper's address must be on GitLab's monitoring
  # allowlist and port 8080 must be reachable from Prometheus - verify.
  - job_name: 'gitlab'
    metrics_path: /-/metrics
    static_configs:
      - targets: ['gitlab:8080']

  - job_name: 'argocd'
    static_configs:
      - targets: ['argocd-metrics:8082']

4.2 告警规则

# ci-cd-alerts.yml
# Alerting rules for Jenkins, Argo CD and GitLab CI.
# NOTE(review): the jenkins_* and gitlab_ci_* metric names depend on the
# exporter / plugin in use - confirm they match what is actually scraped.
groups:
  - name: ci-cd-alerts
    rules:
      # Build failure rate: fires when more than 10% of builds failed.
      # The expression yields a 0-1 ratio, so the description formats it with
      # humanizePercentage instead of printing the raw ratio followed by "%".
      - alert: BuildFailureRate
        expr: |
          rate(jenkins_builds_failed_total[1h])
          / rate(jenkins_builds_total[1h]) > 0.1
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: "构建失败率过高"
          description: "过去 1 小时构建失败率 {{ $value | humanizePercentage }}"

      # Build duration: P95 above 1800 seconds (30 minutes).
      - alert: BuildDurationHigh
        expr: |
          histogram_quantile(0.95,
            rate(jenkins_build_duration_seconds_bucket[1h])
          ) > 1800
        for: 15m
        labels:
          severity: warning
        annotations:
          summary: "构建时间过长"
          description: "P95 构建时间 {{ $value | printf \"%.0f\" }}秒"

      # Argo CD sync failure. Argo CD 2.x reports sync and health state as
      # labels of argocd_app_info; the separate argocd_app_sync_status /
      # argocd_app_health_status series are legacy metrics.
      - alert: ArgoCDSyncFailed
        expr: argocd_app_info{sync_status!="Synced"} == 1
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "ArgoCD 同步失败"
          description: "应用 {{ $labels.name }} 同步状态: {{ $labels.sync_status }}"

      # Application health is not Healthy.
      - alert: ArgoCDAppUnhealthy
        expr: argocd_app_info{health_status!="Healthy"} == 1
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: "应用健康状态异常"
          description: "应用 {{ $labels.name }} 健康状态: {{ $labels.health_status }}"

      # GitLab CI queue backlog: more than 50 pending jobs.
      - alert: GitLabCIQueueBacklog
        expr: gitlab_ci_pending_jobs > 50
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: "GitLab CI 队列积压"
          description: "待处理任务数: {{ $value }}"

5. 最佳实践

5.1 流水线设计原则

| 原则 | 说明 |
| --- | --- |
| 快速反馈 | 构建时间控制在 10 分钟内 |
| 快速失败 | 失败时快速终止,节省资源 |
| 幂等性 | 多次执行结果一致 |
| 可重复 | 相同输入产生相同输出 |
| 安全 | 凭证管理、权限控制 |

5.2 部署策略

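| 策略 | 适用场景 | 风险等级 |
| --- | --- | --- |
| 滚动更新 | 常规部署 | (原文缺失,待补充) |
| 蓝绿部署 | 关键服务 | (原文缺失,待补充) |
| 金丝雀发布 | 大规模服务 | (原文缺失,待补充) |
| A/B 测试 | 功能验证 | (原文缺失,待补充) |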

6. 参考资料


文档版本: 1.0 | 更新日期: 2024-01-15 | 适用环境: Jenkins 2.x, GitLab CE/EE, ArgoCD 2.x