SRE 话题文档 #3:Nginx / Envoy 流量管理与优化
目录
1. 生产环境部署架构
1.1 整体架构图
┌─────────────────────────────────────────────────────────────┐
│ 外部流量入口 │
└─────────────────────────────────────────────────────────────┘
│
▼
┌─────────────────────────────────────────────────────────────┐
│ 负载均衡层 (LB) │
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
│ │ LB Node 1 │ │ LB Node 2 │ │ LB Node 3 │ │
│ │ (Nginx) │ │ (Nginx) │ │ (Nginx) │ │
│ └─────────────┘ └─────────────┘ └─────────────┘ │
│ │ │ │ │
│ └──────────────────┼──────────────────┘ │
│ │ Keepalived VIP │
└──────────────────────────────┼─────────────────────────────┘
│
┌──────────────────────────────┼─────────────────────────────┐
│ ▼ │
│ 服务网格入口层 (Ingress Gateway) │
│ ┌─────────────────────────────────────────────────────┐ │
│ │ Envoy Proxy Mesh │ │
│ │ ┌───────────┐ ┌───────────┐ ┌───────────┐ │ │
│ │ │ Envoy 1 │ │ Envoy 2 │ │ Envoy 3 │ │ │
│ │ │ (Ingress) │ │ (Ingress) │ │ (Ingress) │ │ │
│ │ └───────────┘ └───────────┘ └───────────┘ │ │
│ └─────────────────────────────────────────────────────┘ │
│ │ xDS API (动态配置) │
│ ┌─────────────────────────────────────────────────────┐ │
│ │ Control Plane │ │
│ │ (Istiod / Envoy Control Plane / 自研控制面) │ │
│ └─────────────────────────────────────────────────────┘ │
└─────────────────────────────────────────────────────────────┘
│
┌──────────────────────────────┼─────────────────────────────┐
│ ▼ │
│ 业务服务层 (K8s Pods) │
│ ┌─────────────────────────────────────────────────────┐ │
│ │ Kubernetes Cluster │ │
│ │ │ │
│ │ ┌─────────────┐ ┌─────────────┐ ┌───────────┐ │ │
│ │ │ API Service │ │ Web Service │ │ Svc Mesh │ │ │
│ │ │ (Sidecar) │ │ (Sidecar) │ │ (Sidecar) │ │ │
│ │ │ Envoy │ │ Envoy │ │ Envoy │ │ │
│ │ └─────────────┘ └─────────────┘ └───────────┘ │ │
│ │ │ │
│ └─────────────────────────────────────────────────────┘ │
└─────────────────────────────────────────────────────────────┘
│
┌──────────────────────────────┼─────────────────────────────┐
│ ▼ │
│ 后端服务层 │
│ ┌───────────┐ ┌───────────┐ ┌───────────┐ │
│ │ Redis │ │ MySQL │ │ MQ │ │
│ │ Cluster │ │ Cluster │ │ Cluster │ │
│ └───────────┘ └───────────┘ └───────────┘ │
└─────────────────────────────────────────────────────────────┘
1.2 Kubernetes 部署配置 - Nginx Ingress Controller
# nginx-ingress-controller-deployment.yaml
# Deployment of the ingress-nginx controller: three replicas, preferably on
# different nodes, serving HTTP/HTTPS and exposing metrics/health on 10254.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: nginx-ingress-controller
  namespace: ingress-nginx
  labels:
    app.kubernetes.io/name: ingress-nginx
    app.kubernetes.io/component: controller
spec:
  replicas: 3
  selector:
    matchLabels:
      app.kubernetes.io/name: ingress-nginx
      app.kubernetes.io/component: controller
  template:
    metadata:
      labels:
        app.kubernetes.io/name: ingress-nginx
        app.kubernetes.io/component: controller
      annotations:
        # Scrape hints for annotation-based Prometheus discovery.
        prometheus.io/scrape: "true"
        prometheus.io/port: "10254"
    spec:
      serviceAccountName: nginx-ingress-serviceaccount
      # Long grace period so in-flight connections can drain; it is only
      # useful together with the preStop hook on the container below.
      terminationGracePeriodSeconds: 300
      affinity:
        podAntiAffinity:
          # Soft rule: prefer one controller per node, but still schedule
          # when fewer nodes than replicas are available.
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              podAffinityTerm:
                labelSelector:
                  matchLabels:
                    app.kubernetes.io/name: ingress-nginx
                topologyKey: kubernetes.io/hostname
      containers:
        - name: nginx-ingress-controller
          # k8s.gcr.io is frozen and never received v1.9.x images; the
          # project publishes to registry.k8s.io.
          image: registry.k8s.io/ingress-nginx/controller:v1.9.5
          args:
            - /nginx-ingress-controller
            - --configmap=$(POD_NAMESPACE)/nginx-configuration
            - --tcp-services-configmap=$(POD_NAMESPACE)/tcp-services
            - --udp-services-configmap=$(POD_NAMESPACE)/udp-services
            - --publish-service=$(POD_NAMESPACE)/ingress-nginx
            - --annotations-prefix=nginx.ingress.kubernetes.io
            - --enable-metrics=true
            - --metrics-per-host=true
          env:
            - name: POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
            - name: POD_NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
            # Preloads the mimalloc allocator shipped at this path.
            - name: LD_PRELOAD
              value: /usr/local/lib/libmimalloc.so
          resources:
            requests:
              cpu: "500m"
              memory: "512Mi"
            limits:
              cpu: "2000m"
              memory: "2Gi"
          ports:
            - name: http
              containerPort: 80
              protocol: TCP
            - name: https
              containerPort: 443
              protocol: TCP
            - name: metrics
              containerPort: 10254
              protocol: TCP
          lifecycle:
            preStop:
              exec:
                # Graceful drain helper used by the upstream manifests;
                # NOTE(review): confirm the binary exists in your image.
                command:
                  - /wait-shutdown
          livenessProbe:
            httpGet:
              path: /healthz
              port: 10254
            initialDelaySeconds: 30
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 3
          readinessProbe:
            httpGet:
              path: /healthz
              port: 10254
            initialDelaySeconds: 10
            periodSeconds: 5
            timeoutSeconds: 3
            failureThreshold: 3
          securityContext:
            allowPrivilegeEscalation: false
            runAsNonRoot: true
            runAsUser: 101
            capabilities:
              drop:
                - ALL
              # Needed to bind ports 80/443 as a non-root user.
              add:
                - NET_BIND_SERVICE
            seccompProfile:
              type: RuntimeDefault
---
# nginx-configuration-configmap.yaml
# Global tuning for the ingress-nginx controller. Only keys documented in the
# ingress-nginx ConfigMap reference take effect; the controller ignores
# unknown keys, so the names below follow that reference. All values are
# strings, as required for ConfigMap data.
apiVersion: v1
kind: ConfigMap
metadata:
  name: nginx-configuration
  namespace: ingress-nginx
data:
  # Worker processes
  worker-processes: "auto"
  worker-cpu-affinity: "auto"
  # was "worker-rlimit-nofile" (not a ConfigMap key)
  max-worker-open-files: "65536"

  # Connections
  # was "worker-connections"
  max-worker-connections: "16384"
  # was "multi-accept: on"; the key is enable-multi-accept and takes a boolean.
  # ("use-epoll" was dropped: there is no such option.)
  enable-multi-accept: "true"

  # Client keep-alive
  keep-alive: "75"
  keep-alive-requests: "1000"

  # Upstream keep-alive
  upstream-keepalive-connections: "1000"
  upstream-keepalive-requests: "10000"
  upstream-keepalive-timeout: "60"

  # Proxy timeouts (seconds)
  proxy-connect-timeout: "10"
  proxy-read-timeout: "60"
  proxy-send-timeout: "60"

  # Request body
  client-body-buffer-size: "16k"
  client-body-timeout: "60"
  # was "client-max-body-size"; the ConfigMap key is proxy-body-size
  proxy-body-size: "50m"

  # Response buffering
  proxy-buffer-size: "16k"
  proxy-buffers-number: "4"
  # NOTE(review): confirm this key is supported by controller v1.9.5.
  proxy-busy-buffers-size: "32k"

  # Gzip (was "gzip"; the switch is use-gzip)
  use-gzip: "true"
  gzip-level: "6"
  gzip-min-length: "1024"
  gzip-types: "text/plain text/css application/json application/javascript text/xml application/xml application/xml+rss text/javascript"

  # Access log: JSON lines. Escaping must be enabled, otherwise quotes in
  # the request line or user agent produce invalid JSON.
  log-format-escape-json: "true"
  log-format-upstream: '{"time": "$time_iso8601", "remote_addr": "$remote_addr", "request": "$request", "status": "$status", "body_bytes_sent": "$body_bytes_sent", "request_time": "$request_time", "upstream_response_time": "$upstream_response_time", "upstream_addr": "$upstream_addr", "http_user_agent": "$http_user_agent", "request_id": "$req_id"}'

  # Rate limiting: the ConfigMap only selects the key used for the zones.
  # The limits themselves are per-Ingress annotations, e.g.
  #   nginx.ingress.kubernetes.io/limit-connections: "500"
  #   nginx.ingress.kubernetes.io/limit-rps: "1000"
  # (the former limit-connections / limit-rps / *-zone keys had no effect).
  limit-conn-zone-variable: "$binary_remote_addr"
1.3 Kubernetes 部署配置 - Envoy Gateway
# envoy-gateway-deployment.yaml
# Deployment of the Envoy Gateway control plane: three replicas, preferably
# on different nodes, configured from the envoy-gateway-config ConfigMap.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: envoy-gateway
  namespace: envoy-gateway-system
  labels:
    app: envoy-gateway
spec:
  replicas: 3
  selector:
    matchLabels:
      app: envoy-gateway
  template:
    metadata:
      labels:
        app: envoy-gateway
      annotations:
        prometheus.io/scrape: "true"
        prometheus.io/port: "19001"
    spec:
      affinity:
        podAntiAffinity:
          # Soft anti-affinity: spread replicas across nodes when possible.
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              podAffinityTerm:
                labelSelector:
                  matchLabels:
                    app: envoy-gateway
                topologyKey: kubernetes.io/hostname
      containers:
        - name: envoy-gateway
          image: envoyproxy/gateway:v1.0.2
          args:
            # The control-plane subcommand is "server"; "serve" is not a
            # valid subcommand and makes the container exit immediately.
            - "server"
            - "--config-path=/etc/envoy-gateway/config.yaml"
          env:
            - name: ENVOY_GATEWAY_NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
            - name: POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
          resources:
            requests:
              cpu: "500m"
              memory: "512Mi"
            limits:
              cpu: "2000m"
              memory: "2Gi"
          ports:
            - name: http
              containerPort: 8080
              protocol: TCP
            - name: https
              containerPort: 8443
              protocol: TCP
            - name: admin
              containerPort: 19000
              protocol: TCP
            - name: metrics
              containerPort: 19001
              protocol: TCP
            # Health-probe endpoint of the controller manager.
            - name: health
              containerPort: 8081
              protocol: TCP
          volumeMounts:
            - name: config
              mountPath: /etc/envoy-gateway
          # /healthz and /readyz are served on the health-probe port (8081),
          # not on the admin port, so probing 19000 would restart healthy pods.
          # NOTE(review): confirm the port against the release manifests.
          livenessProbe:
            httpGet:
              path: /healthz
              port: 8081
            initialDelaySeconds: 30
            periodSeconds: 10
          readinessProbe:
            httpGet:
              path: /readyz
              port: 8081
            initialDelaySeconds: 5
            periodSeconds: 5
      volumes:
        - name: config
          configMap:
            name: envoy-gateway-config
---
# envoy-gateway-config.yaml
# Static configuration of the Envoy Gateway control plane, mounted as
# config.yaml. Field names follow the EnvoyGateway v1alpha1 API; fields that
# are not part of that schema are ignored or rejected.
# NOTE(review): verify the field names against the v1.0 API reference.
apiVersion: v1
kind: ConfigMap
metadata:
  name: envoy-gateway-config
  namespace: envoy-gateway-system
data:
  config.yaml: |
    apiVersion: gateway.envoyproxy.io/v1alpha1
    kind: EnvoyGateway
    gateway:
      controllerName: gateway.envoyproxy.io/gatewayclass-controller
    provider:
      type: Kubernetes
    logging:
      level:
        default: info
    # Admin server address uses host/port (there is no "enable" switch or
    # socketAddress wrapper in this API).
    admin:
      address:
        host: 0.0.0.0
        port: 19000
    # Prometheus metrics are on unless explicitly disabled.
    telemetry:
      metrics:
        prometheus:
          disable: false
    # Global rate limiting is enabled by a top-level "rateLimit" section with
    # a Redis backend (rateLimit.backend.type: Redis + redis.url). It is left
    # out here because no Redis endpoint is defined in this document; the
    # former "provider.kubernetes.rateLimit: true" flag is not a valid field.
    #
    # Proxy access-log format is not an EnvoyGateway setting: declare it on
    # the EnvoyProxy resource under spec.telemetry.accessLog.
---
# EnvoyProxy custom resource
# Sizes the managed Envoy data-plane Deployment, sets the JSON access-log
# format, and adds a bootstrap fragment that serves Prometheus stats on 19001.
apiVersion: gateway.envoyproxy.io/v1alpha1
kind: EnvoyProxy
metadata:
  name: envoy-proxy-config
  namespace: envoy-gateway-system
spec:
  provider:
    type: Kubernetes
    kubernetes:
      envoyDeployment:
        replicas: 3
        container:
          resources:
            requests:
              cpu: "500m"
              memory: "512Mi"
            limits:
              cpu: "2000m"
              memory: "2Gi"
  telemetry:
    accessLog:
      settings:
        - format:
            type: JSON
            json:
              timestamp: "%START_TIME%"
              method: "%REQ(:METHOD)%"
              path: "%REQ(X-ENVOY-ORIGINAL-PATH?:PATH)%"
              protocol: "%PROTOCOL%"
              response_code: "%RESPONSE_CODE%"
              response_flags: "%RESPONSE_FLAGS%"
              bytes_received: "%BYTES_RECEIVED%"
              bytes_sent: "%BYTES_SENT%"
              duration: "%DURATION%"
              upstream_service_time: "%RESP(X-ENVOY-UPSTREAM-SERVICE-TIME)%"
              x_forwarded_for: "%REQ(X-FORWARDED-FOR)%"
              user_agent: "%REQ(USER-AGENT)%"
              request_id: "%REQ(X-REQUEST-ID)%"
              upstream_address: "%UPSTREAM_REMOTE_ADDRESS%"
              upstream_cluster: "%UPSTREAM_CLUSTER%"
          sinks:
            - type: File
              file:
                path: /dev/stdout
  # In the v1.0 API "bootstrap" is an object, not a bare string. "Merge"
  # overlays this fragment on the generated bootstrap; "Replace" would drop
  # the xDS wiring because the fragment is not a complete bootstrap.
  # NOTE(review): the generated bootstrap may already bind 19001 for
  # readiness/stats - confirm there is no listener conflict before applying.
  bootstrap:
    type: Merge
    value: |
      admin:
        address:
          socket_address:
            # Loopback only: the admin API can shut the proxy down, and the
            # stats listener below is the externally reachable surface.
            address: 127.0.0.1
            port_value: 19000
      static_resources:
        listeners:
          - name: prometheus_listener
            address:
              socket_address:
                address: 0.0.0.0
                port_value: 19001
            filter_chains:
              - filters:
                  - name: envoy.filters.network.http_connection_manager
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
                      stat_prefix: prometheus
                      route_config:
                        virtual_hosts:
                          - name: prometheus
                            domains:
                              - "*"
                            routes:
                              - match:
                                  prefix: /metrics
                                route:
                                  cluster: prometheus_cluster
                                  # The admin API serves Prometheus text at
                                  # /stats/prometheus, not at /metrics.
                                  prefix_rewrite: /stats/prometheus
                      http_filters:
                        - name: envoy.filters.http.router
                          typed_config:
                            "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
        clusters:
          - name: prometheus_cluster
            type: STATIC
            lb_policy: ROUND_ROBIN
            load_assignment:
              cluster_name: prometheus_cluster
              endpoints:
                - lb_endpoints:
                    - endpoint:
                        address:
                          # Must match the admin listener above. The previous
                          # pipe (/tmp/envoy.admin) was never created because
                          # admin listens on TCP.
                          socket_address:
                            address: 127.0.0.1
                            port_value: 19000
1.4 Docker Compose 部署配置
# docker-compose-nginx.yaml
# Single-host stack: Nginx load balancer, its Prometheus exporter, and a
# standalone Envoy proxy.
version: '3.8'

services:
  nginx-lb:
    image: nginx:1.25.4-alpine
    container_name: nginx-lb
    restart: unless-stopped
    ports:
      - "80:80"
      - "443:443"
      # 9113 is NOT published here: the exporter service owns that host
      # port, and publishing it twice prevents the second container starting.
    volumes:
      - ./nginx/nginx.conf:/etc/nginx/nginx.conf:ro
      - ./nginx/conf.d:/etc/nginx/conf.d:ro
      - ./nginx/ssl:/etc/nginx/ssl:ro
      - ./nginx/logs:/var/log/nginx
    environment:
      - TZ=Asia/Shanghai
      # NOTE(review): nginx.conf is mounted read-only and does not reference
      # these variables; confirm whether anything consumes them.
      - NGINX_WORKER_PROCESSES=auto
      - NGINX_WORKER_CONNECTIONS=16384
    deploy:
      resources:
        limits:
          cpus: '2.0'
          memory: 2G
        reservations:
          cpus: '0.5'
          memory: 512M
    healthcheck:
      # "nginx -t" only validates the config file and passes even when the
      # server is not answering; probe the local stub_status endpoint instead.
      test: ["CMD", "wget", "-q", "-O", "/dev/null", "http://127.0.0.1:9113/stub_status"]
      interval: 30s
      timeout: 10s
      retries: 3
    networks:
      - frontend
      - backend

  nginx-exporter:
    image: nginx/nginx-prometheus-exporter:1.1.0
    container_name: nginx-exporter
    restart: unless-stopped
    ports:
      - "9113:9113"
    command:
      # Exporter 1.x only accepts double-dash flags. The target is the
      # dedicated stub_status listener, because port 80 answers every request
      # with a 301 redirect. nginx must expose stub_status on a non-loopback
      # address for this to be reachable from another container.
      - '--nginx.scrape-uri=http://nginx-lb:9113/stub_status'
    depends_on:
      - nginx-lb
    networks:
      # Must share a network with nginx-lb to resolve and reach it.
      - backend
      - monitoring

  # Envoy Gateway
  envoy-gateway:
    image: envoyproxy/envoy:v1.29.0
    container_name: envoy-gateway
    restart: unless-stopped
    ports:
      - "8080:8080"
      - "8443:8443"
      # The admin API allows shutdown and config dumps: expose it to the
      # host loopback only.
      - "127.0.0.1:19000:19000"
      - "19001:19001"
    volumes:
      - ./envoy/envoy.yaml:/etc/envoy/envoy.yaml:ro
      - ./envoy/cds.yaml:/etc/envoy/cds.yaml:ro
      - ./envoy/lds.yaml:/etc/envoy/lds.yaml:ro
    environment:
      - TZ=Asia/Shanghai
    deploy:
      resources:
        limits:
          cpus: '2.0'
          memory: 2G
        reservations:
          cpus: '0.5'
          memory: 512M
    healthcheck:
      # NOTE(review): confirm curl is present in the envoy image; if it is
      # not, this check always fails.
      test: ["CMD", "curl", "-f", "http://localhost:19000/ready"]
      interval: 30s
      timeout: 10s
      retries: 3
    networks:
      - frontend
      - backend
      - monitoring

networks:
  frontend:
    driver: bridge
  backend:
    driver: bridge
  # Pre-existing network shared with the monitoring stack. "external" cannot
  # be combined with "driver": Compose rejects the file.
  monitoring:
    external: true
2. 关键参数调优
2.1 Nginx 核心配置详解
# /etc/nginx/nginx.conf
# ----------------------------------------
# Nginx main configuration - production tuning
# ----------------------------------------

# Account the worker processes run as.
user                 nginx;

# One worker per CPU core ("auto").
worker_processes     auto;

# Open-file limit (RLIMIT_NOFILE) applied to each worker. Keep it above
# worker_connections with headroom: a proxied request holds two sockets.
worker_rlimit_nofile 65536;

# Log "error" and above; switch to "debug" only while troubleshooting.
error_log            /var/log/nginx/error.log error;

# Where the master process writes its PID.
pid                  /var/run/nginx.pid;

# Load dynamic modules dropped in by packages.
include              /etc/nginx/modules-enabled/*.conf;
events {
    # Linux epoll event notification.
    use                 epoll;

    # Per-worker connection cap; the overall ceiling is
    # worker_processes * worker_connections.
    worker_connections  16384;

    # Accept every pending connection per wake-up rather than one at a time.
    multi_accept        on;

    # Workers do not take turns accepting new connections.
    accept_mutex        off;

    # Cap on outstanding asynchronous I/O operations per worker
    # (relevant when aio is used together with epoll).
    worker_aio_requests 128;
}
http {
    # ========================================
    # Basics
    # ========================================
    include       /etc/nginx/mime.types;
    default_type  application/octet-stream;
    charset       utf-8;

    # Do not advertise the nginx version.
    server_tokens off;

    # Efficient file transmission.
    sendfile    on;
    tcp_nopush  on;
    tcp_nodelay on;

    # ========================================
    # Timeouts
    # ========================================
    # Idle client keep-alive lifetime and max requests per connection.
    keepalive_timeout  75s;
    keepalive_requests 10000;

    # Time allowed for the client to send headers / body.
    client_header_timeout 60s;
    client_body_timeout   60s;

    # Time allowed between two writes to the client.
    send_timeout 60s;

    # ========================================
    # Request size and buffers
    # ========================================
    client_max_body_size        50m;
    client_body_buffer_size     128k;
    client_header_buffer_size   4k;
    large_client_header_buffers 4 32k;

    # ========================================
    # Proxy buffering
    # ========================================
    proxy_buffering          on;
    # Buffer for the first part of the response (headers).
    proxy_buffer_size        16k;
    # Number and size of per-connection response buffers.
    proxy_buffers            8 32k;
    # Portion of the buffers that may be busy sending to the client.
    proxy_busy_buffers_size  64k;
    # Upper bound for a response spooled to a temporary file.
    proxy_max_temp_file_size 1024m;

    # ========================================
    # Upstream pool with keep-alive
    # ========================================
    upstream backend_pool {
        # Shared memory so state (e.g. max_conns) is common to all workers.
        zone backend_pool 64k;

        # Balancing method; alternatives: ip_hash / hash / random.
        least_conn;

        server 10.0.0.1:8080 weight=5 max_conns=1000;
        server 10.0.0.2:8080 weight=5 max_conns=1000;
        # Used only when the primary servers are unavailable.
        server 10.0.0.3:8080 weight=5 max_conns=1000 backup;

        # Idle upstream connections cached per worker, with recycling limits.
        keepalive          1000;
        keepalive_requests 10000;
        keepalive_timeout  60s;
    }

    # ========================================
    # Rate-limiting zones (all keyed by client address)
    # ========================================
    # Concurrent connections per client.
    limit_conn_zone $binary_remote_addr zone=conn_limit:10m;
    # General request rate per client.
    limit_req_zone  $binary_remote_addr zone=req_limit:10m rate=100r/s;
    # Stricter per-client rate intended for API locations. The key is still
    # the client address, not the URI; the location that references the zone
    # decides where it applies.
    limit_req_zone  $binary_remote_addr zone=api_limit:10m rate=50r/s;

    # ========================================
    # Caches
    # ========================================
    # File descriptor / metadata cache for static files.
    open_file_cache          max=10000 inactive=30s;
    open_file_cache_valid    60s;
    open_file_cache_min_uses 2;
    open_file_cache_errors   on;

    # On-disk proxy cache; it only takes effect where proxy_cache names it.
    proxy_cache_path /var/cache/nginx/proxy levels=1:2 keys_zone=proxy_cache:100m inactive=60m max_size=10g;

    # ========================================
    # Gzip
    # ========================================
    gzip              on;
    gzip_vary         on;
    gzip_proxied      any;
    gzip_comp_level   6;
    gzip_min_length   1024;
    gzip_buffers      16 8k;
    gzip_http_version 1.1;
    gzip_types
        text/plain
        text/css
        text/xml
        text/javascript
        application/json
        application/javascript
        application/xml
        application/xml+rss
        application/atom+xml
        image/svg+xml;

    # ========================================
    # Security headers
    # ========================================
    # add_header directives are inherited by a server/location only if that
    # level declares no add_header of its own; blocks that add headers must
    # repeat the ones below.
    add_header X-Frame-Options        "SAMEORIGIN" always;
    add_header X-Content-Type-Options "nosniff" always;
    add_header X-XSS-Protection       "1; mode=block" always;
    add_header Referrer-Policy        "strict-origin-when-cross-origin" always;

    # ========================================
    # Access log (JSON lines, suitable for ELK)
    # ========================================
    log_format json_combined escape=json
        '{'
            '"time_local":"$time_local",'
            '"remote_addr":"$remote_addr",'
            '"remote_user":"$remote_user",'
            '"request":"$request",'
            '"status":"$status",'
            '"body_bytes_sent":"$body_bytes_sent",'
            '"request_time":"$request_time",'
            '"http_referrer":"$http_referer",'
            '"http_user_agent":"$http_user_agent",'
            '"http_x_forwarded_for":"$http_x_forwarded_for",'
            '"upstream_addr":"$upstream_addr",'
            '"upstream_status":"$upstream_status",'
            '"upstream_response_time":"$upstream_response_time",'
            '"upstream_connect_time":"$upstream_connect_time",'
            '"upstream_header_time":"$upstream_header_time",'
            '"request_id":"$request_id",'
            '"server_name":"$server_name",'
            '"request_length":"$request_length"'
        '}';

    # Buffered writes: flushed when 32k accumulate or every 5 seconds.
    access_log /var/log/nginx/access.log json_combined buffer=32k flush=5s;

    # ========================================
    # stub_status (scraped by the Prometheus exporter)
    # ========================================
    server {
        # Listens on all interfaces so an exporter running in a separate
        # container can reach it; a loopback-only listener is invisible to
        # other containers. Access is limited to loopback and private ranges.
        listen      9113;
        server_name localhost;

        location /stub_status {
            # Argument-less form; the "on" argument is a pre-1.7.5 leftover.
            stub_status;
            access_log off;
            allow 127.0.0.1;
            allow 10.0.0.0/8;
            allow 172.16.0.0/12;
            allow 192.168.0.0/16;
            deny  all;
        }
    }

    # Per-site configuration.
    include /etc/nginx/conf.d/*.conf;
}
2.2 Nginx 站点配置详解
# /etc/nginx/conf.d/api-server.conf
# ----------------------------------------
# API site configuration
# ----------------------------------------

# Upstream group for the API servers.
upstream api_backend {
    # Shared-memory zone holding the group's runtime state.
    zone api_backend 64k;

    # Send each request to the server with the fewest active connections.
    least_conn;

    # Per-server parameters used below:
    #   weight       relative share of traffic
    #   max_fails    failed attempts before the server is marked unavailable
    #   fail_timeout window for counting failures / time kept unavailable
    #   max_conns    cap on simultaneous connections to the server
    server 10.0.1.1:8080  weight=5  max_fails=3  fail_timeout=30s  max_conns=500;
    server 10.0.1.2:8080  weight=5  max_fails=3  fail_timeout=30s  max_conns=500;
    server 10.0.1.3:8080  weight=5  max_fails=3  fail_timeout=30s  max_conns=500;

    # Standby server.
    server 10.0.1.4:8080  backup;

    # Idle keep-alive connections to the upstream, with recycling limits.
    keepalive          1000;
    keepalive_requests 10000;
    keepalive_timeout  60s;

    # Active health checks need an additional module.
    # NOTE(review): per nginx docs, health_check belongs in the proxying
    # location rather than in upstream{} - confirm before enabling.
    # health_check interval=5s fails=3 passes=2;
}
# Plain-HTTP virtual host: exists only to send clients to HTTPS.
server {
    server_name api.example.com;
    listen      80;

    # Permanent redirect that keeps the original path and query string.
    return 301 https://$server_name$request_uri;
}
# HTTPS virtual host for api.example.com: terminates TLS, rate-limits
# clients, proxies to api_backend and serves health, status and static
# endpoints.
server {
    # On nginx >= 1.25.1 the preferred spelling is "listen 443 ssl;" plus
    # "http2 on;"; the combined form is kept for older packages.
    listen 443 ssl http2;
    server_name api.example.com;

    # ========================================
    # SSL/TLS
    # ========================================
    ssl_certificate     /etc/nginx/ssl/api.example.com.crt;
    ssl_certificate_key /etc/nginx/ssl/api.example.com.key;

    ssl_protocols TLSv1.2 TLSv1.3;
    ssl_ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384;
    ssl_prefer_server_ciphers on;

    # Session resumption via the shared cache only (tickets disabled).
    ssl_session_cache   shared:SSL:50m;
    ssl_session_timeout 1d;
    ssl_session_tickets off;

    # OCSP stapling; the resolver is needed to reach the OCSP responder.
    ssl_stapling            on;
    ssl_stapling_verify     on;
    ssl_trusted_certificate /etc/nginx/ssl/chain.crt;
    resolver                8.8.8.8 8.8.4.4 valid=300s;
    resolver_timeout        5s;

    # ========================================
    # Response headers
    # ========================================
    # Declaring any add_header here stops inheritance of the http-level
    # headers, so the full set is listed together with HSTS.
    add_header Strict-Transport-Security "max-age=31536000; includeSubDomains; preload" always;
    add_header X-Frame-Options           "SAMEORIGIN" always;
    add_header X-Content-Type-Options    "nosniff" always;
    add_header X-XSS-Protection          "1; mode=block" always;
    add_header Referrer-Policy           "strict-origin-when-cross-origin" always;

    # ========================================
    # Rate limiting
    # ========================================
    limit_conn conn_limit 100;
    limit_req  zone=req_limit burst=200 nodelay;
    # Reject over-limit requests with 429 (default is 503) and log at warn.
    # Set at server level so every location, not only the API, is consistent.
    limit_req_status    429;
    limit_req_log_level warn;

    # ========================================
    # Proxy settings shared by all proxying locations
    # ========================================
    # HTTP/1.1 with an empty Connection header enables upstream keep-alive.
    proxy_http_version 1.1;
    proxy_set_header Connection        "";
    proxy_set_header Host              $host;
    proxy_set_header X-Real-IP         $remote_addr;
    proxy_set_header X-Forwarded-For   $proxy_add_x_forwarded_for;
    proxy_set_header X-Forwarded-Proto $scheme;
    proxy_set_header X-Request-ID      $request_id;

    proxy_connect_timeout 10s;
    proxy_read_timeout    60s;
    proxy_send_timeout    60s;

    proxy_buffering   on;
    proxy_buffer_size 16k;
    proxy_buffers     8 32k;

    # API responses are never cached.
    proxy_cache    off;
    proxy_no_cache 1;

    # Replace upstream 5xx bodies with the local error page.
    proxy_intercept_errors on;
    error_page 500 502 503 504 /50x.html;

    # ========================================
    # Locations
    # ========================================
    # API traffic: stricter per-client limit. This location is the longest
    # prefix match for /api/v1/ requests, so it must proxy itself; with only
    # limit_req inside, requests were served from the local filesystem (404).
    location /api/v1/ {
        limit_req  zone=api_limit burst=100 nodelay;
        proxy_pass http://api_backend;
    }

    # Everything else.
    location / {
        proxy_pass http://api_backend;
    }

    # Static error page. Without a dedicated location the internal redirect
    # to /50x.html would fall into "location /" and be proxied to the failing
    # upstream again.
    # NOTE(review): path assumes the stock nginx html directory - confirm.
    location = /50x.html {
        internal;
        root /usr/share/nginx/html;
    }

    # Liveness / readiness endpoints answered by nginx itself. default_type
    # sets the content type; an extra add_header Content-Type would emit a
    # second, conflicting header and drop the server-level security headers.
    location /health {
        access_log   off;
        default_type text/plain;
        return 200 'OK';
    }

    location /ready {
        access_log   off;
        default_type text/plain;
        return 200 'Ready';
    }

    # Connection counters, restricted to private address ranges.
    location /nginx_status {
        stub_status;
        access_log off;
        allow 10.0.0.0/8;
        allow 172.16.0.0/12;
        allow 192.168.0.0/16;
        deny  all;
    }

    # Static assets with long-lived caching.
    location /static/ {
        alias /var/www/static/;

        expires 30d;
        add_header Cache-Control "public, immutable";
        # Repeated because this location declares its own add_header.
        add_header Strict-Transport-Security "max-age=31536000; includeSubDomains; preload" always;
        add_header X-Frame-Options           "SAMEORIGIN" always;
        add_header X-Content-Type-Options    "nosniff" always;
        add_header X-XSS-Protection          "1; mode=block" always;
        add_header Referrer-Policy           "strict-origin-when-cross-origin" always;

        open_file_cache max=1000 inactive=60s;
    }
}
2.3 Envoy 核心配置详解
```yaml
# /etc/envoy/envoy.yaml
# ========================================
# Envoy static configuration - production tuning
# ========================================
admin:
  address:
    socket_address:
      # NOTE(review): the admin API is unauthenticated; if it binds all
      # interfaces, restrict access at the network/publishing layer.
      address: 0.0.0.0
      port_value: 19000
  # Admin access log. The access_log list replaces the deprecated
  # access_log_path field.
  access_log:
    - name: envoy.access_loggers.file
      typed_config:
        "@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
        path: /var/log/envoy/admin_access.log
static_resources:
========================================
监听器配置
========================================
listeners:
# HTTP 入口监听器
- name: listener_http
address:
socket_address:
address: 0.0.0.0
port_value: 8080
per_connection_buffer_limit_bytes: 32768
# 监听器过滤器
listener_filters:
- name: envoy.filters.listener.original_dst
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.listener.original_dst.v3.OriginalDst
# 连接限制
connection_balance_config:
exact_balance: {}
filter_chains:
- filters:
- name: envoy.filters.network.http_connection_manager
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
stat_prefix: ingress_http
# 请求 ID 配置
generate_request_id: true
preserve_external_request_id: true
# 使用远程地址
use_remote_address: true
skip_xff_append: false
xff_num_trusted_hops: 1
# 流量镜像
# start_reqeust_headers_time: true
# 请求头限制
max_request_headers_kb: 80
# 流控制
stream_idle_timeout: 300s
request_timeout: 60s
drain_timeout: 5s
# 延迟关闭
delayed_close_timeout: 1s
# 路由配置
route_config:
name: local_route
virtual_hosts:
- name: backend_services
domains:
- "*"
routes:
  # Routes are evaluated in order and the first match wins, so specific
  # prefixes must come before the catch-all "/". With "/" listed ahead of
  # "/health", health requests were sent to web_cluster instead.

  # Health check answered by Envoy itself.
  - match:
      prefix: "/health"
    direct_response:
      status: 200
      body:
        inline_string: "OK"

  # API traffic.
  - match:
      prefix: "/api/v1"
    route:
      cluster: api_cluster
      # Overall deadline, covering all retry attempts.
      timeout: 60s
      retry_policy:
        retry_on: "5xx,connect-failure,refused-stream"
        num_retries: 3
        per_try_timeout: 20s
        retry_back_off:
          base_interval: 0.5s
          max_interval: 30s
      # Descriptor for the rate-limit filter: one bucket per client address.
      rate_limits:
        - actions:
            - remote_address: {}

  # Default (catch-all) route - keep last.
  - match:
      prefix: "/"
    route:
      cluster: web_cluster
      timeout: 30s
# HTTP 过滤器
http_filters:
# 本地限流
- name: envoy.filters.http.local_ratelimit
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.http.local_ratelimit.v3.LocalRateLimit
stat_prefix: local_rate_limiter
token_bucket:
max_tokens: 1000
tokens_per_fill: 1000
fill_interval: 1s
filter_enabled:
runtime_key: local_rate_limit_enabled
default_value:
numerator: 100
denominator: HUNDRED
filter_enforced:
runtime_key: local_rate_limit_enforced
default_value:
numerator: 100
denominator: HUNDRED
response_headers_to_add:
- append: false
header:
key: X-RateLimit-Limit
value: "1000"
local_rate_limit_per_downstream_connection: false
# 外部限流(可选)
- name: envoy.filters.http.ratelimit
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.http.ratelimit.v3.RateLimit
domain: envoy_rate_limit
failure_mode_deny: false
timeout: 0.25s
rate_limit_service:
grpc_service:
envoy_grpc:
cluster_name: rate_limit_cluster
# JWT 认证(可选)
- name: envoy.filters.http.jwt_authn
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.http.jwt_authn.v3.JwtAuthentication
providers:
provider1:
issuer: "https://auth.example.com"
audiences:
- "api.example.com"
remote_jwks:
http_uri:
uri: "https://auth.example.com/.well-known/jwks.json"
cluster: auth_cluster
timeout: 5s
cache_duration: 300s
rules:
- match:
prefix: "/api/v1"
requires:
provider_name: provider1
# 故障注入(测试用)
- name: envoy.filters.http.fault
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.http.fault.v3.HTTPFault
max_active_faults: 100
abort:
http_status: 503
percentage:
numerator: 0
denominator: HUNDRED
delay:
fixed_delay: 0s
percentage:
numerator: 0
denominator: HUNDRED
# CORS
- name: envoy.filters.http.cors
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.http.cors.v3.Cors
# 路由器
- name: envoy.filters.http.router
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
dynamic_stats: true
start_child_span: true
# 访问日志
access_log:
- name: envoy.access_loggers.file
typed_config:
"@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
path: /var/log/envoy/access.log
log_format:
json_format:
timestamp: "%START_TIME%"
method: "%REQ(:METHOD)%"
path: "%REQ(X-ENVOY-ORIGINAL-PATH?:PATH)%"
protocol: "%PROTOCOL%"
response_code: "%RESPONSE_CODE%"
response_flags: "%RESPONSE_FLAGS%"
bytes_received: "%BYTES_RECEIVED%"
bytes_sent: "%BYTES_SENT%"
duration: "%DURATION%"
upstream_service_time: "%RESP(X-ENVOY-UPSTREAM-SERVICE-TIME)%"
x_forwarded_for: "%REQ(X-FORWARDED-FOR)%"
user_agent: "%REQ(USER-AGENT)%"
request_id: "%REQ(X-REQUEST-ID)%"
upstream_address: "%UPSTREAM_REMOTE_ADDRESS%"
upstream_cluster: "%UPSTREAM_CLUSTER%"
upstream_local_address: "%UPSTREAM_LOCAL_ADDRESS%"
downstream_local_address: "%DOWNSTREAM_LOCAL_ADDRESS%"
downstream_remote_address: "%DOWNSTREAM_REMOTE_ADDRESS%"
requested_server