From 9d4c0f407d466c54341419553178aefe6b9e1e19 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B=E7=84=B6?= Date: Wed, 20 May 2026 09:48:48 +0000 Subject: [PATCH 1/8] feat: add tracing auto test --- .../installing-distributed-tracing.mdx | 40 ++-- ...nme-test_installing-distributed-tracing.sh | 203 ++++++++++++++++++ ...e-test_uninstalling-distributed-tracing.sh | 80 +++++++ .../uninstalling-distributed-tracing.mdx | 18 +- 4 files changed, 312 insertions(+), 29 deletions(-) create mode 100755 docs/en/installing/runme-test_installing-distributed-tracing.sh create mode 100755 docs/en/uninstalling/runme-test_uninstalling-distributed-tracing.sh diff --git a/docs/en/installing/installing-distributed-tracing.mdx b/docs/en/installing/installing-distributed-tracing.mdx index 80cf667..5a6aaad 100644 --- a/docs/en/installing/installing-distributed-tracing.mdx +++ b/docs/en/installing/installing-distributed-tracing.mdx @@ -47,7 +47,7 @@ Jaeger v2 is deployed as an `OpenTelemetryCollector` custom resource managed by 2. Retrieve platform configuration and Jaeger-related container images from the cluster: - ```bash + ```bash {name=install-tracing:get-platform-config} export PLATFORM_URL=$(kubectl -nkube-public get configmap global-info -o jsonpath='{.data.platformURL}') export CLUSTER_NAME=$(kubectl -nkube-public get configmap global-info -o jsonpath='{.data.clusterName}') export ALB_CLASS_NAME=$(kubectl -nkube-public get configmap global-info -o jsonpath='{.data.systemAlbIngressClassName}') @@ -77,7 +77,7 @@ Jaeger v2 is deployed as an `OpenTelemetryCollector` custom resource managed by 3. Set default environment variables. You can adjust these values to match your deployment requirements: - ```bash + ```bash {name=install-tracing:set-jaeger-defaults} # Namespace for the Jaeger instance export JAEGER_NS="jaeger-system" # Name of the Jaeger instance @@ -90,7 +90,7 @@ Jaeger v2 is deployed as an `OpenTelemetryCollector` custom resource managed by 4. Create the Jaeger namespace and Elasticsearch credentials Secret: - ```bash + ```bash {name=install-tracing:create-jaeger-ns-and-es-secret} kubectl create namespace ${JAEGER_NS} kubectl create secret generic es-credentials \ @@ -101,13 +101,13 @@ Jaeger v2 is deployed as an `OpenTelemetryCollector` custom resource managed by Verify that the Secret was created: - ```bash + ```bash {name=install-tracing:verify-es-secret} kubectl get secret es-credentials -n ${JAEGER_NS} ``` 5. Create an [ILM (Index Lifecycle Management) Policy](../configuration/storage-backends/elasticsearch.mdx#ilm-support) in Elasticsearch. Jaeger uses ILM to manage index rollover and retention: - ```bash + ```bash {name=install-tracing:create-ilm-policy} curl -k -u "${ES_USER}:${ES_PASS}" -X PUT \ "${ES_ENDPOINT}/_ilm/policy/jaeger-ilm-policy" \ -H 'Content-Type: application/json' \ @@ -147,7 +147,7 @@ Jaeger v2 is deployed as an `OpenTelemetryCollector` custom resource managed by Verify the ILM Policy: - ```bash + ```bash {name=install-tracing:verify-ilm-policy} curl -k -u "${ES_USER}:${ES_PASS}" "${ES_ENDPOINT}/_ilm/policy/jaeger-ilm-policy?pretty" ``` @@ -155,7 +155,7 @@ Jaeger v2 is deployed as an `OpenTelemetryCollector` custom resource managed by 6. [Initialize](../configuration/storage-backends/elasticsearch.mdx#index-rollover) index aliases and templates using the `jaeger-es-rollover` tool. This prepares Elasticsearch for Jaeger data storage: - ```bash + ```bash {name=install-tracing:create-rollover-init-job} kubectl apply -n ${JAEGER_NS} -f - </dev/null | base64 | tr -d -- '\n' | tr -- '+/' '-_') # Create the Secret: @@ -234,7 +234,7 @@ Jaeger v2 is deployed as an `OpenTelemetryCollector` custom resource managed by 9. Create a file named `jaeger.yaml` with the following content: - ```yaml title="jaeger.yaml" + ```yaml title="jaeger.yaml" {name=install-tracing:jaeger-yaml} apiVersion: opentelemetry.io/v1beta1 kind: OpenTelemetryCollector metadata: @@ -430,24 +430,24 @@ Jaeger v2 is deployed as an `OpenTelemetryCollector` custom resource managed by 10. Render the manifest with `envsubst` and apply the configuration: - ```bash + ```bash {name=install-tracing:apply-jaeger} envsubst < jaeger.yaml | kubectl apply -f - ``` 11. Wait for the Jaeger Pod to be ready: - ```bash + ```bash {name=install-tracing:wait-jaeger-rollout} kubectl rollout status deployment/${JAEGER_INSTANCE_NAME}-collector \ -n ${JAEGER_NS} --timeout=180s ``` 12. Label the namespace and create an Ingress to expose the Jaeger UI: - ```bash + ```bash {name=install-tracing:label-jaeger-ns} kubectl label namespace ${JAEGER_NS} cpaas.io/project=cpaas-system --overwrite ``` - ```bash + ```bash {name=install-tracing:create-jaeger-ingress} kubectl apply -n ${JAEGER_NS} -f - </clusters//jaeger`, where ` Run the following command to print the Jaeger UI URL: -```bash +```bash {name=install-tracing:print-jaeger-url} echo "Jaeger UI: ${PLATFORM_URL}${JAEGER_BASEPATH}" ``` @@ -493,7 +493,7 @@ After Jaeger v2 is running, deploy an OpenTelemetry Collector instance to receiv 1. Create an `OpenTelemetryCollector` resource: - ```yaml + ```bash {name=install-tracing:create-otel-collector} kubectl apply -f - </dev/null + "$@" + local rc=$? + popd >/dev/null + return $rc +} + +# 测试函数:分布式调用链安装 +test_installing_distributed_tracing() { + log_info "==========================================" + log_info "开始 Alauda Distributed Tracing 安装测试" + log_info "==========================================" + + # 步骤 0: 检查 Elasticsearch 依赖(外部强依赖,缺失则 SKIPPED) + if [ -z "${TRACING_ES_ENDPOINT:-}" ] || [ -z "${TRACING_ES_USER:-}" ] || [ -z "${TRACING_ES_PASS:-}" ]; then + log_warn "SKIPPED: 未设置 TRACING_ES_ENDPOINT / TRACING_ES_USER / TRACING_ES_PASS,跳过分布式调用链安装测试" + return 0 + fi + + # 步骤 1: 安装 Alauda Build of OpenTelemetry v2 Operator(跨仓库前置依赖) + log_info "步骤 1: 安装 OpenTelemetry v2 Operator" + if [ -z "${OTEL_REPO_ROOT:-}" ]; then + log_error "OTEL_REPO_ROOT 未注入,无法定位 opentelemetry-docs 安装 OTel Operator" + return 1 + fi + _in_otel_repo install_operator \ + "opentelemetry-operator2" \ + "opentelemetry-operator2" \ + "$PKG_OPENTELEMETRY_OPERATOR2_URL" \ + "install-otel" || { + log_error "OTel Operator 安装失败" + return 1 + } + + # 步骤 2: 注入 Elasticsearch 连接环境变量(替代文档步骤 1 的占位符) + log_info "步骤 2: 设置 Elasticsearch 连接环境变量" + export ES_ENDPOINT="$TRACING_ES_ENDPOINT" + export ES_USER="$TRACING_ES_USER" + export ES_PASS="$TRACING_ES_PASS" + + # 步骤 3: 拉取平台配置与 Jaeger 镜像 + log_info "步骤 3: 拉取平台配置" + eval "$(runme print install-tracing:get-platform-config)" || { + log_error "拉取平台配置失败" + return 1 + } + + # 步骤 4: 设置 Jaeger 默认环境变量 + log_info "步骤 4: 设置 Jaeger 默认环境变量" + eval "$(runme print install-tracing:set-jaeger-defaults)" || { + log_error "设置 Jaeger 默认环境变量失败" + return 1 + } + + # 步骤 5: 创建 Jaeger 命名空间与 ES 凭据 Secret + log_info "步骤 5: 创建命名空间与 ES 凭据 Secret" + runme run install-tracing:create-jaeger-ns-and-es-secret || { + log_error "创建命名空间与 ES Secret 失败" + return 1 + } + + # 步骤 5.1: 验证 ES Secret + log_info "步骤 5.1: 验证 ES Secret" + runme run install-tracing:verify-es-secret || { + log_error "验证 ES Secret 失败" + return 1 + } + + # 步骤 6: 创建 ILM Policy + log_info "步骤 6: 创建 ILM Policy" + runme run install-tracing:create-ilm-policy || { + log_error "创建 ILM Policy 失败" + return 1 + } + + # 步骤 6.1: 验证 ILM Policy + log_info "步骤 6.1: 验证 ILM Policy" + runme run install-tracing:verify-ilm-policy || { + log_error "验证 ILM Policy 失败" + return 1 + } + + # 步骤 7: 创建 jaeger-es-rollover-init Job + log_info "步骤 7: 创建 rollover-init Job" + runme run install-tracing:create-rollover-init-job || { + log_error "创建 rollover-init Job 失败" + return 1 + } + + # 步骤 7.1: 等待 Job 完成并验证索引模板/别名 + log_info "步骤 7.1: 等待 rollover-init Job 完成并验证" + runme run install-tracing:verify-rollover-init || { + log_error "验证 rollover-init 失败" + return 1 + } + + # 步骤 8: 清理 rollover-init Job + log_info "步骤 8: 清理 rollover-init Job" + runme run install-tracing:delete-rollover-init-job || { + log_error "清理 rollover-init Job 失败" + return 1 + } + + # 步骤 9: 创建 OAuth2 Proxy Secret + log_info "步骤 9: 创建 OAuth2 Proxy Secret" + runme run install-tracing:create-oauth2-proxy-secret || { + log_error "创建 OAuth2 Proxy Secret 失败" + return 1 + } + + # 步骤 10: 生成 jaeger.yaml 到 /tmp(envsubst apply 依赖 cwd 中存在该文件) + log_info "步骤 10: 生成 /tmp/jaeger.yaml" + runme print install-tracing:jaeger-yaml > /tmp/jaeger.yaml || { + log_error "生成 jaeger.yaml 失败" + return 1 + } + + # 步骤 11: envsubst 渲染并 apply(需在 /tmp 目录下执行) + log_info "步骤 11: 渲染并应用 jaeger.yaml" + kubectl_apply_runme_block "install-tracing:apply-jaeger" "/tmp/" || { + log_error "应用 jaeger.yaml 失败" + return 1 + } + + # 步骤 12: 等待 Jaeger collector deployment 就绪 + log_info "步骤 12: 等待 Jaeger collector 就绪" + runme run install-tracing:wait-jaeger-rollout || { + log_error "等待 Jaeger collector 就绪失败" + return 1 + } + + # 步骤 13: 给命名空间打 cpaas.io/project 标签 + log_info "步骤 13: 标记 Jaeger 命名空间" + runme run install-tracing:label-jaeger-ns || { + log_error "标记命名空间失败" + return 1 + } + + # 步骤 14: 创建 Jaeger Ingress + log_info "步骤 14: 创建 Jaeger Ingress" + runme run install-tracing:create-jaeger-ingress || { + log_error "创建 Jaeger Ingress 失败" + return 1 + } + + # 步骤 14.1: 等待 Ingress LoadBalancer 就绪 + log_info "步骤 14.1: 等待 Jaeger Ingress 就绪" + runme run install-tracing:wait-jaeger-ingress || { + log_error "等待 Jaeger Ingress 就绪失败" + return 1 + } + + # 步骤 15: 打印 Jaeger UI URL + log_info "步骤 15: 打印 Jaeger UI URL" + runme run install-tracing:print-jaeger-url || { + log_error "打印 Jaeger UI URL 失败" + return 1 + } + + # 步骤 16: 创建 otel OpenTelemetryCollector + log_info "步骤 16: 创建 OpenTelemetry Collector" + runme run install-tracing:create-otel-collector || { + log_error "创建 OpenTelemetry Collector 失败" + return 1 + } + + # 步骤 17: 等待 otel collector deployment 就绪 + log_info "步骤 17: 等待 OpenTelemetry Collector 就绪" + runme run install-tracing:wait-otel-rollout || { + log_error "等待 OpenTelemetry Collector 就绪失败" + return 1 + } + + # 步骤 18: 部署 telemetrygen 生成测试 trace(内含 wait/delete,约 150s) + log_info "步骤 18: 部署 telemetrygen 生成测试 trace" + runme run install-tracing:deploy-telemetrygen || { + log_error "telemetrygen 端到端验证失败" + return 1 + } + + log_success "==========================================" + log_success "Alauda Distributed Tracing 安装测试完成,所有验证通过!" + log_success "==========================================" + return 0 +} diff --git a/docs/en/uninstalling/runme-test_uninstalling-distributed-tracing.sh b/docs/en/uninstalling/runme-test_uninstalling-distributed-tracing.sh new file mode 100755 index 0000000..b72d416 --- /dev/null +++ b/docs/en/uninstalling/runme-test_uninstalling-distributed-tracing.sh @@ -0,0 +1,80 @@ +#!/usr/bin/env bash +# Alauda Distributed Tracing 卸载文档测试脚本 +# 对应文档: docs/en/uninstalling/uninstalling-distributed-tracing.mdx +# 覆盖范围: 「Uninstalling via the CLI」章节;「via the web console」为 UI 操作不可自动化。 + +set -e + +: "${FRAMEWORK_ROOT:?该脚本需经 docs-runme-tests/run.sh 运行}" + +# 加载框架函数库 +source "$FRAMEWORK_ROOT/framework/common.sh" +source "$FRAMEWORK_ROOT/framework/verify.sh" + +# 执行删除类 runme 块并校验输出(模式 B) +# 用法: _delete_and_verify <步骤描述> +# 期望输出取自配对的 -output 代码块。 +_delete_and_verify() { + local desc="$1" block="$2" + local output expected + output=$(runme run "$block" 2>&1) || { + log_error "${desc}失败" + log_error "输出: $output" + return 1 + } + expected=$(runme print "${block}-output") + if ! __cmp_contains "$output" "$expected"; then + log_error "${desc}输出校验失败" + log_error "期待包含: $expected" + log_error "实际输出: $output" + return 1 + fi + log_success "${desc}通过" + return 0 +} + +# 测试函数:分布式调用链卸载 +test_uninstalling_distributed_tracing() { + log_info "==========================================" + log_info "开始 Alauda Distributed Tracing 卸载测试" + log_info "==========================================" + + # 步骤 0: 检查 ES 依赖(与安装测试保持一致;未设置则 SKIPPED) + if [ -z "${TRACING_ES_ENDPOINT:-}" ]; then + log_warn "SKIPPED: 未设置 TRACING_ES_ENDPOINT,跳过分布式调用链卸载测试" + return 0 + fi + + # 步骤 1: 设置环境变量(JAEGER_NS / JAEGER_INSTANCE_NAME) + log_info "步骤 1: 设置卸载环境变量" + eval "$(runme print uninstall-tracing:set-env)" || { + log_error "设置卸载环境变量失败" + return 1 + } + + # 步骤 2: 删除 otel OpenTelemetryCollector 实例 + log_info "步骤 2: 删除 OpenTelemetry Collector 实例" + _delete_and_verify "删除 OpenTelemetry Collector 实例" uninstall-tracing:delete-otel-collector || return 1 + + # 步骤 3: 删除 jaeger OpenTelemetryCollector 实例 + log_info "步骤 3: 删除 Jaeger v2 实例" + _delete_and_verify "删除 Jaeger v2 实例" uninstall-tracing:delete-jaeger-collector || return 1 + + # 步骤 4: 删除 Jaeger 命名空间 + log_info "步骤 4: 删除 Jaeger 命名空间" + _delete_and_verify "删除 Jaeger 命名空间" uninstall-tracing:delete-jaeger-ns || return 1 + + # 步骤 5: (可选) 删除 OTel Operator subscription + # 受 --skip-operator-and-crds 控制:传入时保留 Operator 以便后续测试复用。 + if [ "${SKIP_OPERATOR_AND_CRDS:-false}" = "true" ]; then + log_info "步骤 5: 跳过删除 OTel Operator subscription (--skip-operator-and-crds)" + else + log_info "步骤 5: 删除 OTel Operator subscription" + _delete_and_verify "删除 OTel Operator subscription" uninstall-tracing:delete-otel-subscription || return 1 + fi + + log_success "==========================================" + log_success "Alauda Distributed Tracing 卸载测试完成,所有验证通过!" + log_success "==========================================" + return 0 +} diff --git a/docs/en/uninstalling/uninstalling-distributed-tracing.mdx b/docs/en/uninstalling/uninstalling-distributed-tracing.mdx index 1e151b7..df3e429 100644 --- a/docs/en/uninstalling/uninstalling-distributed-tracing.mdx +++ b/docs/en/uninstalling/uninstalling-distributed-tracing.mdx @@ -80,7 +80,7 @@ For more details, see Date: Wed, 20 May 2026 20:26:14 +0800 Subject: [PATCH 2/8] fix: docs/en/installing/installing-distributed-tracing.mdx MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 王然 --- docs/en/installing/installing-distributed-tracing.mdx | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/docs/en/installing/installing-distributed-tracing.mdx b/docs/en/installing/installing-distributed-tracing.mdx index 5a6aaad..06065d2 100644 --- a/docs/en/installing/installing-distributed-tracing.mdx +++ b/docs/en/installing/installing-distributed-tracing.mdx @@ -91,12 +91,13 @@ Jaeger v2 is deployed as an `OpenTelemetryCollector` custom resource managed by 4. Create the Jaeger namespace and Elasticsearch credentials Secret: ```bash {name=install-tracing:create-jaeger-ns-and-es-secret} - kubectl create namespace ${JAEGER_NS} + kubectl get namespace ${JAEGER_NS} &> /dev/null || kubectl create namespace ${JAEGER_NS} kubectl create secret generic es-credentials \ --namespace=${JAEGER_NS} \ --from-literal=ES_USER=${ES_USER} \ - --from-literal=ES_PASS=${ES_PASS} + --from-literal=ES_PASS=${ES_PASS} \ + --dry-run=client -o yaml | kubectl apply -f - ``` Verify that the Secret was created: @@ -229,7 +230,8 @@ Jaeger v2 is deployed as an `OpenTelemetryCollector` custom resource managed by kubectl create secret generic ${JAEGER_INSTANCE_NAME}-oauth2-proxy \ --namespace=${JAEGER_NS} \ --from-literal=OAUTH2_PROXY_CLIENT_SECRET=${OIDC_CLIENT_SECRET} \ - --from-literal=OAUTH2_PROXY_COOKIE_SECRET=${OAUTH2_PROXY_COOKIE_SECRET} + --from-literal=OAUTH2_PROXY_COOKIE_SECRET=${OAUTH2_PROXY_COOKIE_SECRET} \ + --dry-run=client -o yaml | kubectl apply -f - ``` 9. Create a file named `jaeger.yaml` with the following content: @@ -658,7 +660,8 @@ Enabling SPM requires two changes: adding a SpanMetrics Connector to the OpenTel kubectl create secret generic monitoring-credentials \ --namespace=${JAEGER_NS} \ --from-literal=username=${MONITORING_USERNAME} \ - --from-literal=password=${MONITORING_PASSWORD} + --from-literal=password=${MONITORING_PASSWORD} \ + --dry-run=client -o yaml | kubectl apply -f - ``` 3. Patch the OpenTelemetry Collector to enable the SpanMetrics Connector. This adds a `spanmetrics` connector that generates RED metrics from spans and exports them via a Prometheus exporter: From 99810dda9a0c857f480ca762fbf494dd2c9e8c6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B=E7=84=B6?= Date: Wed, 20 May 2026 13:07:46 +0000 Subject: [PATCH 3/8] add tracing SPM test --- .../installing-distributed-tracing.mdx | 28 ++-- ...nme-test_installing-distributed-tracing.sh | 126 +++++++++++++++++- ...e-test_uninstalling-distributed-tracing.sh | 27 +++- 3 files changed, 160 insertions(+), 21 deletions(-) diff --git a/docs/en/installing/installing-distributed-tracing.mdx b/docs/en/installing/installing-distributed-tracing.mdx index 06065d2..a79b24e 100644 --- a/docs/en/installing/installing-distributed-tracing.mdx +++ b/docs/en/installing/installing-distributed-tracing.mdx @@ -493,10 +493,9 @@ echo "Jaeger UI: ${PLATFORM_URL}${JAEGER_BASEPATH}" After Jaeger v2 is running, deploy an OpenTelemetry Collector instance to receive trace data from instrumented applications and forward it to Jaeger. -1. Create an `OpenTelemetryCollector` resource: +1. Create a file named `otel-collector.yaml` with the following content: - ```bash {name=install-tracing:create-otel-collector} - kubectl apply -f - < @@ -576,7 +574,13 @@ After Jaeger v2 is running, deploy an OpenTelemetry Collector instance to receiv 6. The trace pipeline receives data via OTLP and Zipkin, processes it through `memory_limiter` and `batch`, and exports to both the `debug` exporter (for logging) and `otlp/traces` (for forwarding to Jaeger). -2. Wait for the Collector pod to be ready: +2. Render the manifest with `envsubst` and apply the configuration: + + ```bash {name=install-tracing:apply-otel-collector} + envsubst < otel-collector.yaml | kubectl apply -f - + ``` + +3. Wait for the Collector pod to be ready: ```bash {name=install-tracing:wait-otel-rollout} kubectl rollout status deployment/otel-collector \ @@ -646,7 +650,7 @@ Enabling SPM requires two changes: adding a SpanMetrics Connector to the OpenTel 1. Retrieve monitoring endpoint and credentials from the cluster: - ```bash + ```bash {name=install-tracing-spm:get-monitoring-config} export MONITORING_URL=$(kubectl get feature monitoring -o jsonpath='{.spec.accessInfo.database.address}') MONITORING_SECRET_NAME=$(kubectl get feature monitoring -o jsonpath='{.spec.accessInfo.database.basicAuth.secretName}') @@ -656,7 +660,7 @@ Enabling SPM requires two changes: adding a SpanMetrics Connector to the OpenTel 2. Create a Secret for monitoring credentials: - ```bash + ```bash {name=install-tracing-spm:create-monitoring-secret} kubectl create secret generic monitoring-credentials \ --namespace=${JAEGER_NS} \ --from-literal=username=${MONITORING_USERNAME} \ @@ -666,7 +670,7 @@ Enabling SPM requires two changes: adding a SpanMetrics Connector to the OpenTel 3. Patch the OpenTelemetry Collector to enable the SpanMetrics Connector. This adds a `spanmetrics` connector that generates RED metrics from spans and exports them via a Prometheus exporter: - ```bash + ```bash {name=install-tracing-spm:patch-otel-collector} kubectl patch opentelemetrycollector otel -n ${JAEGER_NS} --type=merge -p ' spec: config: @@ -684,14 +688,14 @@ Enabling SPM requires two changes: adding a SpanMetrics Connector to the OpenTel Wait for the Collector to restart: - ```bash + ```bash {name=install-tracing-spm:wait-otel-rollout} kubectl rollout status deployment/otel-collector \ -n ${JAEGER_NS} --timeout=180s ``` 4. Create a file named `jaeger-spm-patch.yaml` with the following content. This patch adds a PromQL metrics backend to Jaeger and enables the Monitor tab: - ```yaml title="jaeger-spm-patch.yaml" + ```yaml title="jaeger-spm-patch.yaml" {name=install-tracing-spm:jaeger-spm-patch-yaml} spec: volumes: - name: es-credentials @@ -760,14 +764,14 @@ Enabling SPM requires two changes: adding a SpanMetrics Connector to the OpenTel 5. Apply the patch: - ```bash + ```bash {name=install-tracing-spm:apply-jaeger-patch} kubectl patch opentelemetrycollector ${JAEGER_INSTANCE_NAME} -n ${JAEGER_NS} \ --type=merge -p "$(envsubst < jaeger-spm-patch.yaml)" ``` Wait for Jaeger to restart: - ```bash + ```bash {name=install-tracing-spm:wait-jaeger-rollout} kubectl rollout status deployment/${JAEGER_INSTANCE_NAME}-collector \ -n ${JAEGER_NS} --timeout=180s ``` diff --git a/docs/en/installing/runme-test_installing-distributed-tracing.sh b/docs/en/installing/runme-test_installing-distributed-tracing.sh index 8f5eaf3..92eb2e2 100755 --- a/docs/en/installing/runme-test_installing-distributed-tracing.sh +++ b/docs/en/installing/runme-test_installing-distributed-tracing.sh @@ -23,6 +23,105 @@ _in_otel_repo() { return $rc } +# 部署 telemetrygen 生成测试 trace。 +# 取出文档代码块后按需改写再执行(镜像替换与测试时长两处改动合并于此函数): +# - 测试时长:由 TRACING_TEST_DURATION 控制(默认 60s),覆盖文档默认的 150s +# - 镜像:USE_MESH_V2_TEST_SUITE_PLUGIN=true 时,参考 projects/mesh/project.sh 的 +# kubectl_apply_with_mirror,从 mesh-v2-test-suite 集群插件 registry 改写 telemetrygen 镜像 +_deploy_telemetrygen() { + local content + content=$(runme print install-tracing:deploy-telemetrygen 2>/dev/null) + if [ -z "$content" ]; then + log_error "无法获取代码块内容: install-tracing:deploy-telemetrygen" + return 1 + fi + + # 改写测试时长(文档默认 150s) + local duration="${TRACING_TEST_DURATION:-60s}" + log_info "telemetrygen 测试时长: $duration" + content="${content//--duration=150s/--duration=$duration}" + + # 改写镜像:USE_MESH_V2_TEST_SUITE_PLUGIN=true 时使用集群插件镜像仓库 + if [ "${USE_MESH_V2_TEST_SUITE_PLUGIN:-false}" = "true" ]; then + local registry + registry=$(kubectl -n cpaas-system get cm mesh-v2-test-suite-manifest \ + -o jsonpath='{.data.registry}' 2>/dev/null) + if [ -z "$registry" ]; then + log_error "USE_MESH_V2_TEST_SUITE_PLUGIN=true 但未能从 cpaas-system/mesh-v2-test-suite-manifest 读取 data.registry" + return 1 + fi + log_info "使用 mesh-v2-test-suite 集群插件镜像仓库: $registry" + content=$(printf '%s' "$content" | sed "s|ghcr\.io/open-telemetry/|${registry}/asm/|") + fi + + eval "$content" +} + +# SPM (Service Performance Monitoring) 章节测试,覆盖 install-tracing-spm:* 代码块。 +# 由 test_installing_distributed_tracing 在 TRACING_TEST_SPM=true 时调用。 +_test_spm() { + log_header "Service Performance Monitoring (SPM) 测试" + + # 步骤 19: 拉取 monitoring 端点与凭据 + log_info "步骤 19: 拉取 monitoring 配置" + eval "$(runme print install-tracing-spm:get-monitoring-config)" || { + log_error "拉取 monitoring 配置失败" + return 1 + } + + # 步骤 20: 创建 monitoring 凭据 Secret + log_info "步骤 20: 创建 monitoring 凭据 Secret" + runme run install-tracing-spm:create-monitoring-secret || { + log_error "创建 monitoring 凭据 Secret 失败" + return 1 + } + + # 步骤 21: Patch OpenTelemetry Collector 启用 SpanMetrics Connector + log_info "步骤 21: Patch OpenTelemetry Collector 启用 spanmetrics" + runme run install-tracing-spm:patch-otel-collector || { + log_error "Patch OpenTelemetry Collector 失败" + return 1 + } + + # 步骤 22: 等待 OpenTelemetry Collector 重启就绪 + log_info "步骤 22: 等待 OpenTelemetry Collector 重启就绪" + runme run install-tracing-spm:wait-otel-rollout || { + log_error "等待 OpenTelemetry Collector 重启失败" + return 1 + } + + # 步骤 23: 生成 jaeger-spm-patch.yaml 到 /tmp + log_info "步骤 23: 生成 /tmp/jaeger-spm-patch.yaml" + runme print install-tracing-spm:jaeger-spm-patch-yaml > /tmp/jaeger-spm-patch.yaml || { + log_error "生成 jaeger-spm-patch.yaml 失败" + return 1 + } + + # 步骤 24: 应用 SPM patch(需在 /tmp 目录下执行) + log_info "步骤 24: 应用 jaeger-spm-patch.yaml" + kubectl_apply_runme_block "install-tracing-spm:apply-jaeger-patch" "/tmp/" || { + log_error "应用 jaeger-spm-patch.yaml 失败" + return 1 + } + + # 步骤 25: 等待 Jaeger 重启就绪 + log_info "步骤 25: 等待 Jaeger 重启就绪" + runme run install-tracing-spm:wait-jaeger-rollout || { + log_error "等待 Jaeger 重启失败" + return 1 + } + + # 步骤 26: 重新部署 telemetrygen 验证 SPM 指标(文档 Verification 要求) + log_info "步骤 26: 重新部署 telemetrygen 验证 SPM" + _deploy_telemetrygen || { + log_error "SPM telemetrygen 验证失败" + return 1 + } + + log_success "SPM 测试完成" + return 0 +} + # 测试函数:分布式调用链安装 test_installing_distributed_tracing() { log_info "==========================================" @@ -175,10 +274,17 @@ test_installing_distributed_tracing() { return 1 } - # 步骤 16: 创建 otel OpenTelemetryCollector - log_info "步骤 16: 创建 OpenTelemetry Collector" - runme run install-tracing:create-otel-collector || { - log_error "创建 OpenTelemetry Collector 失败" + # 步骤 16: 生成 otel-collector.yaml 到 /tmp + log_info "步骤 16: 生成 /tmp/otel-collector.yaml" + runme print install-tracing:otel-collector-yaml > /tmp/otel-collector.yaml || { + log_error "生成 otel-collector.yaml 失败" + return 1 + } + + # 步骤 16.1: envsubst 渲染并 apply(需在 /tmp 目录下执行) + log_info "步骤 16.1: 渲染并应用 otel-collector.yaml" + kubectl_apply_runme_block "install-tracing:apply-otel-collector" "/tmp/" || { + log_error "应用 otel-collector.yaml 失败" return 1 } @@ -189,13 +295,21 @@ test_installing_distributed_tracing() { return 1 } - # 步骤 18: 部署 telemetrygen 生成测试 trace(内含 wait/delete,约 150s) + # 步骤 18: 部署 telemetrygen 生成测试 trace(内含 wait/delete) log_info "步骤 18: 部署 telemetrygen 生成测试 trace" - runme run install-tracing:deploy-telemetrygen || { + _deploy_telemetrygen || { log_error "telemetrygen 端到端验证失败" return 1 } + # 步骤 19-26:(可选)Service Performance Monitoring (SPM) 章节 + # SPM 需 ACP monitoring,默认跳过;设置 TRACING_TEST_SPM=true 启用。 + if [ "${TRACING_TEST_SPM:-false}" = "true" ]; then + _test_spm || return 1 + else + log_warn "跳过 SPM 章节测试(未设置 TRACING_TEST_SPM=true)" + fi + log_success "==========================================" log_success "Alauda Distributed Tracing 安装测试完成,所有验证通过!" log_success "==========================================" diff --git a/docs/en/uninstalling/runme-test_uninstalling-distributed-tracing.sh b/docs/en/uninstalling/runme-test_uninstalling-distributed-tracing.sh index b72d416..88a5e80 100755 --- a/docs/en/uninstalling/runme-test_uninstalling-distributed-tracing.sh +++ b/docs/en/uninstalling/runme-test_uninstalling-distributed-tracing.sh @@ -11,6 +11,16 @@ set -e source "$FRAMEWORK_ROOT/framework/common.sh" source "$FRAMEWORK_ROOT/framework/verify.sh" +# 在 opentelemetry-docs 仓库内执行命令的小封装(删除 CRDs 为跨仓库操作, +# uninstall-otel:* 代码块位于 opentelemetry-docs)。$OTEL_REPO_ROOT 由 run.sh 引擎注入。 +_in_otel_repo() { + pushd "$OTEL_REPO_ROOT" >/dev/null + "$@" + local rc=$? + popd >/dev/null + return $rc +} + # 执行删除类 runme 块并校验输出(模式 B) # 用法: _delete_and_verify <步骤描述> # 期望输出取自配对的 -output 代码块。 @@ -64,13 +74,24 @@ test_uninstalling_distributed_tracing() { log_info "步骤 4: 删除 Jaeger 命名空间" _delete_and_verify "删除 Jaeger 命名空间" uninstall-tracing:delete-jaeger-ns || return 1 - # 步骤 5: (可选) 删除 OTel Operator subscription - # 受 --skip-operator-and-crds 控制:传入时保留 Operator 以便后续测试复用。 + # 步骤 5-6: (可选) 删除 OTel Operator subscription 与 OpenTelemetry CRDs + # 受 --skip-operator-and-crds 控制:传入时保留 Operator 与 CRDs 以便后续测试复用。 if [ "${SKIP_OPERATOR_AND_CRDS:-false}" = "true" ]; then - log_info "步骤 5: 跳过删除 OTel Operator subscription (--skip-operator-and-crds)" + log_info "步骤 5-6: 跳过删除 OTel Operator subscription 与 CRDs (--skip-operator-and-crds)" else log_info "步骤 5: 删除 OTel Operator subscription" _delete_and_verify "删除 OTel Operator subscription" uninstall-tracing:delete-otel-subscription || return 1 + + # 步骤 6: 删除 OpenTelemetry CRDs(跨仓库:opentelemetry-docs 的 uninstall-otel:delete-crds) + log_info "步骤 6: 删除 OpenTelemetry CRDs" + if [ -z "${OTEL_REPO_ROOT:-}" ]; then + log_error "OTEL_REPO_ROOT 未注入,无法定位 opentelemetry-docs 删除 CRDs" + return 1 + fi + _in_otel_repo runme run uninstall-otel:delete-crds || { + log_error "删除 OpenTelemetry CRDs 失败" + return 1 + } fi log_success "==========================================" From dfd444fda5e536aa6dccbd5461058351dd545133 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B=E7=84=B6?= Date: Wed, 20 May 2026 21:46:24 +0800 Subject: [PATCH 4/8] fix: docs/en/installing/runme-test_installing-distributed-tracing.sh MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 王然 --- .../runme-test_installing-distributed-tracing.sh | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/docs/en/installing/runme-test_installing-distributed-tracing.sh b/docs/en/installing/runme-test_installing-distributed-tracing.sh index 92eb2e2..e3f373a 100755 --- a/docs/en/installing/runme-test_installing-distributed-tracing.sh +++ b/docs/en/installing/runme-test_installing-distributed-tracing.sh @@ -239,6 +239,16 @@ test_installing_distributed_tracing() { return 1 } + # 步骤 11.1: 等待 OpenTelemetryCollector 状态副本数收敛 + log_info "步骤 11.1: 等待 OpenTelemetryCollector status.scale.statusReplicas=1/1" + kubectl wait "opentelemetrycollector/${JAEGER_INSTANCE_NAME}" \ + -n "${JAEGER_NS}" \ + --for=jsonpath='{.status.scale.statusReplicas}'=1/1 \ + --timeout=180s || { + log_error "等待 OpenTelemetryCollector status.scale.statusReplicas=1/1 失败" + return 1 + } + # 步骤 12: 等待 Jaeger collector deployment 就绪 log_info "步骤 12: 等待 Jaeger collector 就绪" runme run install-tracing:wait-jaeger-rollout || { @@ -304,7 +314,7 @@ test_installing_distributed_tracing() { # 步骤 19-26:(可选)Service Performance Monitoring (SPM) 章节 # SPM 需 ACP monitoring,默认跳过;设置 TRACING_TEST_SPM=true 启用。 - if [ "${TRACING_TEST_SPM:-false}" = "true" ]; then + if [ "${TRACING_TEST_SPM:-true}" = "true" ]; then _test_spm || return 1 else log_warn "跳过 SPM 章节测试(未设置 TRACING_TEST_SPM=true)" From ba2d4c2d3a21557974187f804286cb5c3b10921a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B=E7=84=B6?= Date: Wed, 20 May 2026 22:08:32 +0800 Subject: [PATCH 5/8] fix: docs/en/installing/runme-test_installing-distributed-tracing.sh MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 王然 --- .../runme-test_installing-distributed-tracing.sh | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docs/en/installing/runme-test_installing-distributed-tracing.sh b/docs/en/installing/runme-test_installing-distributed-tracing.sh index e3f373a..b69727c 100755 --- a/docs/en/installing/runme-test_installing-distributed-tracing.sh +++ b/docs/en/installing/runme-test_installing-distributed-tracing.sh @@ -298,6 +298,16 @@ test_installing_distributed_tracing() { return 1 } + # 步骤 16.2: 等待 otel OpenTelemetryCollector 状态副本数收敛 + log_info "步骤 16.2: 等待 otel OpenTelemetryCollector status.scale.statusReplicas=1/1" + kubectl wait "opentelemetrycollector/otel" \ + -n "${JAEGER_NS}" \ + --for=jsonpath='{.status.scale.statusReplicas}'=1/1 \ + --timeout=180s || { + log_error "等待 otel OpenTelemetryCollector status.scale.statusReplicas=1/1 失败" + return 1 + } + # 步骤 17: 等待 otel collector deployment 就绪 log_info "步骤 17: 等待 OpenTelemetry Collector 就绪" runme run install-tracing:wait-otel-rollout || { From f73d8e2c9d96bb9b1fcbff4fa8fd696435a39dff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B=E7=84=B6?= Date: Thu, 21 May 2026 08:40:30 +0800 Subject: [PATCH 6/8] fix: docs/en/installing/runme-test_installing-distributed-tracing.sh MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 王然 --- docs/en/installing/installing-distributed-tracing.mdx | 4 ++-- .../runme-test_installing-distributed-tracing.sh | 9 ++++++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/docs/en/installing/installing-distributed-tracing.mdx b/docs/en/installing/installing-distributed-tracing.mdx index a79b24e..95c4933 100644 --- a/docs/en/installing/installing-distributed-tracing.mdx +++ b/docs/en/installing/installing-distributed-tracing.mdx @@ -582,7 +582,7 @@ After Jaeger v2 is running, deploy an OpenTelemetry Collector instance to receiv 3. Wait for the Collector pod to be ready: - ```bash {name=install-tracing:wait-otel-rollout} + ```bash {name=install-tracing:wait-otel-collector-rollout} kubectl rollout status deployment/otel-collector \ -n ${JAEGER_NS} --timeout=180s ``` @@ -688,7 +688,7 @@ Enabling SPM requires two changes: adding a SpanMetrics Connector to the OpenTel Wait for the Collector to restart: - ```bash {name=install-tracing-spm:wait-otel-rollout} + ```bash {name=install-tracing-spm:wait-otel-collector-rollout} kubectl rollout status deployment/otel-collector \ -n ${JAEGER_NS} --timeout=180s ``` diff --git a/docs/en/installing/runme-test_installing-distributed-tracing.sh b/docs/en/installing/runme-test_installing-distributed-tracing.sh index b69727c..c60bf99 100755 --- a/docs/en/installing/runme-test_installing-distributed-tracing.sh +++ b/docs/en/installing/runme-test_installing-distributed-tracing.sh @@ -37,7 +37,7 @@ _deploy_telemetrygen() { fi # 改写测试时长(文档默认 150s) - local duration="${TRACING_TEST_DURATION:-60s}" + local duration="${TRACING_TEST_DURATION:-80s}" log_info "telemetrygen 测试时长: $duration" content="${content//--duration=150s/--duration=$duration}" @@ -85,7 +85,7 @@ _test_spm() { # 步骤 22: 等待 OpenTelemetry Collector 重启就绪 log_info "步骤 22: 等待 OpenTelemetry Collector 重启就绪" - runme run install-tracing-spm:wait-otel-rollout || { + runme run install-tracing-spm:wait-otel-collector-rollout || { log_error "等待 OpenTelemetry Collector 重启失败" return 1 } @@ -118,6 +118,9 @@ _test_spm() { return 1 } + # 打印 Jaeger UI URL + runme run install-tracing:print-jaeger-url + log_success "SPM 测试完成" return 0 } @@ -310,7 +313,7 @@ test_installing_distributed_tracing() { # 步骤 17: 等待 otel collector deployment 就绪 log_info "步骤 17: 等待 OpenTelemetry Collector 就绪" - runme run install-tracing:wait-otel-rollout || { + runme run install-tracing:wait-otel-collector-rollout || { log_error "等待 OpenTelemetry Collector 就绪失败" return 1 } From 53f65e44169c1213bbe7f32052f57255e049a80f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B=E7=84=B6?= Date: Thu, 21 May 2026 09:23:52 +0800 Subject: [PATCH 7/8] perf: TRACING_TELEMETRYGEN_TEST_DURATION MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 王然 --- .../runme-test_installing-distributed-tracing.sh | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/docs/en/installing/runme-test_installing-distributed-tracing.sh b/docs/en/installing/runme-test_installing-distributed-tracing.sh index c60bf99..f9fa418 100755 --- a/docs/en/installing/runme-test_installing-distributed-tracing.sh +++ b/docs/en/installing/runme-test_installing-distributed-tracing.sh @@ -25,10 +25,12 @@ _in_otel_repo() { # 部署 telemetrygen 生成测试 trace。 # 取出文档代码块后按需改写再执行(镜像替换与测试时长两处改动合并于此函数): -# - 测试时长:由 TRACING_TEST_DURATION 控制(默认 60s),覆盖文档默认的 150s +# - 测试时长:第一次由 TRACING_TELEMETRYGEN_TEST_DURATION_1 控制(默认 30s), +# 第二次由 TRACING_TELEMETRYGEN_TEST_DURATION_2 控制(默认 80s),覆盖文档默认的 150s # - 镜像:USE_MESH_V2_TEST_SUITE_PLUGIN=true 时,参考 projects/mesh/project.sh 的 # kubectl_apply_with_mirror,从 mesh-v2-test-suite 集群插件 registry 改写 telemetrygen 镜像 _deploy_telemetrygen() { + local duration="$1" local content content=$(runme print install-tracing:deploy-telemetrygen 2>/dev/null) if [ -z "$content" ]; then @@ -37,7 +39,6 @@ _deploy_telemetrygen() { fi # 改写测试时长(文档默认 150s) - local duration="${TRACING_TEST_DURATION:-80s}" log_info "telemetrygen 测试时长: $duration" content="${content//--duration=150s/--duration=$duration}" @@ -113,7 +114,7 @@ _test_spm() { # 步骤 26: 重新部署 telemetrygen 验证 SPM 指标(文档 Verification 要求) log_info "步骤 26: 重新部署 telemetrygen 验证 SPM" - _deploy_telemetrygen || { + _deploy_telemetrygen "${TRACING_TELEMETRYGEN_TEST_DURATION_2:-80s}" || { log_error "SPM telemetrygen 验证失败" return 1 } @@ -320,7 +321,7 @@ test_installing_distributed_tracing() { # 步骤 18: 部署 telemetrygen 生成测试 trace(内含 wait/delete) log_info "步骤 18: 部署 telemetrygen 生成测试 trace" - _deploy_telemetrygen || { + _deploy_telemetrygen "${TRACING_TELEMETRYGEN_TEST_DURATION_1:-30s}" || { log_error "telemetrygen 端到端验证失败" return 1 } From a52383019b898febc2b29cc92f42375cf5d720b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B=E7=84=B6?= Date: Thu, 21 May 2026 10:44:19 +0800 Subject: [PATCH 8/8] perf: tracing es MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 王然 --- .../runme-test_installing-distributed-tracing.sh | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/docs/en/installing/runme-test_installing-distributed-tracing.sh b/docs/en/installing/runme-test_installing-distributed-tracing.sh index f9fa418..75957eb 100755 --- a/docs/en/installing/runme-test_installing-distributed-tracing.sh +++ b/docs/en/installing/runme-test_installing-distributed-tracing.sh @@ -114,7 +114,7 @@ _test_spm() { # 步骤 26: 重新部署 telemetrygen 验证 SPM 指标(文档 Verification 要求) log_info "步骤 26: 重新部署 telemetrygen 验证 SPM" - _deploy_telemetrygen "${TRACING_TELEMETRYGEN_TEST_DURATION_2:-80s}" || { + _deploy_telemetrygen "${TRACING_TELEMETRYGEN_TEST_DURATION_2:-130s}" || { log_error "SPM telemetrygen 验证失败" return 1 } @@ -132,11 +132,20 @@ test_installing_distributed_tracing() { log_info "开始 Alauda Distributed Tracing 安装测试" log_info "==========================================" - # 步骤 0: 检查 Elasticsearch 依赖(外部强依赖,缺失则 SKIPPED) + # 步骤 0: 检查 Elasticsearch 配置(可由 tracing project_prepare 从 ACP ES 自动注入) if [ -z "${TRACING_ES_ENDPOINT:-}" ] || [ -z "${TRACING_ES_USER:-}" ] || [ -z "${TRACING_ES_PASS:-}" ]; then - log_warn "SKIPPED: 未设置 TRACING_ES_ENDPOINT / TRACING_ES_USER / TRACING_ES_PASS,跳过分布式调用链安装测试" + if [ -n "${TRACING_ACP_ES_CLUSTER:-}" ]; then + log_error "TRACING_ACP_ES_CLUSTER=${TRACING_ACP_ES_CLUSTER},但未能注入 TRACING_ES_ENDPOINT / TRACING_ES_USER / TRACING_ES_PASS" + return 1 + fi + log_warn "SKIPPED: TRACING_ACP_ES_CLUSTER 为空且未设置 TRACING_ES_ENDPOINT / TRACING_ES_USER / TRACING_ES_PASS,跳过分布式调用链安装测试" return 0 fi + if [ -n "${TRACING_ACP_ES_CLUSTER:-}" ]; then + log_info "使用 ACP ES 配置: cluster=${TRACING_ACP_ES_CLUSTER} endpoint=${TRACING_ES_ENDPOINT}" + else + log_info "使用手动 Elasticsearch 配置: endpoint=${TRACING_ES_ENDPOINT}" + fi # 步骤 1: 安装 Alauda Build of OpenTelemetry v2 Operator(跨仓库前置依赖) log_info "步骤 1: 安装 OpenTelemetry v2 Operator"