From 56f89478cebbf276bf7af2378b7682ac42f49c5d Mon Sep 17 00:00:00 2001 From: Lev Kokotov Date: Mon, 18 May 2026 15:40:18 -0700 Subject: [PATCH 1/3] EE 2.0 --- files/aws-iam-policy.json | 32 ++++++++ templates/_helpers.tpl | 99 +++++++++++++++++++++++ templates/aws-secret.yaml | 15 ++++ templates/configmap.yaml | 137 ++++++++++++++++++++++++++++++++ templates/deployment.yaml | 82 ++++++++++++------- templates/ingress.yaml | 8 +- templates/networkpolicy.yaml | 18 ++--- templates/rbac.yaml | 102 ++++++++++++++++++++++++ templates/redis-deployment.yaml | 8 +- templates/redis-pdb.yaml | 6 +- templates/redis-service.yaml | 6 +- templates/secrets.yaml | 12 +-- templates/service.yaml | 8 +- values.yaml | 89 +++++++++++++++++++-- 14 files changed, 558 insertions(+), 64 deletions(-) create mode 100644 files/aws-iam-policy.json create mode 100644 templates/_helpers.tpl create mode 100644 templates/aws-secret.yaml create mode 100644 templates/configmap.yaml create mode 100644 templates/rbac.yaml diff --git a/files/aws-iam-policy.json b/files/aws-iam-policy.json new file mode 100644 index 0000000..841ca66 --- /dev/null +++ b/files/aws-iam-policy.json @@ -0,0 +1,32 @@ +{ + "Version": "2012-10-17", + "Statement": [ + { + "Sid": "RdsTopology", + "Effect": "Allow", + "Action": [ + "rds:DescribeDBClusters", + "rds:DescribeDBInstances", + "rds:DescribeDBClusterParameters", + "rds:DescribeDBParameters" + ], + "Resource": "*" + }, + { + "Sid": "CloudWatchMetrics", + "Effect": "Allow", + "Action": [ + "cloudwatch:GetMetricData" + ], + "Resource": "*" + }, + { + "Sid": "Ec2InstanceTypeSpecs", + "Effect": "Allow", + "Action": [ + "ec2:DescribeInstanceTypes" + ], + "Resource": "*" + } + ] +} diff --git a/templates/_helpers.tpl b/templates/_helpers.tpl new file mode 100644 index 0000000..12ddf3f --- /dev/null +++ b/templates/_helpers.tpl @@ -0,0 +1,99 @@ +{{/* +Expand the name of the chart. 
+*/}} +{{- define "pgdog-control.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +Truncated at 63 chars per the DNS naming spec. +*/}} +{{- define "pgdog-control.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Chart name and version, as used by the helm.sh/chart label. +*/}} +{{- define "pgdog-control.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Resource names for each component (preserve existing names for compatibility). +*/}} +{{- define "pgdog-control.control.fullname" -}} +{{- printf "%s-control" .Release.Name | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{- define "pgdog-control.redis.fullname" -}} +{{- printf "%s-redis" .Release.Name | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +ServiceAccount name for the control component. Falls back to the +control fullname when not explicitly set in values. +*/}} +{{- define "pgdog-control.control.serviceAccountName" -}} +{{- if .Values.control.rbac.serviceAccountName }} +{{- .Values.control.rbac.serviceAccountName }} +{{- else }} +{{- include "pgdog-control.control.fullname" . }} +{{- end }} +{{- end }} + +{{/* +Common labels shared by all resources. +*/}} +{{- define "pgdog-control.commonLabels" -}} +helm.sh/chart: {{ include "pgdog-control.chart" . }} +app.kubernetes.io/part-of: {{ include "pgdog-control.name" . 
}} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +{{- end }} + +{{/* +Labels for the control component. +*/}} +{{- define "pgdog-control.labels" -}} +{{ include "pgdog-control.commonLabels" . }} +{{ include "pgdog-control.selectorLabels" . }} +{{- end }} + +{{/* +Selector labels for the control component. +*/}} +{{- define "pgdog-control.selectorLabels" -}} +app.kubernetes.io/name: {{ include "pgdog-control.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +app.kubernetes.io/component: control +{{- end }} + +{{/* +Labels for the redis component. +*/}} +{{- define "pgdog-control.redis.labels" -}} +{{ include "pgdog-control.commonLabels" . }} +{{ include "pgdog-control.redis.selectorLabels" . }} +{{- end }} + +{{/* +Selector labels for the redis component. +*/}} +{{- define "pgdog-control.redis.selectorLabels" -}} +app.kubernetes.io/name: {{ include "pgdog-control.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +app.kubernetes.io/component: redis +{{- end }} diff --git a/templates/aws-secret.yaml b/templates/aws-secret.yaml new file mode 100644 index 0000000..84c9e5f --- /dev/null +++ b/templates/aws-secret.yaml @@ -0,0 +1,15 @@ +{{- if .Values.control.aws.accessKeyId -}} +apiVersion: v1 +kind: Secret +metadata: + name: {{ include "pgdog-control.control.fullname" . }}-aws-creds + labels: + {{- include "pgdog-control.labels" . | nindent 4 }} +type: Opaque +stringData: + AWS_ACCESS_KEY_ID: {{ .Values.control.aws.accessKeyId | quote }} + AWS_SECRET_ACCESS_KEY: {{ required "control.aws.secretAccessKey is required when accessKeyId is set" .Values.control.aws.secretAccessKey | quote }} + {{- with .Values.control.aws.sessionToken }} + AWS_SESSION_TOKEN: {{ . 
| quote }} + {{- end }} +{{- end -}} diff --git a/templates/configmap.yaml b/templates/configmap.yaml new file mode 100644 index 0000000..cc44ef3 --- /dev/null +++ b/templates/configmap.yaml @@ -0,0 +1,137 @@ +{{- $auth := .Values.control.config.auth | default dict -}} +{{- $cookieSecret := "" -}} +{{- if $auth.cookie_secret -}} +{{- $cookieSecret = $auth.cookie_secret -}} +{{- else -}} +{{- $existing := lookup "v1" "Secret" .Release.Namespace (printf "%s-secrets" .Release.Name) -}} +{{- if and $existing (index $existing.data "cookie_secret") -}} +{{- $cookieSecret = index $existing.data "cookie_secret" | b64dec -}} +{{- end -}} +{{- end -}} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "pgdog-control.control.fullname" . }}-config + labels: + {{- include "pgdog-control.labels" . | nindent 4 }} +data: + control.toml: | + {{- with .Values.control.config.rds }} + [rds] + {{- with .refresh_interval_secs }} + refresh_interval_secs = {{ . }} + {{- end }} + {{- end }} + + {{- with .Values.control.config.kube }} + + [kube] + {{- with .refresh_interval_secs }} + refresh_interval_secs = {{ . }} + {{- end }} + {{- end }} + + {{- with .Values.control.config.dns }} + + [dns] + {{- with .refresh_interval_secs }} + refresh_interval_secs = {{ . }} + {{- end }} + {{- end }} + + {{- with .Values.control.config.cloudwatch }} + + [cloudwatch] + {{- with .refresh_interval_secs }} + refresh_interval_secs = {{ . }} + {{- end }} + {{- with .lookback_secs }} + lookback_secs = {{ . }} + {{- end }} + {{- with .period_secs }} + period_secs = {{ . }} + {{- end }} + {{- end }} + + {{- with .Values.control.config.store }} + + [store] + {{- with .tick_secs }} + tick_secs = {{ . }} + {{- end }} + {{- with .stale_after_secs }} + stale_after_secs = {{ . }} + {{- end }} + {{- with .evict_after_secs }} + evict_after_secs = {{ . }} + {{- end }} + {{- with .metrics_retention_secs }} + metrics_retention_secs = {{ . 
}} + {{- end }} + {{- end }} + + {{- with .Values.control.config.helm }} + + [helm] + {{- with .chart }} + chart = {{ . | quote }} + {{- end }} + {{- with .repo }} + repo = {{ . | quote }} + {{- end }} + {{- end }} + + {{- if or $cookieSecret (gt (len $auth) 0) }} + + [auth] + {{- if $cookieSecret }} + cookie_secret = {{ $cookieSecret | quote }} + {{- end }} + {{- with $auth.redirect_base_url }} + redirect_base_url = {{ . | quote }} + {{- end }} + {{- if hasKey $auth "cookie_secure" }} + cookie_secure = {{ $auth.cookie_secure }} + {{- end }} + {{- with $auth.session_max_age_days }} + session_max_age_days = {{ . }} + {{- end }} + {{- with $auth.state_max_age_min }} + state_max_age_min = {{ . }} + {{- end }} + {{- with $auth.github }} + + [auth.github] + {{- with .client_id }} + client_id = {{ . | quote }} + {{- end }} + {{- with .client_secret }} + client_secret = {{ . | quote }} + {{- end }} + {{- with .allowed_orgs }} + allowed_orgs = [{{ range $i, $org := . }}{{ if $i }}, {{ end }}{{ $org | quote }}{{ end }}] + {{- end }} + {{- end }} + {{- with $auth.google }} + + [auth.google] + {{- with .client_id }} + client_id = {{ . | quote }} + {{- end }} + {{- with .client_secret }} + client_secret = {{ . | quote }} + {{- end }} + {{- with .allowed_domains }} + allowed_domains = [{{ range $i, $d := . }}{{ if $i }}, {{ end }}{{ $d | quote }}{{ end }}] + {{- end }} + {{- end }} + {{- end }} + + {{- $redis := .Values.control.config.redis | default dict }} + {{- $redisUrl := $redis.url | default (printf "redis://%s.%s.svc.cluster.local:6379" (include "pgdog-control.redis.fullname" .) .Release.Namespace) }} + + [redis] + url = {{ $redisUrl | quote }} + {{- with $redis.save_interval_secs }} + save_interval_secs = {{ . 
}} + {{- end }} diff --git a/templates/deployment.yaml b/templates/deployment.yaml index 4bef9a0..c3efb7f 100644 --- a/templates/deployment.yaml +++ b/templates/deployment.yaml @@ -1,20 +1,23 @@ apiVersion: apps/v1 kind: Deployment metadata: - name: {{ .Release.Name }}-control + name: {{ include "pgdog-control.control.fullname" . }} labels: - app: {{ .Release.Name }}-control + {{- include "pgdog-control.labels" . | nindent 4 }} spec: replicas: {{ .Values.control.replicas | default 1 }} selector: matchLabels: - app: {{ .Release.Name }}-control + {{- include "pgdog-control.selectorLabels" . | nindent 6 }} template: metadata: annotations: - kubectl.kubernetes.io/restartedAt: {{ now | date "2006-01-02T15:04:05Z07:00" | quote }} + # Roll the deployment when the rendered configmap changes. Uses + # a chart-scoped key so it doesn't fight `kubectl rollout restart` + # over `kubectl.kubernetes.io/restartedAt` under server-side apply. + checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }} labels: - app: {{ .Release.Name }}-control + {{- include "pgdog-control.selectorLabels" . | nindent 8 }} spec: {{- if gt (int (.Values.control.replicas | default 1)) 1 }} topologySpreadConstraints: @@ -23,42 +26,58 @@ spec: whenUnsatisfiable: DoNotSchedule labelSelector: matchLabels: - app: {{ .Release.Name }}-control + {{- include "pgdog-control.selectorLabels" . | nindent 12 }} {{- end }} + {{- if .Values.control.rbac.create }} + serviceAccountName: {{ include "pgdog-control.control.serviceAccountName" . }} + automountServiceAccountToken: true + {{- else }} automountServiceAccountToken: false + {{- end }} securityContext: seccompProfile: type: RuntimeDefault + {{- with .Values.image.pullSecrets }} + imagePullSecrets: + {{- toYaml . 
| nindent 8 }} + {{- end }} containers: - name: control - image: ghcr.io/pgdogdev/pgdog-enterprise/control:{{ .Values.image.tag | default .Chart.AppVersion }} - imagePullPolicy: Always + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} securityContext: allowPrivilegeEscalation: false capabilities: drop: ["ALL"] env: - {{- range $key, $value := .Values.env }} - {{- if and (ne $key "REDIS_URL") (ne $key "FRONTEND_URL") (ne $key "GOOGLE_REDIRECT_URL") (ne $key "SESSION_KEY") }} - - name: {{ $key }} - value: {{ $value | quote }} + - name: CONTROL_CONFIG + value: /etc/pgdog-control/control.toml + # control2's whoami uses these to look up the Service that fronts + # this pod (src/models/deployment/kube/whoami.rs); without them + # endpoint() falls back to the host.minikube.internal dev URL. + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + {{- with .Values.control.aws.region }} + - name: AWS_REGION + value: {{ . | quote }} {{- end }} + {{- if .Values.control.aws.accessKeyId }} + envFrom: + - secretRef: + name: {{ include "pgdog-control.control.fullname" . 
}}-aws-creds {{- end }} - - name: SESSION_KEY - valueFrom: - secretKeyRef: - name: {{ .Release.Name }}-secrets - key: SESSION_KEY - - name: REDIS_URL - value: "redis://{{ .Release.Name }}-redis.{{ .Release.Namespace }}.svc.cluster.local:6379" - - name: FRONTEND_URL - value: "https://{{ .Values.ingress.host }}" - - name: GOOGLE_REDIRECT_URL - value: "https://{{ .Values.ingress.host }}/google/oauth/callback" - - name: GITHUB_REDIRECT_URL - value: "https://{{ .Values.ingress.host }}/github/oauth/callback" + volumeMounts: + - name: config + mountPath: /etc/pgdog-control + readOnly: true ports: - - containerPort: 8080 + - containerPort: {{ .Values.control.port }} name: http resources: requests: @@ -70,7 +89,7 @@ spec: livenessProbe: httpGet: path: /healthz - port: 8080 + port: {{ .Values.control.port }} initialDelaySeconds: 30 periodSeconds: 30 timeoutSeconds: 5 @@ -78,8 +97,15 @@ spec: readinessProbe: httpGet: path: /healthz - port: 8080 + port: {{ .Values.control.port }} initialDelaySeconds: 30 periodSeconds: 10 timeoutSeconds: 3 failureThreshold: 3 + volumes: + - name: config + configMap: + name: {{ include "pgdog-control.control.fullname" . }}-config + items: + - key: control.toml + path: control.toml diff --git a/templates/ingress.yaml b/templates/ingress.yaml index bb9e8de..6e0e2bf 100644 --- a/templates/ingress.yaml +++ b/templates/ingress.yaml @@ -1,9 +1,10 @@ +{{- if .Values.ingress.enabled }} apiVersion: networking.k8s.io/v1 kind: Ingress metadata: - name: {{ .Release.Name }}-control + name: {{ include "pgdog-control.control.fullname" . }} labels: - app: {{ .Release.Name }}-control + {{- include "pgdog-control.labels" . | nindent 4 }} {{- if .Values.ingress.tls.enabled }} annotations: cert-manager.io/cluster-issuer: {{ .Values.ingress.clusterIssuer }} @@ -25,6 +26,7 @@ spec: pathType: Prefix backend: service: - name: {{ .Release.Name }}-control + name: {{ include "pgdog-control.control.fullname" . 
}} port: number: 80 +{{- end }} diff --git a/templates/networkpolicy.yaml b/templates/networkpolicy.yaml index 7b14e44..5ba56fe 100644 --- a/templates/networkpolicy.yaml +++ b/templates/networkpolicy.yaml @@ -2,13 +2,13 @@ apiVersion: networking.k8s.io/v1 kind: NetworkPolicy metadata: - name: {{ .Release.Name }}-control + name: {{ include "pgdog-control.control.fullname" . }} labels: - app: {{ .Release.Name }}-control + {{- include "pgdog-control.labels" . | nindent 4 }} spec: podSelector: matchLabels: - app: {{ .Release.Name }}-control + {{- include "pgdog-control.selectorLabels" . | nindent 6 }} policyTypes: - Ingress - Egress @@ -19,12 +19,12 @@ spec: kubernetes.io/metadata.name: ingress-nginx ports: - protocol: TCP - port: 8080 + port: {{ .Values.control.port }} egress: - to: - podSelector: matchLabels: - app: {{ .Release.Name }}-redis + {{- include "pgdog-control.redis.selectorLabels" . | nindent 10 }} ports: - protocol: TCP port: 6379 @@ -61,13 +61,13 @@ spec: apiVersion: networking.k8s.io/v1 kind: NetworkPolicy metadata: - name: {{ .Release.Name }}-redis + name: {{ include "pgdog-control.redis.fullname" . }} labels: - app: {{ .Release.Name }}-redis + {{- include "pgdog-control.redis.labels" . | nindent 4 }} spec: podSelector: matchLabels: - app: {{ .Release.Name }}-redis + {{- include "pgdog-control.redis.selectorLabels" . | nindent 6 }} policyTypes: - Ingress - Egress @@ -75,7 +75,7 @@ spec: - from: - podSelector: matchLabels: - app: {{ .Release.Name }}-control + {{- include "pgdog-control.selectorLabels" . | nindent 10 }} ports: - protocol: TCP port: 6379 diff --git a/templates/rbac.yaml b/templates/rbac.yaml new file mode 100644 index 0000000..a694c71 --- /dev/null +++ b/templates/rbac.yaml @@ -0,0 +1,102 @@ +{{- if .Values.control.rbac.create -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "pgdog-control.control.serviceAccountName" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "pgdog-control.labels" . 
| nindent 4 }} + {{- with .Values.control.aws.roleArn }} + annotations: + eks.amazonaws.com/role-arn: {{ . | quote }} + {{- end }} +--- +# Cluster-scoped because control2 lists namespaces and reads workloads +# across all namespaces (Api::all in src/models/deployment/kube/client.rs). +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ include "pgdog-control.control.fullname" . }} + labels: + {{- include "pgdog-control.labels" . | nindent 4 }} +rules: + # control2 lists/reads these cluster-wide (Api::all) to render the + # workload, services, and pod views across every namespace. + - apiGroups: [""] + resources: ["namespaces", "configmaps", "secrets", "services"] + verbs: ["get", "list"] + - apiGroups: [""] + resources: ["pods"] + verbs: ["get", "list", "watch"] + # /api/web/deployments/{id}/logs streams from pod stdout. + - apiGroups: [""] + resources: ["pods/log"] + verbs: ["get"] + # `watch` powers src/models/deployment/kube/watcher.rs (event-driven + # refresh). Read-only cluster-wide; write verbs (patch for rollout + # restart) live in the namespace-scoped Role below so they're confined + # to `writeNamespaces`. + - apiGroups: ["apps"] + resources: ["deployments", "statefulsets"] + verbs: ["get", "list", "watch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ include "pgdog-control.control.fullname" . }} + labels: + {{- include "pgdog-control.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ include "pgdog-control.control.fullname" . }} +subjects: + - kind: ServiceAccount + name: {{ include "pgdog-control.control.serviceAccountName" . }} + namespace: {{ .Release.Namespace }} +{{- range $ns := .Values.control.rbac.writeNamespaces }} +--- +# Namespace-scoped write access for workloads control2 manages. 
+apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: {{ include "pgdog-control.control.fullname" $ }} + namespace: {{ $ns }} + labels: + {{- include "pgdog-control.labels" $ | nindent 4 }} +rules: + # Workload resources the PgDog chart renders in each install namespace + # — kept in lockstep with `pgdog-helm/templates/` (and with HELM_KINDS + # in src/models/deployment/kube/can_i/helm.rs, which is what the can-i + # report audits). Adding a new kind here without updating either side + # produces silent gaps. + - apiGroups: [""] + resources: ["configmaps", "secrets", "services", "serviceaccounts"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] + - apiGroups: ["apps"] + resources: ["deployments", "statefulsets"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] + - apiGroups: ["rbac.authorization.k8s.io"] + resources: ["roles", "rolebindings"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] + - apiGroups: ["policy"] + resources: ["poddisruptionbudgets"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: {{ include "pgdog-control.control.fullname" $ }} + namespace: {{ $ns }} + labels: + {{- include "pgdog-control.labels" $ | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: {{ include "pgdog-control.control.fullname" $ }} +subjects: + - kind: ServiceAccount + name: {{ include "pgdog-control.control.serviceAccountName" $ }} + namespace: {{ $.Release.Namespace }} +{{- end }} +{{- end }} diff --git a/templates/redis-deployment.yaml b/templates/redis-deployment.yaml index d69709a..2222387 100644 --- a/templates/redis-deployment.yaml +++ b/templates/redis-deployment.yaml @@ -1,18 +1,18 @@ apiVersion: apps/v1 kind: Deployment metadata: - name: {{ .Release.Name }}-redis + name: {{ include "pgdog-control.redis.fullname" . 
}} labels: - app: {{ .Release.Name }}-redis + {{- include "pgdog-control.redis.labels" . | nindent 4 }} spec: replicas: 1 selector: matchLabels: - app: {{ .Release.Name }}-redis + {{- include "pgdog-control.redis.selectorLabels" . | nindent 6 }} template: metadata: labels: - app: {{ .Release.Name }}-redis + {{- include "pgdog-control.redis.selectorLabels" . | nindent 8 }} annotations: cluster-autoscaler.kubernetes.io/safe-to-evict: "false" spec: diff --git a/templates/redis-pdb.yaml b/templates/redis-pdb.yaml index 476481d..8d15b69 100644 --- a/templates/redis-pdb.yaml +++ b/templates/redis-pdb.yaml @@ -1,11 +1,11 @@ apiVersion: policy/v1 kind: PodDisruptionBudget metadata: - name: {{ .Release.Name }}-redis + name: {{ include "pgdog-control.redis.fullname" . }} labels: - app: {{ .Release.Name }}-redis + {{- include "pgdog-control.redis.labels" . | nindent 4 }} spec: minAvailable: 1 selector: matchLabels: - app: {{ .Release.Name }}-redis + {{- include "pgdog-control.redis.selectorLabels" . | nindent 6 }} diff --git a/templates/redis-service.yaml b/templates/redis-service.yaml index 8dfde45..00390c2 100644 --- a/templates/redis-service.yaml +++ b/templates/redis-service.yaml @@ -1,9 +1,9 @@ apiVersion: v1 kind: Service metadata: - name: {{ .Release.Name }}-redis + name: {{ include "pgdog-control.redis.fullname" . }} labels: - app: {{ .Release.Name }}-redis + {{- include "pgdog-control.redis.labels" . | nindent 4 }} spec: type: ClusterIP ports: @@ -12,4 +12,4 @@ spec: protocol: TCP name: redis selector: - app: {{ .Release.Name }}-redis + {{- include "pgdog-control.redis.selectorLabels" . 
| nindent 4 }} diff --git a/templates/secrets.yaml b/templates/secrets.yaml index 7fa0069..32d341f 100644 --- a/templates/secrets.yaml +++ b/templates/secrets.yaml @@ -1,14 +1,16 @@ -{{- $existingSecret := lookup "v1" "Secret" .Release.Namespace (printf "%s-secrets" .Release.Name) }} +{{- if not (.Values.control.config.auth | default dict).cookie_secret -}} +{{- $existing := lookup "v1" "Secret" .Release.Namespace (printf "%s-secrets" .Release.Name) -}} apiVersion: v1 kind: Secret metadata: name: {{ .Release.Name }}-secrets labels: - app: {{ .Release.Name }}-control + {{- include "pgdog-control.labels" . | nindent 4 }} type: Opaque data: - {{- if $existingSecret }} - SESSION_KEY: {{ index $existingSecret.data "SESSION_KEY" }} + {{- if and $existing (index $existing.data "cookie_secret") }} + cookie_secret: {{ index $existing.data "cookie_secret" }} {{- else }} - SESSION_KEY: {{ randAlphaNum 64 | b64enc }} + cookie_secret: {{ randAlphaNum 64 | b64enc }} {{- end }} +{{- end -}} diff --git a/templates/service.yaml b/templates/service.yaml index bc33af5..d30a3ed 100644 --- a/templates/service.yaml +++ b/templates/service.yaml @@ -1,15 +1,15 @@ apiVersion: v1 kind: Service metadata: - name: {{ .Release.Name }}-control + name: {{ include "pgdog-control.control.fullname" . }} labels: - app: {{ .Release.Name }}-control + {{- include "pgdog-control.labels" . | nindent 4 }} spec: type: ClusterIP ports: - port: 80 - targetPort: 8080 + targetPort: {{ .Values.control.port }} protocol: TCP name: http selector: - app: {{ .Release.Name }}-control + {{- include "pgdog-control.selectorLabels" . 
| nindent 4 }} diff --git a/values.yaml b/values.yaml index 22c906e..431040c 100644 --- a/values.yaml +++ b/values.yaml @@ -1,10 +1,48 @@ -env: - # DATABASE_URL: "" - # GOOGLE_CLIENT_ID: "" - # GOOGLE_CLIENT_SECRET: "" - # SESSION_KEY: "" +image: + repository: ghcr.io/pgdogdev/pgdog-enterprise/control + # tag defaults to .Chart.AppVersion + tag: "" + pullPolicy: IfNotPresent + pullSecrets: [] control: + port: 8080 + aws: + # IRSA role for AWS API access (RDS/CloudWatch). When set, the + # ServiceAccount is annotated with eks.amazonaws.com/role-arn so the + # EKS pod-identity webhook injects AWS_ROLE_ARN + + # AWS_WEB_IDENTITY_TOKEN_FILE into the container, and the AWS SDK + # exchanges the projected SA token for temporary credentials via STS. + # Requires that an OIDC provider for the cluster is registered in IAM, + # and that the role's trust policy permits this SA + # (system:serviceaccount:<namespace>:<release>-control). + roleArn: "" + # Static IAM-user credentials for testing against AWS from a cluster + # without IRSA (minikube, kind). When accessKeyId is set the chart + # renders a `<release>-control-aws-creds` Secret and the deployment loads it + # via envFrom. Leave empty in EKS — roleArn above is preferred. + accessKeyId: "" + secretAccessKey: "" + sessionToken: "" # optional; only for temporary credentials + # AWS region the SDK should target. Emitted as AWS_REGION on the + # container. Required when the pod isn't on an EKS node whose IMDS + # exposes one. + region: "" + rbac: + # Create a ServiceAccount, ClusterRole, and ClusterRoleBinding granting + # the control pod read access to namespaces, pods, services, deployments, + # statefulsets, configmaps, and secrets across the cluster. Required for the Kube + # workload inspector in the UI. + create: true + # Override the ServiceAccount name. When empty, falls back to + # `<release>-control`. + serviceAccountName: "" + # Namespaces where control2 is allowed to create/update/patch/delete + # workloads (deployments, statefulsets, configmaps, secrets, services). 
+ # A Role + RoleBinding is generated in each listed namespace. Empty + # means no write access anywhere — list the release namespace + # explicitly if you want control to manage workloads alongside itself. + writeNamespaces: [] resources: requests: memory: "256Mi" @@ -12,6 +50,46 @@ control: limits: memory: "4Gi" cpu: "1000m" + # Runtime configuration written to /etc/pgdog-control/control.toml. + # Mirrors control2/src/config.rs. Every field is optional — anything left + # unset falls back to the Rust-side default. Field names match the TOML + # keys (snake_case) so values are passed through verbatim. + config: + rds: {} + # refresh_interval_secs: 60 + kube: {} + # refresh_interval_secs: 15 + dns: {} + # refresh_interval_secs: 30 + cloudwatch: {} + # refresh_interval_secs: 60 + # lookback_secs: 3600 + # period_secs: 60 + store: {} + # tick_secs: 1 + # stale_after_secs: 5 + # evict_after_secs: 60 + # metrics_retention_secs: 300 + helm: {} + # chart: pgdog + # repo: pgdogdev + auth: {} + # cookie_secret: "" # optional; random key generated at boot when absent + # redirect_base_url: "" # e.g. 
https://control.example.com + # cookie_secure: true + # session_max_age_days: 30 + # state_max_age_min: 10 + # github: + # client_id: "" + # client_secret: "" + # allowed_orgs: [] + # google: + # client_id: "" + # client_secret: "" + # allowed_domains: [] + redis: {} + # url: "" # optional; defaults to in-cluster redis + # save_interval_secs: 60 redis: resources: @@ -26,6 +104,7 @@ networkPolicy: enabled: false ingress: + enabled: true tls: enabled: true clusterIssuer: "letsencrypt-prod" From 9eef7daae9e574d4597c774c4f421156ade79a01 Mon Sep 17 00:00:00 2001 From: Lev Kokotov Date: Thu, 21 May 2026 10:00:12 -0700 Subject: [PATCH 2/3] save --- Chart.yaml | 2 +- README.md | 512 ++++++++++++++++++++++++++++++++++- templates/deployment.yaml | 20 +- templates/ingress-aws.yaml | 36 +++ templates/ingress-nginx.yaml | 35 +++ templates/ingress.yaml | 28 +- values.yaml | 58 +++- 7 files changed, 672 insertions(+), 19 deletions(-) create mode 100644 templates/ingress-aws.yaml create mode 100644 templates/ingress-nginx.yaml diff --git a/Chart.yaml b/Chart.yaml index 0e2480d..05ae0a4 100644 --- a/Chart.yaml +++ b/Chart.yaml @@ -2,5 +2,5 @@ apiVersion: v2 name: pgdog-control description: PgDog Control type: application -version: 0.1.10 +version: 0.2.0 appVersion: "v2026-04-06" diff --git a/README.md b/README.md index 41bfcd2..75b214f 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,513 @@ # PgDog EE Helm Chart -Production-ready [Helm](https://helm.sh) chart for PgDog Enterprise [Control Plane](https://docs.pgdog.dev/enterprise_edition/control_plane/). +Production-ready [Helm](https://helm.sh) chart for the PgDog Enterprise +[Control Plane](https://docs.pgdog.dev/enterprise_edition/control_plane/). + +## Installation + +```sh +helm repo add pgdogdev-ee https://helm-ee.pgdog.dev +helm install control pgdogdev-ee/pgdog-control +``` + +## Chart summary + +This chart installs two deployments: PgDog control plane and Redis. 
+ +The PgDog deployment contains the following components: + +| Components | Description | +|-|-| +| Deployment | PgDog control plane deployment, with one replica. | +| Service | Service pointing to the deployment. Selector labels are configured automatically. | +| Ingress | Three (3) types of ingress are supported. See [ingress](#ingress) for more details. | +| ConfigMap | Configuration for the control plane. | +| Secret | Secret that stores the key used to encrypt authentication cookies. | +| Service account, Cluster role, Cluster role bindings | Service account with RBAC to access select Kube APIs. See [RBAC](#rbac) for more details. | + +In addition to installing the PgDog control plane, this chart will deploy a Redis deployment (with one replica). The control plane uses Redis for storing +metrics. The Redis deployment has the following components: + +| Components | Description | +|-|-| +| Deployment | Redis deployment with one replica. | +| Service | Redis service pointing to the deployment, with selector labels configured automatically. | + +### Ingress + +The PgDog control plane has a web dashboard. It can be accessed through the Ingress the chart creates. The chart supports 3 kinds of ingress: + +- Nginx +- AWS ALB +- Default + +Nginx and AWS ALB ingresses are presets, with set annotations that should work for most deployments. The Default ingress allows the user to configure all Ingress options (class, annotations, etc.). + +The mode is selected by `ingress.mode`. The chart renders exactly one Ingress, whose rule always routes `/` to the control Service on port 80. Only the metadata, `ingressClassName`, and `tls` differ between modes. + +All three modes share the options below: + +| Option | Description | +|-|-| +| `ingress.enabled` | Enable/disable the Ingress (bool, default `true`). | +| `ingress.mode` | One of `nginx`, `aws`, or `default`. Defaults to `nginx`. | +| `ingress.host` | External hostname, e.g. pgdog.acme.com. 
Required for Nginx and AWS ALB; optional for Default. | +| `ingress.labels` | Extra `metadata.labels` merged on top of the chart's standard labels (map, default `{}`). | + +#### Nginx + +The Nginx preset targets [ingress-nginx](https://kubernetes.github.io/ingress-nginx/) with [cert-manager](https://cert-manager.io/) handling certificate issuance. The chart hardcodes `ingressClassName: nginx`, emits the cert-manager and ssl-redirect annotations, and renders a `tls` block that references `<release>-control-tls`. cert-manager then populates that Secret with the certificate it obtains from the configured cluster issuer. + +```yaml +ingress: + enabled: true + mode: nginx + host: pgdog.acme.com + nginx: + tls: + enabled: true + clusterIssuer: letsencrypt-prod + sslRedirect: "true" +``` + +| Option | Description | +|-|-| +| `ingress.nginx.tls.enabled` | When `true`, emits the cert-manager and ssl-redirect annotations and a `tls` block referencing `<release>-control-tls` (bool, default `true`). | +| `ingress.nginx.clusterIssuer` | Value of the `cert-manager.io/cluster-issuer` annotation (string, default `letsencrypt-prod`). | +| `ingress.nginx.sslRedirect` | Value of the `nginx.ingress.kubernetes.io/ssl-redirect` annotation. Quoted because nginx expects a string (string, default `"true"`). | + +#### AWS ALB + +The AWS ALB preset targets the [AWS Load Balancer Controller](https://kubernetes-sigs.github.io/aws-load-balancer-controller/). The chart hardcodes `ingressClassName: alb` and `alb.ingress.kubernetes.io/target-type: ip`, and lets ACM terminate TLS at the load balancer. Supply an ACM cert ARN to get the HTTPS listener; leave it empty for HTTP-only. + +```yaml +ingress: + enabled: true + mode: aws + host: control.example.com + aws: + scheme: internet-facing + certificateArn: arn:aws:acm:us-east-1:111111111111:certificate/abc-123 + sslRedirect: true +``` + +| Option | Description | +|-|-| +| `ingress.aws.scheme` | `alb.ingress.kubernetes.io/scheme`. Either `internet-facing` or `internal` (string, default `internet-facing`). 
| +| `ingress.aws.certificateArn` | ACM cert ARN attached to the HTTPS listener. Empty = HTTP-only ALB, no 443 listener (string, default `""`). | +| `ingress.aws.sslRedirect` | When `true` and `certificateArn` is set, the ALB redirects HTTP:80 → HTTPS:443. Ignored when `certificateArn` is empty (bool, default `true`). | + +### Default + +The Default mode is selected with `ingress.mode: default`. The chart adds nothing on top: no annotations, no `ingressClassName`, no `tls` block. You can route through any controller (Traefik, HAProxy, Contour, GKE, etc.) by supplying the keys it expects. + +```yaml +ingress: + enabled: true + mode: default + host: control.example.com + ingressClassName: traefik + annotations: + traefik.ingress.kubernetes.io/router.entrypoints: websecure + cert-manager.io/cluster-issuer: letsencrypt-prod + tls: + - hosts: [control.example.com] + secretName: control-tls +``` + +| Option | Description | +|-|-| +| `ingress.ingressClassName` | Rendered as `spec.ingressClassName` when non-empty (string, default `""`). | +| `ingress.annotations` | Rendered verbatim as `metadata.annotations` (map, default `{}`). | +| `ingress.tls` | Rendered verbatim under `spec.tls`. Supply the full `[{hosts, secretName}]` list (list, default `[]`). | + +### DNS + +If you need TLS, you will also need to set up DNS. In AWS, you can create a Route53 CNAME record pointing to the ALB and issue a cert for it in ACM. If using the Nginx controller, `cert-manager` will issue the certificate, but you still need to create a DNS record manually. + +## RBAC + +The control plane talks to the Kubernetes API in two distinct ways: it **reads** workloads from every namespace so the dashboard can render them, and it **writes** to a short list of namespaces where you actually want it to manage PgDog deployments. The chart's RBAC matches that split: broad read everywhere, narrow write only where you opt in.
+ +When `control.rbac.create` is `true` (the default), the chart renders: + +- A `ServiceAccount` for the control pod. If `control.aws.roleArn` is set, the ServiceAccount also carries the `eks.amazonaws.com/role-arn` annotation, which is what EKS IRSA looks for when handing the pod temporary AWS credentials. +- A `ClusterRole` and `ClusterRoleBinding` granting **read-only** access cluster-wide. This is enough for the dashboard to list namespaces and read deployments, statefulsets, pods, services, configmaps, and secrets in any namespace. It cannot change anything. Pod logs are included so the deployment log view works. +- For each namespace you list in `control.rbac.writeNamespaces`, a namespace-scoped `Role` and `RoleBinding` granting **write** access (create, update, patch, delete) on the resources PgDog actually manages: deployments, statefulsets, services, configmaps, secrets, service accounts, roles, role bindings, and pod disruption budgets. Namespaces not on the list stay strictly read-only. + +A typical setup grants write access only to the namespaces where you want PgDog clusters to live: + +```yaml +control: + rbac: + create: true + writeNamespaces: + - pgdog-prod + - pgdog-staging +``` + +With the above, the dashboard can see workloads in every namespace, but it can only spin up or tear down PgDog deployments in `pgdog-prod` and `pgdog-staging`. Leaving `writeNamespaces` empty produces a fully read-only install. The dashboard still works, but the "deploy" actions will be rejected by the API server. + +| Option | Description | +|-|-| +| `control.rbac.create` | Render the ServiceAccount and the RBAC bindings. When `false`, no RBAC is rendered and the pod runs without a mounted API token. The Kubernetes views in the dashboard will be empty (bool, default `true`). | +| `control.rbac.serviceAccountName` | Override the generated ServiceAccount name. Empty falls back to `<release>-control` (string, default `""`).
| +| `control.rbac.writeNamespaces` | Namespaces where the control plane is allowed to manage PgDog workloads. Each entry produces one Role + RoleBinding pair. Empty means the install is read-only everywhere (list, default `[]`). | + +### Disabling RBAC + +If your cluster manages RBAC out-of-band (a platform team's controller, GitOps, an admission policy), set `control.rbac.create: false`. The chart then renders no ServiceAccount, no ClusterRole/Binding, and no Role/Bindings, and the deployment runs the pod with `automountServiceAccountToken: false`. The dashboard still serves the UI, but every Kubernetes-backed view will be empty until you bind an externally-managed ServiceAccount with equivalent permissions to the pod yourself. + +## AWS access (EKS / IRSA) + +The control plane reads RDS topology and CloudWatch metrics so the dashboard can show your databases alongside the PgDog workloads. To do that on EKS without baking long-lived keys into the cluster, the recommended path is **IRSA** (IAM Roles for Service Accounts): the pod's ServiceAccount is annotated with an IAM role ARN, the EKS pod-identity webhook injects a projected token, and the AWS SDK inside the container exchanges that token for temporary credentials via STS. + +This needs three things, only one of which is in the chart: + +1. **An OIDC provider for the cluster, registered in IAM.** This is a one-time per-cluster setup (`eksctl utils associate-iam-oidc-provider --cluster <cluster-name> --approve`, or the equivalent Terraform / console steps). Without it, STS has nothing to validate the projected token against. +2. **An IAM role** whose trust policy lets the pod's ServiceAccount assume it, with a permissions policy granting read access to RDS and CloudWatch. Details below. +3. **`control.aws.roleArn`** set to that role's ARN. The chart annotates the ServiceAccount with `eks.amazonaws.com/role-arn: <role-arn>`, and the rest happens automatically inside the pod.
+ +### Trust policy + +The role must trust the cluster's OIDC provider and scope the trust to the control plane's ServiceAccount. That subject is `system:serviceaccount:<namespace>:<release>-control`, with `<release>-control` replaced by `control.rbac.serviceAccountName` if you overrode it. Confirm the exact subject after `helm install` with: + +```sh +kubectl -n <namespace> get sa \ + -l app.kubernetes.io/instance=<release>,app.kubernetes.io/component=control \ + -o name +``` + +The role's trust policy has to match the SA name byte-for-byte. Any mismatch surfaces as `AccessDenied: Not authorized to perform sts:AssumeRoleWithWebIdentity` in the pod logs. Replace the account ID, region, and OIDC ID with your own: + +```json +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Principal": { + "Federated": "arn:aws:iam::111111111111:oidc-provider/oidc.eks.us-east-1.amazonaws.com/id/EXAMPLED539D4633E53DE1B71EXAMPLE" + }, + "Action": "sts:AssumeRoleWithWebIdentity", + "Condition": { + "StringEquals": { + "oidc.eks.us-east-1.amazonaws.com/id/EXAMPLED539D4633E53DE1B71EXAMPLE:sub": "system:serviceaccount:default:control-control", + "oidc.eks.us-east-1.amazonaws.com/id/EXAMPLED539D4633E53DE1B71EXAMPLE:aud": "sts.amazonaws.com" + } + } + } + ] +} +``` + +The `:sub` condition is what keeps any other pod in the cluster from assuming this role. Leaving it off would let any ServiceAccount with the OIDC trust pick it up. + +#### Generating the trust policy + +Rather than hand-edit the JSON, you can derive every field from the live cluster with `aws` and `kubectl`.
Set the four inputs at the top, then pipe the output straight into `aws iam create-role` or save it to a file: + +```sh +CLUSTER=pgdog-prod +REGION=us-west-2 +NAMESPACE=control-staging +RELEASE=pgdog-control + +OIDC_HOST=$(aws eks describe-cluster --name "$CLUSTER" --region "$REGION" \ + --query 'cluster.identity.oidc.issuer' --output text | sed 's|^https://||') +ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text) +SA=$(kubectl -n "$NAMESPACE" get sa \ + -l app.kubernetes.io/instance="$RELEASE",app.kubernetes.io/component=control \ + -o jsonpath='{.items[0].metadata.name}') + +cat > trust-policy.json <<EOF +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Principal": { + "Federated": "arn:aws:iam::${ACCOUNT_ID}:oidc-provider/${OIDC_HOST}" + }, + "Action": "sts:AssumeRoleWithWebIdentity", + "Condition": { + "StringEquals": { + "${OIDC_HOST}:sub": "system:serviceaccount:${NAMESPACE}:${SA}", + "${OIDC_HOST}:aud": "sts.amazonaws.com" + } + } + } + ] +} +EOF +``` + +### Permissions policy + +The role also needs a permissions policy that grants read access to RDS topology, CloudWatch metrics, and EC2 instance-type specs. A ready-made policy document ships with the chart in `files/aws-iam-policy.json`. + +### Static credentials + +On clusters without IRSA, set `control.aws.accessKeyId` and `control.aws.secretAccessKey` (plus `control.aws.sessionToken` for temporary credentials). The chart will render them into a `<release>-aws-creds` Secret and load it via `envFrom`. Don't do this on EKS; IRSA is strictly better. + +| Option | Description | +|-|-| +| `control.aws.roleArn` | IAM role ARN. When non-empty, annotates the ServiceAccount with `eks.amazonaws.com/role-arn` so the EKS pod-identity webhook can inject `AWS_ROLE_ARN` and `AWS_WEB_IDENTITY_TOKEN_FILE` (string, default `""`). | +| `control.aws.region` | AWS region the SDK targets. Rendered as `AWS_REGION` on the container (string, default `""`). | +| `control.aws.accessKeyId` / `secretAccessKey` / `sessionToken` | Static IAM-user credentials. Only for non-EKS clusters. Don't set these alongside `roleArn`; pick one (string, default `""`). | + +## Configuration + +The control plane reads its runtime configuration from a TOML file at `/etc/pgdog-control/control.toml`. The chart materializes that file from `control.config` in `values.yaml`. Every nested key under `control.config` becomes a TOML table, and field names map straight through. Every section and every field is optional; anything you omit falls back to a hardcoded default baked into the binary, so a minimal install only sets the handful of values that need to be non-default. + +Each subsection below covers one TOML section. The source of truth for the defaults lives in `control2/src/config.rs`.
+ +### Authentication + +`control.config.auth` wires up the OAuth-backed login flow for the dashboard. GitHub and Google are supported and can be enabled side by side. At least one needs to be configured before anyone outside the cluster can log in. + +```yaml +control: + config: + auth: + redirect_base_url: https://control.acme.com + cookie_secure: true + session_max_age_days: 30 + github: + client_id: Iv1.0123456789abcdef + client_secret: shhh + allowed_orgs: [acme-corp] + google: + client_id: 0123456789-abc.apps.googleusercontent.com + client_secret: shhh + allowed_domains: [acme.com] +``` + +| Option | Description | +|-|-| +| `redirect_base_url` | Public base URL of the dashboard. Used to build the OAuth redirect URI registered with each provider, e.g. `https://control.acme.com/auth/github/callback`. Defaults to `http://localhost:8080` (string, optional). | +| `cookie_secret` | Master key used to sign the session and CSRF cookies. **Leave empty in production.** The chart generates a random 64-character key on first install and stores it in a `<release>-secrets` Secret, then reuses it on every `helm upgrade` via a `lookup` call so sessions survive rollouts. Setting this explicitly disables the helper Secret (string, optional). | +| `cookie_secure` | Set the `Secure` flag on cookies. Disable only for local HTTP testing (bool, default `true`). | +| `session_max_age_days` | Lifetime of the signed session cookie (int, default `30`). | +| `state_max_age_min` | Lifetime of the per-request CSRF state cookie. Has to outlive the user clicking through the provider's consent screen (int, default `10`). | +| `github.client_id` / `github.client_secret` | OAuth credentials from the GitHub App. Required to enable the GitHub login route. | +| `github.allowed_orgs` | If non-empty, only users the GitHub API reports as members of one of these orgs are allowed to log in. The `read:org` scope is added automatically when this list is non-empty (list of strings, default `[]`).
| +| `google.client_id` / `google.client_secret` | OAuth credentials from the Google Cloud OAuth client. Required to enable the Google login route. | +| `google.allowed_domains` | If non-empty, only users whose verified Google email's domain (the part after `@`, compared case-insensitively) appears in this list are allowed to log in (list of strings, default `[]`). | + +### Helm + +When the dashboard provisions a new PgDog cluster, it shells out to `helm upgrade --install` against a chart fetched from a Helm repo. `control.config.helm` controls which chart and which repo. The defaults point at the public `pgdog` chart in the `pgdogdev` repo on `helm.pgdog.dev`, which is what you want unless you mirror the chart internally. + +```yaml +control: + config: + helm: + chart: pgdog + repo: pgdogdev + repo_url: https://helm.pgdog.dev +``` + +| Option | Description | +|-|-| +| `chart` | Chart name within the repo. The control plane installs `{repo}/{chart}` (string, default `pgdog`). | +| `repo` | Locally-registered repo name. Used both as the prefix in the chart reference and as the name passed to `helm repo add` (string, default `pgdogdev`). | +| `repo_url` | Repo index URL. This is what `helm repo add <repo>` is pointed at on boot, so the dashboard doesn't need an out-of-band `helm repo add` step (string, default `https://helm.pgdog.dev`). | + +### Background polling + +The dashboard refreshes its view of the world by polling each backing system on a fixed cadence. Defaults are tuned for production; lower them if you want faster updates at the cost of more API calls, or raise them if you're trying to stay under a rate limit. CloudWatch and RDS settings are no-ops unless AWS credentials are configured.
+ +```yaml +control: + config: + rds: + refresh_interval_secs: 60 + kube: + refresh_interval_secs: 15 + dns: + refresh_interval_secs: 30 + cloudwatch: + refresh_interval_secs: 60 + lookback_secs: 3600 + period_secs: 60 +``` + +| Option | Description | +|-|-| +| `rds.refresh_interval_secs` | How often to poll AWS RDS for cluster and instance topology (int, default `60`). | +| `kube.refresh_interval_secs` | How often to poll Kubernetes for PgDog workloads. Independent of the `watch` streams, which fire on events (int, default `15`). | +| `dns.refresh_interval_secs` | How often to re-resolve every known RDS hostname (int, default `30`). | +| `cloudwatch.refresh_interval_secs` | How often to poll CloudWatch for per-instance metrics (int, default `60`). | +| `cloudwatch.lookback_secs` | How far back each fetch reaches. A fresh deploy pulls the full window on its first tick (int, default `3600`). | +| `cloudwatch.period_secs` | CloudWatch aggregation period. The smallest bucket the metric API returns (int, default `60`). | + +### State store + +`control.config.store` governs the in-memory metric store: how often it sweeps for stale data, when an instance is marked stale or evicted, and how long per-instance metric history is retained. The defaults are tight enough for an interactive dashboard; widen them if you keep the UI open against a cluster that's intentionally idle, or if you want a longer historical window in memory. + +```yaml +control: + config: + store: + tick_secs: 1 + stale_after_secs: 5 + evict_after_secs: 60 + metrics_retention_secs: 300 +``` + +| Option | Description | +|-|-| +| `tick_secs` | How often the sweep task wakes up. Sets the shortest possible reaction time for stale and evict transitions (int, default `1`). | +| `stale_after_secs` | Instance is marked stale if its newest metric is older than this. The UI dims it but keeps it visible (int, default `5`). 
| +| `evict_after_secs` | Instance is dropped from the store entirely if its newest metric is older than this (int, default `60`). | +| `metrics_retention_secs` | How much per-instance metric history is kept in memory. Older points are dropped as new ones arrive (int, default `300`). | + +### Redis persistence + +`control.config.redis` controls how the in-memory store is snapshotted to Redis between process restarts. The chart already provisions an in-cluster Redis (`<release>-redis`) and the control plane points at it by default, so most installs leave this section alone. + +```yaml +control: + config: + redis: + url: redis://my-redis.cache:6379 + save_interval_secs: 60 +``` + +| Option | Description | +|-|-| +| `url` | Redis connection string. Leave empty to use the in-cluster Redis the chart installs; set it only to point at an external Redis (string, optional). | +| `save_interval_secs` | How often the background task snapshots the store to Redis (int, default `60`). | + +## Examples + +Two end-to-end `values.yaml` files for the most common setups. Save the snippet to `values.yaml` and install with: + +```sh +helm install control pgdogdev-ee/pgdog-control -f values.yaml +``` + +### EKS with the AWS Load Balancer Controller + +This example deploys to an EKS cluster that already has the AWS Load Balancer Controller and an ACM certificate set up. AWS credentials come from IRSA, so no static keys live in the cluster. TLS terminates at the ALB using a pre-issued ACM certificate. + +```yaml +control: + aws: + # IAM role assumed by the pod via IRSA. The role's trust policy must + # allow system:serviceaccount:<namespace>:<release>-control. + roleArn: arn:aws:iam::111111111111:role/pgdog-control + region: us-east-1 + rbac: + create: true + # Namespaces where the control plane is allowed to manage PgDog + # clusters. The dashboard still sees workloads in every namespace.
+ writeNamespaces: + - pgdog-prod + - pgdog-staging + config: + auth: + redirect_base_url: https://control.acme.com + github: + client_id: Iv1.0123456789abcdef + client_secret: shhh-store-this-in-a-secret + allowed_orgs: [acme-corp] + +ingress: + enabled: true + mode: aws + host: control.acme.com + aws: + scheme: internet-facing + certificateArn: arn:aws:acm:us-east-1:111111111111:certificate/abc-123-def-456 + sslRedirect: true +``` + +DNS comes *after* the install: the ALB only exists once the AWS Load Balancer Controller has reconciled the Ingress. After `helm install`, wait for the Ingress's `ADDRESS` to populate (`kubectl get ingress control-control -w`), then create a Route53 `A`/`ALIAS` record for `control.acme.com` pointing at that ALB hostname. The ACM cert referenced by `certificateArn` is issued out-of-band (typically with DNS-01 against the same zone) and can be created before the install. Only the record that fronts the dashboard is order-dependent. + +### Vanilla Kubernetes with ingress-nginx and cert-manager + +This example targets a generic cluster (kubeadm, on-prem, or any managed Kubernetes) with [ingress-nginx](https://kubernetes.github.io/ingress-nginx/) handling traffic and [cert-manager](https://cert-manager.io/) issuing Let's Encrypt certificates. + +```yaml +control: + rbac: + create: true + writeNamespaces: + - pgdog-prod + - pgdog-staging + config: + auth: + redirect_base_url: https://control.acme.com + google: + client_id: 0123456789-abc.apps.googleusercontent.com + client_secret: shhh-store-this-in-a-secret + allowed_domains: [acme.com] + +ingress: + enabled: true + mode: nginx + host: control.acme.com + nginx: + tls: + enabled: true + clusterIssuer: letsencrypt-prod + sslRedirect: "true" +``` + +Before installing, create a DNS record for `control.acme.com` pointing at the LoadBalancer Service that fronts ingress-nginx. 
cert-manager solves the HTTP-01 challenge through the same Ingress once it's reachable, so the DNS record needs to be live before the first `helm install` (or the certificate stays pending until it is). diff --git a/templates/deployment.yaml b/templates/deployment.yaml index c3efb7f..f6dd2b3 100644 --- a/templates/deployment.yaml +++ b/templates/deployment.yaml @@ -12,10 +12,24 @@ spec: template: metadata: annotations: - # Roll the deployment when the rendered configmap changes. Uses - # a chart-scoped key so it doesn't fight `kubectl rollout restart` - # over `kubectl.kubernetes.io/restartedAt` under server-side apply. + # Roll the deployment when any rendered config / secret content + # changes. Each annotation hashes the *template output*, not + # values directly, so transformations applied inside the + # template (defaults, lookups, randAlphaNum on first install) + # are accounted for. Chart-scoped keys avoid colliding with + # `kubectl rollout restart`'s `kubectl.kubernetes.io/restartedAt` + # under server-side apply. + # + # `checksum/config` covers control.toml — which includes the + # rendered `cookie_secret` — so auth/secret rotation already + # falls under this hash. checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }} + {{- if .Values.control.aws.accessKeyId }} + # AWS creds live in their own Secret, mounted via envFrom, so a + # config change there wouldn't otherwise propagate to running + # pods. + checksum/aws-creds: {{ include (print $.Template.BasePath "/aws-secret.yaml") . | sha256sum }} + {{- end }} labels: {{- include "pgdog-control.selectorLabels" . 
| nindent 8 }} spec: diff --git a/templates/ingress-aws.yaml b/templates/ingress-aws.yaml new file mode 100644 index 0000000..a08c824 --- /dev/null +++ b/templates/ingress-aws.yaml @@ -0,0 +1,36 @@ +{{- /* AWS ALB preset: rendered only when ingress.enabled is true and ingress.mode is "aws". The rule host is quoted so wildcard hosts such as *.example.com stay valid YAML instead of being parsed as an alias. */ -}}{{- if and .Values.ingress.enabled (eq (.Values.ingress.mode | default "default") "aws") }} +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: {{ include "pgdog-control.control.fullname" . }} + labels: + {{- include "pgdog-control.labels" . | nindent 4 }} + {{- with .Values.ingress.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} + annotations: + alb.ingress.kubernetes.io/scheme: {{ .Values.ingress.aws.scheme | quote }} + alb.ingress.kubernetes.io/target-type: ip + {{- if .Values.ingress.aws.certificateArn }} + alb.ingress.kubernetes.io/listen-ports: '[{"HTTP":80},{"HTTPS":443}]' + alb.ingress.kubernetes.io/certificate-arn: {{ .Values.ingress.aws.certificateArn | quote }} + {{- if .Values.ingress.aws.sslRedirect }} + alb.ingress.kubernetes.io/ssl-redirect: "443" + {{- end }} + {{- else }} + alb.ingress.kubernetes.io/listen-ports: '[{"HTTP":80}]' + {{- end }} +spec: + ingressClassName: alb + rules: + - {{ with .Values.ingress.host }}host: {{ . | quote }} + {{ end }}http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: {{ include "pgdog-control.control.fullname" . }} + port: + number: 80 +{{- end }} diff --git a/templates/ingress-nginx.yaml b/templates/ingress-nginx.yaml new file mode 100644 index 0000000..9aaaed6 --- /dev/null +++ b/templates/ingress-nginx.yaml @@ -0,0 +1,35 @@ +{{- if and .Values.ingress.enabled (eq (.Values.ingress.mode | default "default") "nginx") }} +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: {{ include "pgdog-control.control.fullname" . }} + labels: + {{- include "pgdog-control.labels" . | nindent 4 }} + {{- with .Values.ingress.labels }} + {{- toYaml .
| nindent 4 }} + {{- end }} + {{- /* Annotation values must be strings, so both are quoted. */ -}}{{- if .Values.ingress.nginx.tls.enabled }} + annotations: + cert-manager.io/cluster-issuer: {{ .Values.ingress.nginx.clusterIssuer | quote }} + nginx.ingress.kubernetes.io/ssl-redirect: {{ .Values.ingress.nginx.sslRedirect | quote }} + {{- end }} +spec: + ingressClassName: nginx + {{- /* Hosts are quoted so wildcard hosts such as *.example.com stay valid YAML. */ -}}{{- if .Values.ingress.nginx.tls.enabled }} + tls: + - hosts: + - {{ .Values.ingress.host | quote }} + secretName: {{ .Release.Name }}-control-tls + {{- end }} + rules: + - {{ with .Values.ingress.host }}host: {{ . | quote }} + {{ end }}http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: {{ include "pgdog-control.control.fullname" . }} + port: + number: 80 +{{- end }} diff --git a/templates/ingress.yaml b/templates/ingress.yaml index 6e0e2bf..45b792c 100644 --- a/templates/ingress.yaml +++ b/templates/ingress.yaml @@ -1,26 +1,33 @@ {{- if .Values.ingress.enabled }} +{{- $mode := .Values.ingress.mode | default "default" }} +{{- if not (has $mode (list "aws" "nginx" "default")) }} +{{- fail (printf "ingress.mode must be one of: aws, nginx, default (got %q)" $mode) }} +{{- end }} +{{- if eq $mode "default" }} apiVersion: networking.k8s.io/v1 kind: Ingress metadata: name: {{ include "pgdog-control.control.fullname" . }} labels: {{- include "pgdog-control.labels" . | nindent 4 }} - {{- if .Values.ingress.tls.enabled }} + {{- with .Values.ingress.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} + {{- with .Values.ingress.annotations }} annotations: - cert-manager.io/cluster-issuer: {{ .Values.ingress.clusterIssuer }} - nginx.ingress.kubernetes.io/ssl-redirect: {{ .Values.ingress.sslRedirect | quote }} + {{- toYaml . | nindent 4 }} {{- end }} spec: - ingressClassName: nginx - {{- if .Values.ingress.tls.enabled }} + {{- with .Values.ingress.ingressClassName }} + ingressClassName: {{ . | quote }} + {{- end }} + {{- with .Values.ingress.tls }} tls: - - hosts: - - {{ .Values.ingress.host }} - secretName: {{ .Release.Name }}-control-tls + {{- toYaml . 
| nindent 4 }} {{- end }} rules: - - host: {{ .Values.ingress.host }} - http: + - {{ with .Values.ingress.host }}host: {{ . | quote }} + {{ end }}http: paths: - path: / pathType: Prefix @@ -30,3 +37,4 @@ spec: port: number: 80 {{- end }} +{{- end }} diff --git a/values.yaml b/values.yaml index 431040c..641a54b 100644 --- a/values.yaml +++ b/values.yaml @@ -105,7 +105,57 @@ networkPolicy: ingress: enabled: true - tls: - enabled: true - clusterIssuer: "letsencrypt-prod" - sslRedirect: "true" + # mode selects the annotation / ingressClassName / TLS preset applied + # to the rendered Ingress. One of: + # nginx — ingress-nginx + cert-manager. TLS terminated at nginx + # via a cert-manager-issued Secret. Driven by + # ingress.nginx.*. + # aws — AWS Load Balancer Controller (ALB). TLS terminated at + # the ALB using an ACM cert. Driven by ingress.aws.*. + # default — no preset is applied. The chart only emits the rule that + # routes to the control Service; provide your own + # annotations, ingressClassName, and tls block via the + # top-level keys below. + mode: nginx + # host is the external hostname routed to control2. Required when + # mode is "nginx" or "aws"; optional in "default" mode (omit to leave + # the rule unbound to a host). + # host: control.example.com + # labels are merged into metadata.labels on top of the chart's + # standard labels. Applied in all three modes. + labels: {} + + # nginx-mode settings (ignored unless mode == "nginx"). + nginx: + tls: + # When true, emits cert-manager.io/cluster-issuer + + # nginx.ingress.kubernetes.io/ssl-redirect annotations and a tls + # block referencing <release>-control-tls. + enabled: true + clusterIssuer: "letsencrypt-prod" + sslRedirect: "true" + + # aws-mode settings (ignored unless mode == "aws"). + aws: + # scheme controls whether the ALB is internet-facing or internal. + # Valid values: "internet-facing" or "internal". + scheme: "internet-facing" + # certificateArn is the ACM cert ARN attached to the HTTPS listener.
+ # Optional: when empty the ALB serves HTTP only (no 443 listener). + certificateArn: "" + # sslRedirect: when true and certificateArn is set, the ALB + # redirects HTTP:80 to HTTPS:443. Ignored when certificateArn is + # empty. + sslRedirect: true + + # default-mode settings (ignored unless mode == "default"). Provide + # whatever your environment's ingress controller expects; the chart + # passes these through verbatim and adds nothing on top. + annotations: {} + ingressClassName: "" + # tls is rendered as-is under spec.tls — supply the full list of + # {hosts, secretName} entries your controller expects, e.g.: + # tls: + # - hosts: [control.example.com] + # secretName: control-tls + tls: [] From 0c2270e1d9420cf55de3053c08df5a87f8234094 Mon Sep 17 00:00:00 2001 From: Lev Kokotov Date: Thu, 21 May 2026 12:20:47 -0700 Subject: [PATCH 3/3] ok --- README.md | 84 ++++++++++++++++++++++++++++++++++++++++ templates/_helpers.tpl | 11 ++++++ templates/configmap.yaml | 17 ++++---- templates/rbac.yaml | 12 +++--- templates/secrets.yaml | 4 +- 5 files changed, 113 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 75b214f..dbd7483 100644 --- a/README.md +++ b/README.md @@ -76,6 +76,90 @@ ingress: | `ingress.nginx.clusterIssuer` | Value of the `cert-manager.io/cluster-issuer` annotation (string, default `letsencrypt-prod`). | | `ingress.nginx.sslRedirect` | Value of the `nginx.ingress.kubernetes.io/ssl-redirect` annotation. Quoted because nginx expects a string (string, default `"true"`). | +##### Finding an existing ClusterIssuer + +If cert-manager is already installed, list the available issuers: + +```sh +kubectl get clusterissuers +``` + +The output looks like: + +``` +NAME READY AGE +letsencrypt-prod True 42d +letsencrypt-staging True 42d +``` + +Use the `NAME` column verbatim as `ingress.nginx.clusterIssuer`. `ClusterIssuer` is cluster-scoped, so you don't need `-n`. The issuer doesn't have to live in the release namespace. 
+ +Check that `READY` is `True`. If it isn't, run `kubectl describe clusterissuer <name>` and fix the issuer first. Otherwise the cert request will stay in `Pending`. + +If the command returns `error: the server doesn't have a resource type "clusterissuers"`, cert-manager isn't installed. See below. + +##### Installing ingress-nginx and cert-manager from scratch + +On a cluster with neither component, install both before installing this chart. Order matters. Install ingress-nginx first. Then cert-manager. Then create a ClusterIssuer. + +**1. ingress-nginx** + +```sh +helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx +helm repo update +helm install ingress-nginx ingress-nginx/ingress-nginx \ + --namespace ingress-nginx --create-namespace +``` + +Wait for the controller's Service to get an external address. On most managed clusters that's a `LoadBalancer`. You'll need its hostname or IP for DNS: + +```sh +kubectl -n ingress-nginx get svc ingress-nginx-controller -w +``` + +Point `control.acme.com` (or whatever `ingress.host` you'll use) at that address before continuing. Let's Encrypt's HTTP-01 challenge fails if the hostname doesn't resolve to the controller. + +**2. cert-manager** + +```sh +helm repo add jetstack https://charts.jetstack.io +helm repo update +helm install cert-manager jetstack/cert-manager \ + --namespace cert-manager --create-namespace \ + --set crds.enabled=true +``` + +**3. A ClusterIssuer** + +cert-manager doesn't ship issuers. You create them.
Here's a minimal Let's Encrypt production issuer that solves HTTP-01 through ingress-nginx: + +```yaml +# letsencrypt-prod.yaml +apiVersion: cert-manager.io/v1 +kind: ClusterIssuer +metadata: + name: letsencrypt-prod +spec: + acme: + server: https://acme-v02.api.letsencrypt.org/directory + email: you@acme.com + privateKeySecretRef: + name: letsencrypt-prod-account-key + solvers: + - http01: + ingress: + ingressClassName: nginx +``` + +```sh +kubectl apply -f letsencrypt-prod.yaml +kubectl get clusterissuer letsencrypt-prod -w # wait for READY=True +``` + +For first-time setup, point `server` at `https://acme-staging-v02.api.letsencrypt.org/directory` and create a separate `letsencrypt-staging` issuer. Staging has much higher rate limits. You can iterate on the install without burning prod issuance quota. Once it works, re-issue against the prod issuer. + +Once the issuer is `READY=True`, set `ingress.nginx.clusterIssuer: letsencrypt-prod` in `values.yaml` and install the chart. + #### AWS ALB The AWS ALB preset targets the [AWS Load Balancer Controller](https://kubernetes-sigs.github.io/aws-load-balancer-controller/). The chart hardcodes `ingressClassName: alb` and `alb.ingress.kubernetes.io/target-type: ip`, and lets ACM terminate TLS at the load balancer. Supply an ACM cert ARN to get the HTTPS listener; leave it empty for HTTP-only. diff --git a/templates/_helpers.tpl b/templates/_helpers.tpl index 12ddf3f..4f59a08 100644 --- a/templates/_helpers.tpl +++ b/templates/_helpers.tpl @@ -36,6 +36,17 @@ Resource names for each component (preserve existing names for compatibility). {{- printf "%s-control" .Release.Name | trunc 63 | trimSuffix "-" }} {{- end }} +{{/* +Name for resources shared across releases — cluster-scoped objects +(ClusterRole, ClusterRoleBinding) and the namespaced Role/RoleBinding +written into each writeNamespace, which is reachable by every install +of this chart. 
Includes the release namespace so multiple installs on +the same cluster don't collide. +*/}} +{{- define "pgdog-control.control.clusterFullname" -}} +{{- printf "%s-%s-control" .Release.Name .Release.Namespace | trunc 63 | trimSuffix "-" }} +{{- end }} + {{- define "pgdog-control.redis.fullname" -}} {{- printf "%s-redis" .Release.Name | trunc 63 | trimSuffix "-" }} {{- end }} diff --git a/templates/configmap.yaml b/templates/configmap.yaml index cc44ef3..be6adf9 100644 --- a/templates/configmap.yaml +++ b/templates/configmap.yaml @@ -1,4 +1,5 @@ -{{- $auth := .Values.control.config.auth | default dict -}} +{{- $config := .Values.control.config | default dict -}} +{{- $auth := $config.auth | default dict -}} {{- $cookieSecret := "" -}} {{- if $auth.cookie_secret -}} {{- $cookieSecret = $auth.cookie_secret -}} @@ -16,14 +17,14 @@ metadata: {{- include "pgdog-control.labels" . | nindent 4 }} data: control.toml: | - {{- with .Values.control.config.rds }} + {{- with $config.rds }} [rds] {{- with .refresh_interval_secs }} refresh_interval_secs = {{ . 
}} {{- end }} {{- end }} - {{- with .Values.control.config.kube }} + {{- with $config.kube }} [kube] {{- with .refresh_interval_secs }} @@ -31,7 +32,7 @@ data: {{- end }} {{- end }} - {{- with .Values.control.config.dns }} + {{- with $config.dns }} [dns] {{- with .refresh_interval_secs }} @@ -39,7 +40,7 @@ data: {{- end }} {{- end }} - {{- with .Values.control.config.cloudwatch }} + {{- with $config.cloudwatch }} [cloudwatch] {{- with .refresh_interval_secs }} @@ -53,7 +54,7 @@ data: {{- end }} {{- end }} - {{- with .Values.control.config.store }} + {{- with $config.store }} [store] {{- with .tick_secs }} @@ -70,7 +71,7 @@ data: {{- end }} {{- end }} - {{- with .Values.control.config.helm }} + {{- with $config.helm }} [helm] {{- with .chart }} @@ -127,7 +128,7 @@ data: {{- end }} {{- end }} - {{- $redis := .Values.control.config.redis | default dict }} + {{- $redis := $config.redis | default dict }} {{- $redisUrl := $redis.url | default (printf "redis://%s.%s.svc.cluster.local:6379" (include "pgdog-control.redis.fullname" .) .Release.Namespace) }} [redis] diff --git a/templates/rbac.yaml b/templates/rbac.yaml index a694c71..82341da 100644 --- a/templates/rbac.yaml +++ b/templates/rbac.yaml @@ -16,7 +16,7 @@ metadata: apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: - name: {{ include "pgdog-control.control.fullname" . }} + name: {{ include "pgdog-control.control.clusterFullname" . }} labels: {{- include "pgdog-control.labels" . | nindent 4 }} rules: @@ -43,13 +43,13 @@ rules: apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: - name: {{ include "pgdog-control.control.fullname" . }} + name: {{ include "pgdog-control.control.clusterFullname" . }} labels: {{- include "pgdog-control.labels" . | nindent 4 }} roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole - name: {{ include "pgdog-control.control.fullname" . }} + name: {{ include "pgdog-control.control.clusterFullname" . 
}} subjects: - kind: ServiceAccount name: {{ include "pgdog-control.control.serviceAccountName" . }} @@ -60,7 +60,7 @@ subjects: apiVersion: rbac.authorization.k8s.io/v1 kind: Role metadata: - name: {{ include "pgdog-control.control.fullname" $ }} + name: {{ include "pgdog-control.control.clusterFullname" $ }} namespace: {{ $ns }} labels: {{- include "pgdog-control.labels" $ | nindent 4 }} @@ -86,14 +86,14 @@ rules: apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding metadata: - name: {{ include "pgdog-control.control.fullname" $ }} + name: {{ include "pgdog-control.control.clusterFullname" $ }} namespace: {{ $ns }} labels: {{- include "pgdog-control.labels" $ | nindent 4 }} roleRef: apiGroup: rbac.authorization.k8s.io kind: Role - name: {{ include "pgdog-control.control.fullname" $ }} + name: {{ include "pgdog-control.control.clusterFullname" $ }} subjects: - kind: ServiceAccount name: {{ include "pgdog-control.control.serviceAccountName" $ }} diff --git a/templates/secrets.yaml b/templates/secrets.yaml index 32d341f..d87d4a6 100644 --- a/templates/secrets.yaml +++ b/templates/secrets.yaml @@ -1,4 +1,6 @@ -{{- if not (.Values.control.config.auth | default dict).cookie_secret -}} +{{- $config := .Values.control.config | default dict -}} +{{- $auth := $config.auth | default dict -}} +{{- if not $auth.cookie_secret -}} {{- $existing := lookup "v1" "Secret" .Release.Namespace (printf "%s-secrets" .Release.Name) -}} apiVersion: v1 kind: Secret