From 871e3e8e0a8bba0ec9afc028c1d646516b481aba Mon Sep 17 00:00:00 2001
From: Mat Kowalski <mko@redhat.com>
Date: Thu, 11 Jun 2026 17:56:28 +0200
Subject: [PATCH] OPNET-603: Fail haproxy test when all kubeapi servers are
 down simultaneously

The on-prem haproxy monitor test so far only flaked when haproxy
detected individual kube-apiserver backends as down, which is expected
over the course of installation or upgrade. What really matters is
whether at any point in time a single haproxy instance saw all
kube-apiserver backends down at the same time, because that means the
API was completely unreachable through the on-prem loadbalancer.

Add a sweep over the constructed OnPremHaproxyDetectsDown intervals
that finds, per haproxy instance, the time windows during which at
least three distinct backends were down simultaneously. The first such
window for every haproxy instance is tolerated, because when haproxy
starts during the installation all kube-apiservers are expected to be
down until they come up for the first time. Any subsequent window
fails the new junit test.

Implements Ben's suggestion from
https://github.com/openshift/origin/pull/29149#discussion_r1806784345
---
 .../network/onpremhaproxy/monitortest.go      | 152 +++++++++++-
 .../network/onpremhaproxy/monitortest_test.go | 217 ++++++++++++++++++
 2 files changed, 362 insertions(+), 7 deletions(-)
 create mode 100644 pkg/monitortests/network/onpremhaproxy/monitortest_test.go

diff --git a/pkg/monitortests/network/onpremhaproxy/monitortest.go b/pkg/monitortests/network/onpremhaproxy/monitortest.go
index 694be0902350..eb2da803d681 100644
--- a/pkg/monitortests/network/onpremhaproxy/monitortest.go
+++ b/pkg/monitortests/network/onpremhaproxy/monitortest.go
@@ -5,6 +5,7 @@ import (
 	"errors"
 	"fmt"
 	"regexp"
+	"sort"
 	"strings"
 	"time"
 
@@ -211,22 +212,159 @@ func (*operatorLogAnalyzer) EvaluateTestsFromConstructedIntervals(ctx context.Co
 		testNameToFailures[testName] = append(testNameToFailures[testName], interval.String())
 	}
 
+	ret := []*junitapi.JUnitTestCase{}
 	if !somethingFailed {
-		return []*junitapi.JUnitTestCase{success}, nil
+		ret = append(ret, success)
+	} else {
+		failure := &junitapi.JUnitTestCase{
+			Name: testName,
+			FailureOutput: &junitapi.FailureOutput{
+				//Message: fmt.Sprint("something happened with haproxy"),
+				Output: "Haproxy detected some kubeapi-servers down. It's not necessarily an issue, it's expected over the course of installation. Go and check messages. Look at intervals in sippy to see a full graph of which haproxy instance detected which kubeapi-server as down. Plotted on a time axis, you will see if at any point in time all the kubeapi-servers were down. Only then, it is an issue.",
+			},
+			SystemOut: strings.Join(testNameToFailures[testName], "\n"),
+			//SystemErr: fmt.Sprintf("syserr; found %d lines in the failure map", len(testNameToFailures[testName])),
+		}
+
+		// Marked flaky until we have monitored it for consistency
+		ret = append(ret, failure, success)
+	}
+
+	ret = append(ret, evaluateFullAPIOutages(leaseIntervals)...)
+
+	return ret, nil
+}
+
+// fullOutageBackendThreshold is the number of distinct kube-apiserver backends that have to be
+// reported down at the same time by a single haproxy instance to consider it a full API outage.
+// On-prem HA deployments run three control plane nodes, so three backends down at the same time
+// mean the API is not reachable through the loadbalancer at all.
+const fullOutageBackendThreshold = 3
+
+// apiOutageWindow is a time range during which a single haproxy instance considered at least
+// fullOutageBackendThreshold kube-apiserver backends down at the same time.
+type apiOutageWindow struct {
+	from time.Time
+	to   time.Time
+}
+
+// findFullAPIOutageWindows takes the constructed OnPremHaproxyDetectsDown intervals and returns,
+// per node running haproxy, the time windows during which that haproxy instance reported at least
+// `threshold` distinct kube-apiserver backends down at the same time.
+func findFullAPIOutageWindows(downIntervals monitorapi.Intervals, threshold int) map[string][]apiOutageWindow {
+	type sweepEvent struct {
+		at    time.Time
+		delta int
+	}
+
+	eventsPerNode := map[string][]sweepEvent{}
+	for _, interval := range downIntervals {
+		// The locator key has the form "<node running haproxy>___<kube-apiserver backend>".
+		pairKey := interval.Locator.Keys[monitorapi.LocatorOnPremKubeapiUnreachableFromHaproxyKey]
+		parts := strings.SplitN(pairKey, "___", 2)
+		if len(parts) != 2 {
+			continue
+		}
+		reportingNode := parts[0]
+		eventsPerNode[reportingNode] = append(eventsPerNode[reportingNode],
+			sweepEvent{at: interval.From, delta: 1},
+			sweepEvent{at: interval.To, delta: -1},
+		)
+	}
+
+	ret := map[string][]apiOutageWindow{}
+	for node, events := range eventsPerNode {
+		// Sort by time. On equal timestamps process the "backend recovered" events first so that a
+		// backend recovering at the very same second another one goes down does not produce an
+		// artificial overlap.
+		sort.Slice(events, func(i, j int) bool {
+			if events[i].at.Equal(events[j].at) {
+				return events[i].delta < events[j].delta
+			}
+			return events[i].at.Before(events[j].at)
+		})
+
+		// Sweep over the events counting how many backends are down at any given moment. Intervals of
+		// a single backend never overlap by construction, so the number of open intervals equals the
+		// number of distinct backends being down.
+		windows := []apiOutageWindow{}
+		downCount := 0
+		inOutage := false
+		var outageStart time.Time
+		for _, event := range events {
+			downCount += event.delta
+			switch {
+			case !inOutage && downCount >= threshold:
+				inOutage = true
+				outageStart = event.at
+			case inOutage && downCount < threshold:
+				inOutage = false
+				windows = append(windows, apiOutageWindow{from: outageStart, to: event.at})
+			}
+		}
+
+		// Merge windows that touch each other. Log timestamps have second granularity, so a backend
+		// recovering and another one going down within the same second would otherwise split a single
+		// outage into two.
+		merged := []apiOutageWindow{}
+		for _, window := range windows {
+			if len(merged) > 0 && !window.from.After(merged[len(merged)-1].to) {
+				merged[len(merged)-1].to = window.to
+				continue
+			}
+			merged = append(merged, window)
+		}
+		if len(merged) > 0 {
+			ret[node] = merged
+		}
+	}
+
+	return ret
+}
+
+// evaluateFullAPIOutages produces a junit result failing whenever a single haproxy instance
+// reported all kube-apiserver backends down at the same time. The first occurrence for every
+// haproxy instance is tolerated: when haproxy starts during the installation, all kube-apiservers
+// are expected to be down until they come up for the first time. Any later occurrence means the
+// API was completely unreachable through the on-prem loadbalancer.
+func evaluateFullAPIOutages(downIntervals monitorapi.Intervals) []*junitapi.JUnitTestCase {
+	const testName = "[Jira: Networking / On-Prem Host Networking] Haproxy must not detect all kubeapi servers down simultaneously"
+
+	outagesPerNode := findFullAPIOutageWindows(downIntervals, fullOutageBackendThreshold)
+
+	nodes := make([]string, 0, len(outagesPerNode))
+	for node := range outagesPerNode {
+		nodes = append(nodes, node)
+	}
+	sort.Strings(nodes)
+
+	failures := []string{}
+	for _, node := range nodes {
+		// The first full outage observed by every haproxy instance is the initial state: when haproxy
+		// starts during the installation, none of the kube-apiservers is up yet.
+		for _, window := range outagesPerNode[node][1:] {
+			failures = append(failures, fmt.Sprintf(
+				"haproxy on node %s reported %d or more kube-apiserver backends down at the same time between %s and %s (%s)",
+				node, fullOutageBackendThreshold, window.from.Format(time.RFC3339), window.to.Format(time.RFC3339), window.to.Sub(window.from)))
+		}
+	}
+
+	if len(failures) == 0 {
+		return []*junitapi.JUnitTestCase{{Name: testName}}
 	}
 
 	failure := &junitapi.JUnitTestCase{
 		Name: testName,
 		FailureOutput: &junitapi.FailureOutput{
-			//Message: fmt.Sprint("something happened with haproxy"),
-			Output: "Haproxy detected some kubeapi-servers down. It's not necessarily an issue, it's expected over the course of installation. Go and check messages. Look at intervals in sippy to see a full graph of which haproxy instance detected which kubeapi-server as down. Plotted on a time axis, you will see if at any point in time all the kubeapi-servers were down. Only then, it is an issue.",
+			Output: "Haproxy detected all kube-apiserver backends down at the same time after the initial startup window. " +
+				"The first occurrence for every haproxy instance is expected: when haproxy starts during the installation, all kube-apiservers are down until they come up for the first time. " +
+				"Any subsequent occurrence means the API was completely unreachable through the on-prem loadbalancer. " +
+				"Look at the onprem-haproxy rows in the intervals chart to see which haproxy instance detected which kube-apiserver as down.",
 		},
-		SystemOut: strings.Join(testNameToFailures[testName], "\n"),
-		//SystemErr: fmt.Sprintf("syserr; found %d lines in the failure map", len(testNameToFailures[testName])),
+		SystemOut: strings.Join(failures, "\n"),
 	}
 
-	// Marked flaky until we have monitored it for consistency
-	return []*junitapi.JUnitTestCase{failure, success}, nil
+	return []*junitapi.JUnitTestCase{failure}
 }
 
 func (w *operatorLogAnalyzer) WriteContentToStorage(ctx context.Context, storageDir, timeSuffix string, finalIntervals monitorapi.Intervals, finalResourceState monitorapi.ResourcesMap) error {
diff --git a/pkg/monitortests/network/onpremhaproxy/monitortest_test.go b/pkg/monitortests/network/onpremhaproxy/monitortest_test.go
new file mode 100644
index 000000000000..4730f2131463
--- /dev/null
+++ b/pkg/monitortests/network/onpremhaproxy/monitortest_test.go
@@ -0,0 +1,217 @@
+package onpremhaproxy
+
+import (
+	"fmt"
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+
+	"github.com/openshift/origin/pkg/monitor/monitorapi"
+)
+
+var testStart = time.Date(2024, time.October, 28, 7, 0, 0, 0, time.UTC)
+
+// at is a helper returning a time relative to the beginning of the test run.
+func at(seconds int) time.Time {
+	return testStart.Add(time.Duration(seconds) * time.Second)
+}
+
+// haproxyDownInterval builds a constructed OnPremHaproxyDetectsDown interval the same way
+// ConstructComputedIntervals does.
+func haproxyDownInterval(reportingNode, backend string, from, to time.Time) monitorapi.Interval {
+	return monitorapi.NewInterval(monitorapi.SourceHaproxyMonitor, monitorapi.Info).
+		Locator(monitorapi.Locator{Keys: map[monitorapi.LocatorKey]string{
+			monitorapi.LocatorOnPremKubeapiUnreachableFromHaproxyKey: fmt.Sprintf("%s___%s", reportingNode, backend),
+		}}).
+		Message(monitorapi.NewMessage().Reason(monitorapi.OnPremHaproxyDetectsDown).
+			Constructed(monitorapi.ConstructionOwnerOnPremHaproxy).
+			HumanMessage(fmt.Sprintf("Kubeapi on %s is detected dead by %s", backend, reportingNode))).
+		Display().
+		Build(from, to)
+}
+
+func TestFindFullAPIOutageWindows(t *testing.T) {
+	tests := []struct {
+		name      string
+		intervals monitorapi.Intervals
+		expected  map[string][]apiOutageWindow
+	}{
+		{
+			name:      "no intervals",
+			intervals: monitorapi.Intervals{},
+			expected:  map[string][]apiOutageWindow{},
+		},
+		{
+			name: "single backend flapping is not an outage",
+			intervals: monitorapi.Intervals{
+				haproxyDownInterval("master-0", "masters/master-1", at(0), at(60)),
+				haproxyDownInterval("master-0", "masters/master-1", at(120), at(180)),
+			},
+			expected: map[string][]apiOutageWindow{},
+		},
+		{
+			name: "two backends down at the same time is not a full outage",
+			intervals: monitorapi.Intervals{
+				haproxyDownInterval("master-0", "masters/master-1", at(0), at(60)),
+				haproxyDownInterval("master-0", "masters/master-2", at(30), at(90)),
+			},
+			expected: map[string][]apiOutageWindow{},
+		},
+		{
+			name: "three backends down at the same time",
+			intervals: monitorapi.Intervals{
+				haproxyDownInterval("master-0", "masters/master-0", at(0), at(50)),
+				haproxyDownInterval("master-0", "masters/master-1", at(10), at(60)),
+				haproxyDownInterval("master-0", "masters/master-2", at(20), at(40)),
+			},
+			expected: map[string][]apiOutageWindow{
+				"master-0": {{from: at(20), to: at(40)}},
+			},
+		},
+		{
+			name: "backends down on different haproxy instances do not add up",
+			intervals: monitorapi.Intervals{
+				haproxyDownInterval("master-0", "masters/master-0", at(0), at(60)),
+				haproxyDownInterval("master-1", "masters/master-1", at(0), at(60)),
+				haproxyDownInterval("master-2", "masters/master-2", at(0), at(60)),
+			},
+			expected: map[string][]apiOutageWindow{},
+		},
+		{
+			name: "two separate full outages",
+			intervals: monitorapi.Intervals{
+				haproxyDownInterval("master-0", "masters/master-0", at(0), at(60)),
+				haproxyDownInterval("master-0", "masters/master-1", at(0), at(60)),
+				haproxyDownInterval("master-0", "masters/master-2", at(0), at(60)),
+				haproxyDownInterval("master-0", "masters/master-0", at(600), at(630)),
+				haproxyDownInterval("master-0", "masters/master-1", at(600), at(630)),
+				haproxyDownInterval("master-0", "masters/master-2", at(600), at(630)),
+			},
+			expected: map[string][]apiOutageWindow{
+				"master-0": {
+					{from: at(0), to: at(60)},
+					{from: at(600), to: at(630)},
+				},
+			},
+		},
+		{
+			name: "recovery at the same second as another backend goes down is not an overlap",
+			intervals: monitorapi.Intervals{
+				haproxyDownInterval("master-0", "masters/master-0", at(0), at(20)),
+				haproxyDownInterval("master-0", "masters/master-1", at(0), at(20)),
+				haproxyDownInterval("master-0", "masters/master-2", at(20), at(40)),
+			},
+			expected: map[string][]apiOutageWindow{},
+		},
+		{
+			name: "one backend recovering and going down within the outage keeps a single window",
+			intervals: monitorapi.Intervals{
+				haproxyDownInterval("master-0", "masters/master-0", at(0), at(100)),
+				haproxyDownInterval("master-0", "masters/master-1", at(0), at(100)),
+				haproxyDownInterval("master-0", "masters/master-2", at(0), at(50)),
+				haproxyDownInterval("master-0", "masters/master-2", at(50), at(100)),
+			},
+			expected: map[string][]apiOutageWindow{
+				"master-0": {{from: at(0), to: at(100)}},
+			},
+		},
+		{
+			name: "outages tracked separately per haproxy instance",
+			intervals: monitorapi.Intervals{
+				haproxyDownInterval("master-0", "masters/master-0", at(0), at(60)),
+				haproxyDownInterval("master-0", "masters/master-1", at(0), at(60)),
+				haproxyDownInterval("master-0", "masters/master-2", at(0), at(60)),
+				haproxyDownInterval("master-1", "masters/master-0", at(300), at(360)),
+				haproxyDownInterval("master-1", "masters/master-1", at(300), at(360)),
+				haproxyDownInterval("master-1", "masters/master-2", at(300), at(360)),
+			},
+			expected: map[string][]apiOutageWindow{
+				"master-0": {{from: at(0), to: at(60)}},
+				"master-1": {{from: at(300), to: at(360)}},
+			},
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			actual := findFullAPIOutageWindows(tt.intervals, fullOutageBackendThreshold)
+			assert.Equal(t, tt.expected, actual)
+		})
+	}
+}
+
+func TestEvaluateFullAPIOutages(t *testing.T) {
+	// installOutage simulates the expected initial state: when haproxy starts during the
+	// installation, all kube-apiservers are down until they come up for the first time.
+	installOutage := func(reportingNode string) monitorapi.Intervals {
+		return monitorapi.Intervals{
+			haproxyDownInterval(reportingNode, "masters/master-0", at(0), at(300)),
+			haproxyDownInterval(reportingNode, "masters/master-1", at(0), at(360)),
+			haproxyDownInterval(reportingNode, "masters/master-2", at(0), at(420)),
+		}
+	}
+
+	tests := []struct {
+		name            string
+		intervals       monitorapi.Intervals
+		expectFailure   bool
+		expectedOutputs []string
+	}{
+		{
+			name:          "no intervals",
+			intervals:     monitorapi.Intervals{},
+			expectFailure: false,
+		},
+		{
+			name:          "only the installation outage",
+			intervals:     installOutage("master-0"),
+			expectFailure: false,
+		},
+		{
+			name: "full outage after the installation",
+			intervals: append(installOutage("master-0"),
+				haproxyDownInterval("master-0", "masters/master-0", at(3600), at(3630)),
+				haproxyDownInterval("master-0", "masters/master-1", at(3600), at(3630)),
+				haproxyDownInterval("master-0", "masters/master-2", at(3600), at(3630)),
+			),
+			expectFailure: true,
+			expectedOutputs: []string{
+				"haproxy on node master-0",
+				at(3600).Format(time.RFC3339),
+				at(3630).Format(time.RFC3339),
+			},
+		},
+		{
+			name:          "initial outage tolerated separately per haproxy instance",
+			intervals:     append(installOutage("master-0"), installOutage("master-1")...),
+			expectFailure: false,
+		},
+		{
+			name: "partial outage after the installation does not fail",
+			intervals: append(installOutage("master-0"),
+				haproxyDownInterval("master-0", "masters/master-0", at(3600), at(3630)),
+				haproxyDownInterval("master-0", "masters/master-1", at(3600), at(3630)),
+			),
+			expectFailure: false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			junits := evaluateFullAPIOutages(tt.intervals)
+			require.Len(t, junits, 1)
+
+			if !tt.expectFailure {
+				assert.Nil(t, junits[0].FailureOutput, "expected the test to pass")
+				return
+			}
+
+			require.NotNil(t, junits[0].FailureOutput, "expected the test to fail")
+			for _, expectedOutput := range tt.expectedOutputs {
+				assert.Contains(t, junits[0].SystemOut, expectedOutput)
+			}
+		})
+	}
+}