diff --git a/changelog/unreleased/SOLR-18159-physical-memory-metrics.yml b/changelog/unreleased/SOLR-18159-physical-memory-metrics.yml new file mode 100644 index 000000000000..c24a17cd3e3b --- /dev/null +++ b/changelog/unreleased/SOLR-18159-physical-memory-metrics.yml @@ -0,0 +1,9 @@ +title: Add new metric jvm_system_memory_bytes +type: added +authors: + - name: Jan Høydahl + url: https://home.apache.org/phonebook.html?uid=janhoy + - name: Matthew Biscocho +links: + - name: SOLR-18159 + url: https://issues.apache.org/jira/browse/SOLR-18159 diff --git a/solr/core/src/java/org/apache/solr/metrics/OtelRuntimeJvmMetrics.java b/solr/core/src/java/org/apache/solr/metrics/OtelRuntimeJvmMetrics.java index 19803372dae9..a9c14bab2267 100644 --- a/solr/core/src/java/org/apache/solr/metrics/OtelRuntimeJvmMetrics.java +++ b/solr/core/src/java/org/apache/solr/metrics/OtelRuntimeJvmMetrics.java @@ -16,13 +16,20 @@ */ package org.apache.solr.metrics; +import static org.apache.solr.metrics.SolrMetricProducer.STATE_KEY_ATTR; + import io.opentelemetry.api.OpenTelemetry; +import io.opentelemetry.api.common.Attributes; import io.opentelemetry.api.metrics.MeterProvider; +import io.opentelemetry.api.metrics.ObservableLongGauge; import io.opentelemetry.api.trace.TracerProvider; import io.opentelemetry.context.propagation.ContextPropagators; import io.opentelemetry.instrumentation.runtimemetrics.java17.RuntimeMetrics; import java.lang.invoke.MethodHandles; +import java.lang.management.ManagementFactory; +import org.apache.lucene.util.SuppressForbidden; import org.apache.solr.common.util.EnvUtils; +import org.apache.solr.metrics.otel.OtelUnit; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -31,6 +38,7 @@ public class OtelRuntimeJvmMetrics { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private RuntimeMetrics runtimeMetrics; + private ObservableLongGauge systemMemoryGauge; private boolean isInitialized = false; // Main feature 
flag to enable/disable all JVM metrics @@ -38,6 +46,10 @@ public static boolean isJvmMetricsEnabled() { return EnvUtils.getPropertyAsBool("solr.metrics.jvm.enabled", true); } + @SuppressForbidden( + reason = + "com.sun.management.OperatingSystemMXBean is used intentionally for physical memory" + + " gauges; guarded by instanceof check so gracefully absent on non-HotSpot JVMs") public OtelRuntimeJvmMetrics initialize( SolrMetricManager solrMetricManager, String registryName) { if (!isJvmMetricsEnabled()) return this; @@ -65,6 +77,30 @@ public ContextPropagators getPropagators() { // TODO: We should have this configurable to enable/disable specific JVM metrics .enableAllFeatures() .build(); + java.lang.management.OperatingSystemMXBean osMxBean = + ManagementFactory.getOperatingSystemMXBean(); + if (osMxBean instanceof com.sun.management.OperatingSystemMXBean extOsMxBean) { + systemMemoryGauge = + solrMetricManager.observableLongGauge( + registryName, + "jvm.system.memory", + "Physical memory of the host or container in bytes (state=total|free)." 
+ + " On Linux with cgroup limits, total reflects the container memory limit.", + measurement -> { + long total = extOsMxBean.getTotalMemorySize(); + long free = extOsMxBean.getFreeMemorySize(); + if (total >= 0) measurement.record(total, Attributes.of(STATE_KEY_ATTR, "total")); + if (free >= 0) measurement.record(free, Attributes.of(STATE_KEY_ATTR, "free")); + }, + OtelUnit.BYTES); + log.info("Physical memory metrics enabled"); + } else { + if (log.isDebugEnabled()) { + log.debug( + "Physical memory metrics unavailable:" + + " com.sun.management.OperatingSystemMXBean not present on this JVM"); + } + } isInitialized = true; log.info("JVM metrics collection successfully initialized"); return this; @@ -74,6 +110,10 @@ public void close() { if (runtimeMetrics != null && isInitialized) { try { runtimeMetrics.close(); + if (systemMemoryGauge != null) { + systemMemoryGauge.close(); + systemMemoryGauge = null; + } } catch (Exception e) { log.error("Failed to close JVM metrics collection", e); } finally { diff --git a/solr/core/src/java/org/apache/solr/metrics/SolrMetricProducer.java b/solr/core/src/java/org/apache/solr/metrics/SolrMetricProducer.java index 9631c5bedfd5..d98545b36df7 100644 --- a/solr/core/src/java/org/apache/solr/metrics/SolrMetricProducer.java +++ b/solr/core/src/java/org/apache/solr/metrics/SolrMetricProducer.java @@ -30,6 +30,7 @@ public interface SolrMetricProducer extends AutoCloseable { public static final AttributeKey RESULT_ATTR = AttributeKey.stringKey("result"); public static final AttributeKey NAME_ATTR = AttributeKey.stringKey("name"); public static final AttributeKey PLUGIN_NAME_ATTR = AttributeKey.stringKey("plugin_name"); + public static final AttributeKey STATE_KEY_ATTR = AttributeKey.stringKey("state"); /** * Unique metric tag identifies components with the same life-cycle, which should be registered / diff --git a/solr/core/src/test/org/apache/solr/metrics/JvmMetricsTest.java 
b/solr/core/src/test/org/apache/solr/metrics/JvmMetricsTest.java index b3b9f42cd5d4..ca2a8f4b0078 100644 --- a/solr/core/src/test/org/apache/solr/metrics/JvmMetricsTest.java +++ b/solr/core/src/test/org/apache/solr/metrics/JvmMetricsTest.java @@ -16,18 +16,22 @@ */ package org.apache.solr.metrics; +import com.sun.management.OperatingSystemMXBean; import io.opentelemetry.exporter.prometheus.PrometheusMetricReader; import io.prometheus.metrics.model.snapshots.MetricSnapshots; +import java.lang.management.ManagementFactory; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; import java.util.Arrays; import java.util.Set; import java.util.stream.Collectors; +import org.apache.lucene.util.SuppressForbidden; import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.core.NodeConfig; import org.apache.solr.core.SolrXmlConfig; import org.apache.solr.util.SolrJettyTestRule; +import org.junit.Assume; import org.junit.BeforeClass; import org.junit.ClassRule; import org.junit.Test; @@ -105,4 +109,52 @@ public void testSetupJvmMetrics() throws InterruptedException { "Should have JVM buffer metrics", metricNames.stream().anyMatch(name -> name.startsWith("jvm_buffer"))); } + + @Test + @SuppressForbidden(reason = "Testing com.sun.management.OperatingSystemMXBean availability") + public void testSystemMemoryMetrics() { + PrometheusMetricReader reader = + solrTestRule + .getJetty() + .getCoreContainer() + .getMetricManager() + .getPrometheusMetricReader("solr.jvm"); + MetricSnapshots snapshots = reader.collect(); + + Set metricNames = + snapshots.stream() + .map(metric -> metric.getMetadata().getPrometheusName()) + .collect(Collectors.toSet()); + + // Physical memory metrics are only present when com.sun.management.OperatingSystemMXBean + // is available. If absent, the test is skipped. 
+ boolean isHotSpot = + ManagementFactory.getOperatingSystemMXBean() instanceof OperatingSystemMXBean; + Assume.assumeTrue( + "Skipping: com.sun.management.OperatingSystemMXBean not available", isHotSpot); + + assertTrue( + "Should have jvm_system_memory_bytes metric (with state=total and state=free)", + metricNames.contains("jvm_system_memory_bytes")); + } + + @Test + public void testJvmMetricsDisabledNoSystemMemory() throws Exception { + // Verify that when JVM metrics are disabled, initialization is a no-op and close() is safe + SolrMetricManager metricManager = solrTestRule.getJetty().getCoreContainer().getMetricManager(); + String prevValue = System.getProperty("solr.metrics.jvm.enabled"); + System.setProperty("solr.metrics.jvm.enabled", "false"); + try { + OtelRuntimeJvmMetrics disabledMetrics = new OtelRuntimeJvmMetrics(); + OtelRuntimeJvmMetrics result = disabledMetrics.initialize(metricManager, "solr.jvm"); + assertFalse("Should not be initialized when JVM metrics disabled", result.isInitialized()); + disabledMetrics.close(); // must not throw + } finally { + if (prevValue == null) { + System.clearProperty("solr.metrics.jvm.enabled"); + } else { + System.setProperty("solr.metrics.jvm.enabled", prevValue); + } + } + } } diff --git a/solr/solr-ref-guide/modules/deployment-guide/pages/metrics-reporting.adoc b/solr/solr-ref-guide/modules/deployment-guide/pages/metrics-reporting.adoc index 7d0e79a2c0a5..58bf50ebf022 100644 --- a/solr/solr-ref-guide/modules/deployment-guide/pages/metrics-reporting.adoc +++ b/solr/solr-ref-guide/modules/deployment-guide/pages/metrics-reporting.adoc @@ -98,6 +98,21 @@ The `JVM Registry` gathers metrics from the JVM using the OpenTelemetry instrume JVM metrics are enabled by default but can be disabled by setting either the system property `-Dsolr.metrics.jvm.enabled=false` or the environment variable `SOLR_METRICS_JVM_ENABLED=false`. 
+==== Physical Memory Metrics + +Solr exposes a gauge for host or container physical memory, registered under the `solr.jvm` registry: + +[cols="2,1,3",options="header"] +|=== +| Prometheus Metric Name | Type | Description +| `jvm_system_memory_bytes{state="total"}` | gauge | Total physical memory of the host or container in bytes. On Linux with cgroup memory limits, this reflects the container limit rather than host RAM. +| `jvm_system_memory_bytes{state="free"}` | gauge | Free (unused) physical memory of the host or container in bytes. +|=== + +NOTE: These metrics are available when the JVM provides the `com.sun.management.OperatingSystemMXBean` interface (this includes most HotSpot-derived JVMs). On JVMs that do not provide `com.sun.management.OperatingSystemMXBean`, the metrics are not registered; Solr notes this only in a DEBUG-level log message. + +NOTE: On Linux containers with cgroup v1 or v2 memory limits set, the JDK reports the container memory limit as the total, not the host's physical RAM. This is the correct value for calculating MMap cache efficiency in containerised deployments. + === Overseer Registry The `Overseer Registry` is initialized when running in SolrCloud mode and includes the following information: