Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ private static void run(final StatsDClientManager statsDClientManager, final Con
// Register JVM runtime metric callbacks against the OtelMeterProvider so the OTLP
// exporter started by CoreTracer collects them. Started here so it rides the same
// delayed-start path as JMXFetch itself.
JvmOtlpRuntimeMetrics.start();
JvmOtlpRuntimeMetrics.start(config.isMetricsOtelExperimentalEnabled());
// When the OTLP exporter is collecting JVM runtime metrics, skip the default JMXFetch
// JVM config to avoid double-reporting.
defaultConfigs.add(OTLP_JMX_CONFIG);
Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
package opentelemetry147.metrics;

import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;

import datadog.trace.agent.jmxfetch.JvmOtlpRuntimeMetrics;
import datadog.trace.bootstrap.otel.metrics.data.OtelMetricRegistry;
import java.util.Arrays;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.TimeUnit;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;

// Forked test: runs in an isolated JVM and starts JvmOtlpRuntimeMetrics with the experimental
// flag OFF, verifying that Development-status instruments are not registered and that the
// jvm.gc.cause attribute is omitted from jvm.gc.duration data points. The JvmOtlpRuntimeMetrics
// class uses a one-shot AtomicBoolean to guard registration, so this scenario must run in its
// own JVM separate from the always-on JvmOtlpRuntimeMetricsTest.
class JvmOtlpRuntimeMetricsForkedTest {

  // Upper bound on how long we wait for a jvm.gc.duration data point after System.gc().
  private static final long GC_POLL_TIMEOUT_NANOS = TimeUnit.SECONDS.toNanos(2);

  // Pause between two consecutive registry snapshots while waiting for the GC data point.
  private static final long GC_POLL_INTERVAL_MILLIS = 50;

  @BeforeAll
  static void setUp() {
    System.setProperty("dd.metrics.otel.enabled", "true");
    // Experimental flag OFF: this is the scenario under test in this forked JVM.
    JvmOtlpRuntimeMetrics.start(false);
  }

  // Checks that every stable metric name is present and that none of the Development-status
  // metric names is present in a registry snapshot.
  @Test
  void registersOnlyStableMetricsWhenExperimentalDisabled() {
    Set<String> names = snapshot().metricNames;
    // Sorted copy used only for readable failure messages; built once instead of per iteration.
    Set<String> sortedNames = new TreeSet<>(names);

    List<String> expectedStableMetrics =
        Arrays.asList(
            "jvm.memory.used",
            "jvm.memory.committed",
            "jvm.memory.limit",
            "jvm.memory.used_after_last_gc",
            "jvm.thread.count",
            "jvm.class.loaded",
            "jvm.class.count",
            "jvm.class.unloaded",
            "jvm.cpu.time",
            "jvm.cpu.count",
            "jvm.cpu.recent_utilization",
            "jvm.gc.duration");
    for (String metric : expectedStableMetrics) {
      assertTrue(
          names.contains(metric),
          "Expected stable metric '" + metric + "' not found. Got: " + sortedNames);
    }

    List<String> developmentMetrics =
        Arrays.asList(
            "jvm.memory.init",
            "jvm.buffer.memory.used",
            "jvm.buffer.memory.limit",
            "jvm.buffer.count",
            "jvm.system.cpu.utilization",
            "jvm.system.cpu.load_1m",
            "jvm.file_descriptor.count",
            "jvm.file_descriptor.limit");
    for (String metric : developmentMetrics) {
      assertFalse(
          names.contains(metric),
          "Development metric '"
              + metric
              + "' should not be registered when experimental disabled. Got: "
              + sortedNames);
    }
  }

  // Forces a GC, polls the registry until jvm.gc.duration has data points (or the timeout
  // elapses), then checks that each point has jvm.gc.name and jvm.gc.action but no jvm.gc.cause.
  @Test
  void jvmGcDurationDataPointsOmitGcCauseWhenExperimentalDisabled() throws InterruptedException {
    System.gc();

    List<JvmOtlpRuntimeMetricsTest.DataPointEntry> points = null;
    long deadlineNanos = System.nanoTime() + GC_POLL_TIMEOUT_NANOS;
    // Compare by difference rather than with '<': the System.nanoTime() contract warns that
    // direct comparison of two readings is not safe against numerical overflow.
    while (System.nanoTime() - deadlineNanos < 0) {
      points = snapshot().points.get("jvm.gc.duration");
      if (points != null && !points.isEmpty()) {
        break;
      }
      Thread.sleep(GC_POLL_INTERVAL_MILLIS);
    }

    assertNotNull(points, "jvm.gc.duration should have data points after System.gc()");
    assertFalse(points.isEmpty(), "jvm.gc.duration should have at least one data point");
    assertTrue(
        points.stream()
            .allMatch(
                p ->
                    p.attrs.containsKey("jvm.gc.name")
                        && p.attrs.containsKey("jvm.gc.action")
                        && !p.attrs.containsKey("jvm.gc.cause")),
        "jvm.gc.duration data points must carry jvm.gc.name and jvm.gc.action, but not jvm.gc.cause"
            + " when experimental disabled");
  }

  // Collects the current contents of OtelMetricRegistry.INSTANCE into a fresh MetricCollector.
  private static JvmOtlpRuntimeMetricsTest.MetricCollector snapshot() {
    JvmOtlpRuntimeMetricsTest.MetricCollector collector =
        new JvmOtlpRuntimeMetricsTest.MetricCollector();
    OtelMetricRegistry.INSTANCE.collectMetrics(collector);
    return collector;
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;

import com.sun.management.UnixOperatingSystemMXBean;
import datadog.trace.agent.jmxfetch.JvmOtlpRuntimeMetrics;
import datadog.trace.bootstrap.otel.common.OtelInstrumentationScope;
import datadog.trace.bootstrap.otel.metrics.OtelInstrumentDescriptor;
Expand All @@ -15,12 +16,15 @@
import datadog.trace.bootstrap.otlp.metrics.OtlpMetricVisitor;
import datadog.trace.bootstrap.otlp.metrics.OtlpMetricsVisitor;
import datadog.trace.bootstrap.otlp.metrics.OtlpScopedMetricsVisitor;
import java.lang.management.ManagementFactory;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
Expand All @@ -43,7 +47,7 @@ public class JvmOtlpRuntimeMetricsTest {
@BeforeAll
static void setUp() {
System.setProperty("dd.metrics.otel.enabled", "true");
JvmOtlpRuntimeMetrics.start();
JvmOtlpRuntimeMetrics.start(true);
}

@Test
Expand All @@ -67,7 +71,10 @@ void registersExpectedJvmMetrics() {
"jvm.class.unloaded",
"jvm.cpu.time",
"jvm.cpu.count",
"jvm.cpu.recent_utilization");
"jvm.cpu.recent_utilization",
"jvm.system.cpu.utilization",
"jvm.system.cpu.load_1m",
"jvm.gc.duration");

Set<String> names = collector.metricNames;
for (String metric : expectedMetrics) {
Expand All @@ -76,7 +83,18 @@ void registersExpectedJvmMetrics() {
"Expected metric '" + metric + "' not found. Got: " + new TreeSet<>(names));
}

assertEquals(15, names.size(), "Expected 15 metrics, got: " + new TreeSet<>(names));
int expectedSize = expectedMetrics.size();
if (ManagementFactory.getOperatingSystemMXBean() instanceof UnixOperatingSystemMXBean) {
assertTrue(
names.contains("jvm.file_descriptor.count"),
"Expected jvm.file_descriptor.count on Unix. Got: " + new TreeSet<>(names));
assertTrue(
names.contains("jvm.file_descriptor.limit"),
"Expected jvm.file_descriptor.limit on Unix. Got: " + new TreeSet<>(names));
expectedSize += 2;
}

assertEquals(expectedSize, names.size(), "Unexpected metric count: " + new TreeSet<>(names));

// No DD-proprietary names should be present
List<String> ddNames =
Expand Down Expand Up @@ -118,33 +136,112 @@ void jvmMemoryUsedHeapValueIsPositive() {
}

@Test
void jvmThreadCountIsPositive() {
void jvmThreadCountIsBucketedByDaemonAndState() {
MetricCollector collector = new MetricCollector();
OtelMetricRegistry.INSTANCE.collectMetrics(collector);

List<DataPointEntry> threadPoints = collector.points.get("jvm.thread.count");
assertNotNull(threadPoints, "jvm.thread.count should have data points");
assertFalse(threadPoints.isEmpty(), "jvm.thread.count should have data points");

// Every data point must carry both jvm.thread.daemon (Boolean) and jvm.thread.state (String).
Set<String> validStates = new HashSet<>();
for (Thread.State state : Thread.State.values()) {
validStates.add(state.name().toLowerCase(Locale.ROOT));
}
long totalThreads = 0;
for (DataPointEntry point : threadPoints) {
Object daemon = point.attrs.get("jvm.thread.daemon");
Object state = point.attrs.get("jvm.thread.state");
assertNotNull(daemon, "jvm.thread.count point missing jvm.thread.daemon: " + point.attrs);
assertNotNull(state, "jvm.thread.count point missing jvm.thread.state: " + point.attrs);
assertTrue(
"true".equals(daemon.toString()) || "false".equals(daemon.toString()),
"jvm.thread.daemon must be a boolean string, got " + daemon);
assertTrue(
validStates.contains(state.toString()),
"jvm.thread.state must be one of " + validStates + ", got " + state);
assertTrue(
point.value.longValue() > 0,
"jvm.thread.count bucket should be positive (empty buckets must be skipped), got "
+ point.value
+ " for "
+ point.attrs);
totalThreads += point.value.longValue();
}
assertTrue(totalThreads > 0, "Sum of jvm.thread.count buckets should be positive");

// The test JVM has at minimum: the main test thread (non-daemon) plus GC/JMX/etc. daemon
// threads — so we should observe at least one daemon=true and one daemon=false bucket.
Set<String> daemonValues = collector.attributeValues("jvm.thread.count", "jvm.thread.daemon");
assertTrue(
daemonValues.contains("true") && daemonValues.contains("false"),
"jvm.thread.count should emit both daemon and non-daemon buckets, got: " + daemonValues);
}

// Verifies the attribute layout of jvm.memory.init: heap and non_heap values of
// jvm.memory.type must both be present, and at least one point must name a memory pool.
@Test
void jvmMemoryInitHasHeapNonHeapAndPoolAttributes() {
  MetricCollector snapshot = new MetricCollector();
  OtelMetricRegistry.INSTANCE.collectMetrics(snapshot);

  // Aggregates are identified by their jvm.memory.type attribute value.
  Set<String> memoryTypes = snapshot.attributeValues("jvm.memory.init", "jvm.memory.type");
  boolean hasHeap = memoryTypes.contains("heap");
  assertTrue(hasHeap, "jvm.memory.init should have heap aggregate");
  boolean hasNonHeap = memoryTypes.contains("non_heap");
  assertTrue(hasNonHeap, "jvm.memory.init should have non_heap aggregate");

  // Per-pool points are identified by the presence of jvm.memory.pool.name values.
  Set<String> pools = snapshot.attributeValues("jvm.memory.init", "jvm.memory.pool.name");
  boolean noPools = pools.isEmpty();
  assertFalse(
      noPools, "jvm.memory.init should have per-pool data points carrying jvm.memory.pool.name");
}

@Test
void jvmMemoryInitHeapAggregateIsPositive() {
MetricCollector collector = new MetricCollector();
OtelMetricRegistry.INSTANCE.collectMetrics(collector);

List<DataPointEntry> points = collector.points.get("jvm.memory.init");
assertNotNull(points, "jvm.memory.init should have data points");
DataPointEntry heapAggregate =
points.stream()
.filter(
p ->
"heap".equals(p.attrs.get("jvm.memory.type"))
&& p.attrs.get("jvm.memory.pool.name") == null)
.findFirst()
.orElse(null);
assertNotNull(heapAggregate, "jvm.memory.init should have a heap aggregate data point");
assertTrue(
threadPoints.get(0).value.longValue() > 0,
"jvm.thread.count value should be positive, got " + threadPoints.get(0).value);
heapAggregate.value.longValue() > 0,
"jvm.memory.init heap aggregate should be positive, got " + heapAggregate.value);
}

@Test
void startIsIdempotent() {
MetricCollector before = new MetricCollector();
OtelMetricRegistry.INSTANCE.collectMetrics(before);
int countBefore = before.metricNames.size();

JvmOtlpRuntimeMetrics.start();
JvmOtlpRuntimeMetrics.start();

MetricCollector after = new MetricCollector();
OtelMetricRegistry.INSTANCE.collectMetrics(after);
assertEquals(
countBefore,
after.metricNames.size(),
"Repeated start() must not register duplicate instruments");
void jvmGcDurationRecordsDataPointsAfterGc() throws InterruptedException {
// Force a GC; the JMX NotificationListener should observe the event and record a data
// point onto the jvm.gc.duration histogram.
System.gc();

// JMX delivers the notification on the JVM's internal notification thread, so we have
// to poll briefly. Two seconds is generous — delivery is typically sub-50ms.
List<DataPointEntry> points = null;
long deadlineNanos = System.nanoTime() + java.util.concurrent.TimeUnit.SECONDS.toNanos(2);
while (System.nanoTime() < deadlineNanos) {
MetricCollector collector = new MetricCollector();
OtelMetricRegistry.INSTANCE.collectMetrics(collector);
points = collector.points.get("jvm.gc.duration");
if (points != null && !points.isEmpty()) {
break;
}
Thread.sleep(50);
}

assertNotNull(points, "jvm.gc.duration should have data points after System.gc()");
assertFalse(points.isEmpty(), "jvm.gc.duration should have at least one data point");
assertTrue(
points.stream()
.allMatch(
p -> p.attrs.containsKey("jvm.gc.name") && p.attrs.containsKey("jvm.gc.action")),
"Every jvm.gc.duration data point should carry jvm.gc.name and jvm.gc.action attributes");
}

static final class DataPointEntry {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,8 @@ public final class ConfigDefaults {
static final int DEFAULT_METRICS_OTEL_TIMEOUT = 7_500; // ms
static final int DEFAULT_METRICS_OTEL_CARDINALITY_LIMIT = 2_000;

public static final boolean DEFAULT_METRICS_OTEL_EXPERIMENTAL_ENABLED = true;
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was an intentional decision. The idea is that JMX inherently emits many metrics that OTel considers "Development"-status, so enabling them by default is an easy win.


public static final int DEFAULT_OTLP_TRACES_TIMEOUT = 10_000; // ms

static final String DEFAULT_OTLP_HTTP_LOGS_ENDPOINT = "v1/logs";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ public final class OtlpConfig {
public static final String METRICS_OTEL_INTERVAL = "metrics.otel.interval";
public static final String METRICS_OTEL_TIMEOUT = "metrics.otel.timeout";
public static final String METRICS_OTEL_CARDINALITY_LIMIT = "metrics.otel.cardinality.limit";
public static final String METRICS_OTEL_EXPERIMENTAL_ENABLED =
"metrics.otel.experimental.enabled";

public static final String OTLP_METRICS_ENDPOINT = "otlp.metrics.endpoint";
public static final String OTLP_METRICS_HEADERS = "otlp.metrics.headers";
Expand Down
13 changes: 13 additions & 0 deletions internal-api/src/main/java/datadog/trace/api/Config.java
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@
import static datadog.trace.api.ConfigDefaults.DEFAULT_LOGS_OTEL_QUEUE_SIZE;
import static datadog.trace.api.ConfigDefaults.DEFAULT_LOGS_OTEL_TIMEOUT;
import static datadog.trace.api.ConfigDefaults.DEFAULT_METRICS_OTEL_CARDINALITY_LIMIT;
import static datadog.trace.api.ConfigDefaults.DEFAULT_METRICS_OTEL_EXPERIMENTAL_ENABLED;
import static datadog.trace.api.ConfigDefaults.DEFAULT_METRICS_OTEL_INTERVAL;
import static datadog.trace.api.ConfigDefaults.DEFAULT_METRICS_OTEL_TIMEOUT;
import static datadog.trace.api.ConfigDefaults.DEFAULT_OTLP_GRPC_PORT;
Expand Down Expand Up @@ -466,6 +467,7 @@
import static datadog.trace.api.config.OtlpConfig.LOGS_OTEL_QUEUE_SIZE;
import static datadog.trace.api.config.OtlpConfig.LOGS_OTEL_TIMEOUT;
import static datadog.trace.api.config.OtlpConfig.METRICS_OTEL_CARDINALITY_LIMIT;
import static datadog.trace.api.config.OtlpConfig.METRICS_OTEL_EXPERIMENTAL_ENABLED;
import static datadog.trace.api.config.OtlpConfig.METRICS_OTEL_EXPORTER;
import static datadog.trace.api.config.OtlpConfig.METRICS_OTEL_INTERVAL;
import static datadog.trace.api.config.OtlpConfig.METRICS_OTEL_TIMEOUT;
Expand Down Expand Up @@ -970,6 +972,7 @@ public static String getHostName() {
private final int metricsOtelInterval;
private final int metricsOtelTimeout;
private final int metricsOtelCardinalityLimit;
private final boolean metricsOtelExperimentalEnabled;
private final String otlpMetricsEndpoint;
private final Map<String, String> otlpMetricsHeaders;
private final OtlpConfig.Protocol otlpMetricsProtocol;
Expand Down Expand Up @@ -2054,6 +2057,10 @@ private Config(final ConfigProvider configProvider, final InstrumenterConfig ins
}
metricsOtelTimeout = otelTimeout;

metricsOtelExperimentalEnabled =
configProvider.getBoolean(
METRICS_OTEL_EXPERIMENTAL_ENABLED, DEFAULT_METRICS_OTEL_EXPERIMENTAL_ENABLED);

// keep OTLP default timeout below the overall export timeout
int defaultOtlpMetricsTimeout = Math.min(metricsOtelTimeout, DEFAULT_METRICS_OTEL_TIMEOUT);
otlpTimeout = configProvider.getInteger(OTLP_METRICS_TIMEOUT, defaultOtlpMetricsTimeout);
Expand Down Expand Up @@ -5479,6 +5486,10 @@ public boolean isMetricsOtlpExporterEnabled() {
return "otlp".equalsIgnoreCase(metricsOtelExporter);
}

/**
 * Returns the value of the {@code metrics.otel.experimental.enabled} setting, as read in the
 * constructor via {@code configProvider.getBoolean(...)} with
 * {@code DEFAULT_METRICS_OTEL_EXPERIMENTAL_ENABLED} as the fallback.
 *
 * <p>The JMXFetch startup path passes this flag to {@code JvmOtlpRuntimeMetrics.start(boolean)}.
 *
 * @return the configured experimental-metrics flag
 */
public boolean isMetricsOtelExperimentalEnabled() {
return metricsOtelExperimentalEnabled;
}

/**
 * Returns the OTel metrics cardinality limit held by this configuration.
 *
 * <p>NOTE(review): the assignment of this field is outside the visible diff; presumably it is
 * read from {@code metrics.otel.cardinality.limit} — confirm against the constructor.
 *
 * @return the value of the {@code metricsOtelCardinalityLimit} field
 */
public int getMetricsOtelCardinalityLimit() {
return metricsOtelCardinalityLimit;
}
Expand Down Expand Up @@ -6601,6 +6612,8 @@ public String toString() {
+ metricsOtelTimeout
+ ", metricsOtelCardinalityLimit="
+ metricsOtelCardinalityLimit
+ ", metricsOtelExperimentalEnabled="
+ metricsOtelExperimentalEnabled
+ ", otlpMetricsEndpoint="
+ otlpMetricsEndpoint
+ ", otlpMetricsHeaders="
Expand Down
Loading