Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
266 changes: 266 additions & 0 deletions collector/bcachefs_linux.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,266 @@
// Copyright 2025 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !nobcachefs

package collector

import (
	"errors"
	"fmt"
	"log/slog"
	"os"
	"regexp"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/procfs/bcachefs"
)

// init registers the collector under the name "bcachefs", passing
// defaultEnabled as its default state and NewBcachefsCollector as its factory.
func init() {
	registerCollector("bcachefs", defaultEnabled, NewBcachefsCollector)
}

// bcachefsCollector collects metrics from bcachefs filesystems.
type bcachefsCollector struct {
	// fs is the bcachefs handle whose Stats method supplies the data exported
	// by Update.
	fs bcachefs.FS
	// logger receives the collector's debug output.
	logger *slog.Logger
}

// NewBcachefsCollector returns a new Collector exposing bcachefs statistics.
//
// The bcachefs handle is opened on the path held in *sysPath. When that fails
// no collector is returned and the underlying error is wrapped.
func NewBcachefsCollector(logger *slog.Logger) (Collector, error) {
	sysfs, err := bcachefs.NewFS(*sysPath)
	if err != nil {
		return nil, fmt.Errorf("failed to open sysfs: %w", err)
	}

	collector := &bcachefsCollector{fs: sysfs, logger: logger}
	return collector, nil
}

// Update retrieves and exports bcachefs statistics.
//
// It reads the stats of every filesystem reported by c.fs.Stats and sends one
// const metric per value on ch. ErrNoData is returned when the stats source
// reports a not-exist error or yields no filesystems; any other read failure
// is returned wrapped.
func (c *bcachefsCollector) Update(ch chan<- prometheus.Metric) error {
	const subsystem = "bcachefs"

	stats, err := c.fs.Stats()
	if err != nil {
		// errors.Is also recognises a not-exist error that has been wrapped
		// with extra context, which os.IsNotExist does not unwrap.
		if errors.Is(err, os.ErrNotExist) {
			c.logger.Debug("bcachefs sysfs path does not exist", "path", sysFilePath("fs/bcachefs"))
			return ErrNoData
		}
		return fmt.Errorf("failed to retrieve bcachefs stats: %w", err)
	}

	if len(stats) == 0 {
		return ErrNoData
	}

	// newDesc builds a descriptor in this collector's namespace/subsystem.
	newDesc := func(name, help string, labels ...string) *prometheus.Desc {
		return prometheus.NewDesc(prometheus.BuildFQName(namespace, subsystem, name), help, labels, nil)
	}

	// Descriptors with fixed names are loop-invariant, so build them once per
	// call instead of once per emitted sample.
	var (
		infoDesc           = newDesc("info", "Filesystem information.", "uuid")
		btreeCacheSizeDesc = newDesc("btree_cache_size_bytes", "Btree cache memory usage in bytes.", "uuid")
		compressedDesc     = newDesc("compression_compressed_bytes", "Compressed size by algorithm.", "uuid", "algorithm")
		uncompressedDesc   = newDesc("compression_uncompressed_bytes", "Uncompressed size by algorithm.", "uuid", "algorithm")
		errorsDesc         = newDesc("errors_total", "Error count by error type.", "uuid", "error_type")
		btreeWritesDesc    = newDesc("btree_writes_total", "Number of btree writes by type.", "uuid", "type")
		btreeWriteSizeDesc = newDesc("btree_write_average_size_bytes", "Average btree write size by type.", "uuid", "type")
		deviceInfoDesc     = newDesc("device_info", "Device information.", "uuid", "device", "label", "state")
		bucketSizeDesc     = newDesc("device_bucket_size_bytes", "Bucket size in bytes.", "uuid", "device")
		bucketsDesc        = newDesc("device_buckets", "Total number of buckets.", "uuid", "device")
		durabilityDesc     = newDesc("device_durability", "Device durability setting.", "uuid", "device")
		ioDoneDesc         = newDesc("device_io_done_bytes_total", "IO bytes by operation type and data type.", "uuid", "device", "operation", "data_type")
		ioErrorsDesc       = newDesc("device_io_errors_total", "IO errors by error type.", "uuid", "device", "type")
	)

	for _, s := range stats {
		uuid := s.UUID

		// Filesystem-level metrics.
		ch <- prometheus.MustNewConstMetric(infoDesc, prometheus.GaugeValue, 1, uuid)
		ch <- prometheus.MustNewConstMetric(
			btreeCacheSizeDesc,
			prometheus.GaugeValue,
			float64(s.BtreeCacheSizeBytes),
			uuid,
		)

		for algorithm, comp := range s.Compression {
			ch <- prometheus.MustNewConstMetric(
				compressedDesc,
				prometheus.GaugeValue,
				float64(comp.CompressedBytes),
				uuid, algorithm,
			)
			ch <- prometheus.MustNewConstMetric(
				uncompressedDesc,
				prometheus.GaugeValue,
				float64(comp.UncompressedBytes),
				uuid, algorithm,
			)
		}

		for errorType, errStats := range s.Errors {
			ch <- prometheus.MustNewConstMetric(
				errorsDesc,
				prometheus.CounterValue,
				float64(errStats.Count),
				uuid, errorType,
			)
		}

		// Each entry of s.Counters becomes its own metric, so the descriptor
		// name depends on the entry and has to be built inside the loop.
		for counterName, counterStats := range s.Counters {
			metricName := sanitizeMetricName(counterName) + "_total"
			ch <- prometheus.MustNewConstMetric(
				newDesc(
					metricName,
					fmt.Sprintf("Bcachefs counter %s since filesystem creation.", counterName),
					"uuid",
				),
				prometheus.CounterValue,
				float64(counterStats.SinceFilesystemCreation),
				uuid,
			)
		}

		for writeType, writeStats := range s.BtreeWrites {
			ch <- prometheus.MustNewConstMetric(
				btreeWritesDesc,
				prometheus.CounterValue,
				float64(writeStats.Count),
				uuid, writeType,
			)
			ch <- prometheus.MustNewConstMetric(
				btreeWriteSizeDesc,
				prometheus.GaugeValue,
				float64(writeStats.SizeBytes),
				uuid, writeType,
			)
		}

		// Per-device metrics.
		for device, devStats := range s.Devices {
			// Skip nil entries; the fields below are read through devStats.
			if devStats == nil {
				continue
			}

			ch <- prometheus.MustNewConstMetric(
				deviceInfoDesc,
				prometheus.GaugeValue,
				1,
				uuid, device, devStats.Label, devStats.State,
			)
			ch <- prometheus.MustNewConstMetric(
				bucketSizeDesc,
				prometheus.GaugeValue,
				float64(devStats.BucketSizeBytes),
				uuid, device,
			)
			ch <- prometheus.MustNewConstMetric(
				bucketsDesc,
				prometheus.GaugeValue,
				float64(devStats.Buckets),
				uuid, device,
			)
			ch <- prometheus.MustNewConstMetric(
				durabilityDesc,
				prometheus.GaugeValue,
				float64(devStats.Durability),
				uuid, device,
			)

			for op, dataTypes := range devStats.IODone {
				for dataType, value := range dataTypes {
					ch <- prometheus.MustNewConstMetric(
						ioDoneDesc,
						prometheus.CounterValue,
						float64(value),
						uuid, device, op, dataType,
					)
				}
			}

			for errorType, value := range devStats.IOErrors {
				ch <- prometheus.MustNewConstMetric(
					ioErrorsDesc,
					prometheus.CounterValue,
					float64(value),
					uuid, device, errorType,
				)
			}
		}
	}

	return nil
}

// bcachefsInvalidMetricNameChars matches any single character that is not an
// ASCII letter, a digit, or an underscore. It is compiled once at package
// initialisation so sanitizeMetricName does not recompile it on every call.
var bcachefsInvalidMetricNameChars = regexp.MustCompile(`[^a-zA-Z0-9_]`)

// sanitizeMetricName converts a string to a valid Prometheus metric name
// component by replacing every character outside [a-zA-Z0-9_] with an
// underscore. Characters are replaced one for one; runs are not collapsed.
func sanitizeMetricName(name string) string {
	return bcachefsInvalidMetricNameChars.ReplaceAllString(name, "_")
}
64 changes: 64 additions & 0 deletions collector/fixtures/e2e-64k-page-output.txt
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,69 @@ node_bcache_writeback_rate_proportional_term{backing_device="bdev0",uuid="deaddd
# HELP node_bcache_written_bytes_total Sum of all data that has been written to the cache.
# TYPE node_bcache_written_bytes_total counter
node_bcache_written_bytes_total{cache_device="cache0",uuid="deaddd54-c735-46d5-868e-f331c5fd7c74"} 0
# HELP node_bcachefs_btree_cache_size_bytes Btree cache memory usage in bytes.
# TYPE node_bcachefs_btree_cache_size_bytes gauge
node_bcachefs_btree_cache_size_bytes{uuid="12345678-1234-1234-1234-123456789abc"} 5.49453824e+08
# HELP node_bcachefs_btree_node_read_total Bcachefs counter btree_node_read since filesystem creation.
# TYPE node_bcachefs_btree_node_read_total counter
node_bcachefs_btree_node_read_total{uuid="12345678-1234-1234-1234-123456789abc"} 67890
# HELP node_bcachefs_btree_node_write_total Bcachefs counter btree_node_write since filesystem creation.
# TYPE node_bcachefs_btree_node_write_total counter
node_bcachefs_btree_node_write_total{uuid="12345678-1234-1234-1234-123456789abc"} 9876
# HELP node_bcachefs_btree_write_average_size_bytes Average btree write size by type.
# TYPE node_bcachefs_btree_write_average_size_bytes gauge
node_bcachefs_btree_write_average_size_bytes{type="cache_reclaim",uuid="12345678-1234-1234-1234-123456789abc"} 0
node_bcachefs_btree_write_average_size_bytes{type="init_next_bset",uuid="12345678-1234-1234-1234-123456789abc"} 24064
node_bcachefs_btree_write_average_size_bytes{type="initial",uuid="12345678-1234-1234-1234-123456789abc"} 110592
node_bcachefs_btree_write_average_size_bytes{type="interior",uuid="12345678-1234-1234-1234-123456789abc"} 354
node_bcachefs_btree_write_average_size_bytes{type="journal_reclaim",uuid="12345678-1234-1234-1234-123456789abc"} 405
# HELP node_bcachefs_btree_writes_total Number of btree writes by type.
# TYPE node_bcachefs_btree_writes_total counter
node_bcachefs_btree_writes_total{type="cache_reclaim",uuid="12345678-1234-1234-1234-123456789abc"} 0
node_bcachefs_btree_writes_total{type="init_next_bset",uuid="12345678-1234-1234-1234-123456789abc"} 6647
node_bcachefs_btree_writes_total{type="initial",uuid="12345678-1234-1234-1234-123456789abc"} 19088
node_bcachefs_btree_writes_total{type="interior",uuid="12345678-1234-1234-1234-123456789abc"} 16788
node_bcachefs_btree_writes_total{type="journal_reclaim",uuid="12345678-1234-1234-1234-123456789abc"} 541080
# HELP node_bcachefs_compression_compressed_bytes Compressed size by algorithm.
# TYPE node_bcachefs_compression_compressed_bytes gauge
node_bcachefs_compression_compressed_bytes{algorithm="incompressible",uuid="12345678-1234-1234-1234-123456789abc"} 5.905580032e+09
node_bcachefs_compression_compressed_bytes{algorithm="lz4",uuid="12345678-1234-1234-1234-123456789abc"} 2.07232172032e+10
# HELP node_bcachefs_compression_uncompressed_bytes Uncompressed size by algorithm.
# TYPE node_bcachefs_compression_uncompressed_bytes gauge
node_bcachefs_compression_uncompressed_bytes{algorithm="incompressible",uuid="12345678-1234-1234-1234-123456789abc"} 5.905580032e+09
node_bcachefs_compression_uncompressed_bytes{algorithm="lz4",uuid="12345678-1234-1234-1234-123456789abc"} 7.1940702208e+10
# HELP node_bcachefs_device_bucket_size_bytes Bucket size in bytes.
# TYPE node_bcachefs_device_bucket_size_bytes gauge
node_bcachefs_device_bucket_size_bytes{device="0",uuid="12345678-1234-1234-1234-123456789abc"} 524288
# HELP node_bcachefs_device_buckets Total number of buckets.
# TYPE node_bcachefs_device_buckets gauge
node_bcachefs_device_buckets{device="0",uuid="12345678-1234-1234-1234-123456789abc"} 524288
# HELP node_bcachefs_device_durability Device durability setting.
# TYPE node_bcachefs_device_durability gauge
node_bcachefs_device_durability{device="0",uuid="12345678-1234-1234-1234-123456789abc"} 1
# HELP node_bcachefs_device_info Device information.
# TYPE node_bcachefs_device_info gauge
node_bcachefs_device_info{device="0",label="ssd.ssd1",state="rw",uuid="12345678-1234-1234-1234-123456789abc"} 1
# HELP node_bcachefs_device_io_done_bytes_total IO bytes by operation type and data type.
# TYPE node_bcachefs_device_io_done_bytes_total counter
node_bcachefs_device_io_done_bytes_total{data_type="btree",device="0",operation="read",uuid="12345678-1234-1234-1234-123456789abc"} 4.411097088e+09
node_bcachefs_device_io_done_bytes_total{data_type="btree",device="0",operation="write",uuid="12345678-1234-1234-1234-123456789abc"} 1.171456e+06
node_bcachefs_device_io_done_bytes_total{data_type="sb",device="0",operation="read",uuid="12345678-1234-1234-1234-123456789abc"} 3.989504e+06
node_bcachefs_device_io_done_bytes_total{data_type="sb",device="0",operation="write",uuid="12345678-1234-1234-1234-123456789abc"} 3.1417344e+07
node_bcachefs_device_io_done_bytes_total{data_type="user",device="0",operation="read",uuid="12345678-1234-1234-1234-123456789abc"} 5.768222552064e+12
node_bcachefs_device_io_done_bytes_total{data_type="user",device="0",operation="write",uuid="12345678-1234-1234-1234-123456789abc"} 3.919681536e+10
# HELP node_bcachefs_device_io_errors_total IO errors by error type.
# TYPE node_bcachefs_device_io_errors_total counter
node_bcachefs_device_io_errors_total{device="0",type="checksum",uuid="12345678-1234-1234-1234-123456789abc"} 0
node_bcachefs_device_io_errors_total{device="0",type="read",uuid="12345678-1234-1234-1234-123456789abc"} 197346
node_bcachefs_device_io_errors_total{device="0",type="write",uuid="12345678-1234-1234-1234-123456789abc"} 0
# HELP node_bcachefs_errors_total Error count by error type.
# TYPE node_bcachefs_errors_total counter
node_bcachefs_errors_total{error_type="btree_node_read_err",uuid="12345678-1234-1234-1234-123456789abc"} 5
node_bcachefs_errors_total{error_type="checksum_err",uuid="12345678-1234-1234-1234-123456789abc"} 2
# HELP node_bcachefs_info Filesystem information.
# TYPE node_bcachefs_info gauge
node_bcachefs_info{uuid="12345678-1234-1234-1234-123456789abc"} 1
# HELP node_bonding_active Number of active slaves per bonding interface.
# TYPE node_bonding_active gauge
node_bonding_active{master="bond0"} 0
Expand Down Expand Up @@ -3034,6 +3097,7 @@ node_schedstat_waiting_seconds_total{cpu="1"} 364107.263788241
# TYPE node_scrape_collector_success gauge
node_scrape_collector_success{collector="arp"} 1
node_scrape_collector_success{collector="bcache"} 1
node_scrape_collector_success{collector="bcachefs"} 1
node_scrape_collector_success{collector="bonding"} 1
node_scrape_collector_success{collector="btrfs"} 1
node_scrape_collector_success{collector="buddyinfo"} 1
Expand Down
Loading