Skip to content
36 changes: 36 additions & 0 deletions plugins/header_rewrite/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,43 @@ if(BUILD_TESTING)
target_link_libraries(test_header_rewrite PRIVATE header_rewrite_parser ts::inkevent ts::tscore)

# MaxMind support for the header_rewrite unit test: enable the MaxMind code
# paths in the test binary and, when the needed Python tooling is present,
# generate small MMDB fixtures for the test to load.
if(maxminddb_FOUND)
target_compile_definitions(test_header_rewrite PRIVATE TS_USE_HRW_MAXMINDDB=1)
target_link_libraries(test_header_rewrite PRIVATE maxminddb::maxminddb)

# The fixtures are produced by a Python script, so an interpreter is needed.
# NOTE: when no interpreter is found, fixture generation is skipped without
# any status message.
find_package(
Python3
COMPONENTS Interpreter
QUIET
)
if(Python3_FOUND)
# Configure-time check that the Python modules imported by the generator
# script are available; a non-zero exit status means at least one import
# failed.
execute_process(
COMMAND "${Python3_EXECUTABLE}" -c "import mmdb_writer; import netaddr"
RESULT_VARIABLE _mmdb_python_result
OUTPUT_QUIET ERROR_QUIET
)
if(_mmdb_python_result EQUAL 0)
# Directory inside the build tree that receives the generated fixtures.
set(_mmdb_test_dir "${CMAKE_CURRENT_BINARY_DIR}/test_mmdb")
# Build-time rule that creates both fixture files; it depends on the
# generator script, so editing the script regenerates them.
add_custom_command(
OUTPUT "${_mmdb_test_dir}/test_flat_geo.mmdb" "${_mmdb_test_dir}/test_nested_geo.mmdb"
COMMAND ${CMAKE_COMMAND} -E make_directory "${_mmdb_test_dir}"
COMMAND ${Python3_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/generate_test_mmdb.py" "${_mmdb_test_dir}"
DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/generate_test_mmdb.py"
COMMENT "Generating test MMDB files for header_rewrite"
)
# Target wrapping the generated files so the test executable can depend on
# them being built first.
add_custom_target(
generate_test_mmdb DEPENDS "${_mmdb_test_dir}/test_flat_geo.mmdb" "${_mmdb_test_dir}/test_nested_geo.mmdb"
)
add_dependencies(test_header_rewrite generate_test_mmdb)
# Pass the fixture locations to the test through environment variables.
set_tests_properties(
test_header_rewrite
PROPERTIES
ENVIRONMENT
"MMDB_TEST_FLAT=${_mmdb_test_dir}/test_flat_geo.mmdb;MMDB_TEST_NESTED=${_mmdb_test_dir}/test_nested_geo.mmdb"
)
else()
message(STATUS "Python modules 'mmdb-writer'/'netaddr' not found; skipping test MMDB generation")
endif()
endif()
endif()

# This test has a linker issue when cripts is enabled, so it's commented out for now
Expand Down
102 changes: 58 additions & 44 deletions plugins/header_rewrite/conditions_geo_maxmind.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,31 @@

MMDB_s *gMaxMindDB = nullptr;

// Field layout of the loaded MMDB. NESTED: the country code is read from
// country -> iso_code. FLAT: it is read from a top-level country_code key.
enum class MmdbSchema { NESTED, FLAT };
// Layout used for country lookups. Starts out as NESTED and is overwritten by
// initLibrary() when one of its probe lookups finds an entry.
static MmdbSchema gMmdbSchema = MmdbSchema::NESTED;

// Classify the field layout of a single MMDB lookup entry.
//
// An entry exposing a UTF-8 string at country -> iso_code is reported as
// NESTED. Otherwise an entry exposing a UTF-8 string at the top-level
// country_code key is reported as FLAT. When neither path yields a string the
// result falls back to NESTED.
static MmdbSchema
detect_schema(MMDB_entry_s *entry)
{
  // A lookup only counts when it succeeded and actually produced string data.
  // The status is tested first so the data is never inspected after a failure.
  auto is_string_hit = [](int rc, const MMDB_entry_data_s &data) -> bool {
    return rc == MMDB_SUCCESS && data.has_data && data.type == MMDB_DATA_TYPE_UTF8_STRING;
  };

  MMDB_entry_data_s value;

  const int nested_rc = MMDB_get_value(entry, &value, "country", "iso_code", NULL);
  if (is_string_hit(nested_rc, value)) {
    return MmdbSchema::NESTED;
  }

  const int flat_rc = MMDB_get_value(entry, &value, "country_code", NULL);
  return is_string_hit(flat_rc, value) ? MmdbSchema::FLAT : MmdbSchema::NESTED;
}

// Addresses looked up by initLibrary() right after the database is opened. The
// first one that resolves to an entry is handed to detect_schema().
static const char *probe_ips[] = {"8.8.8.8", "1.1.1.1", "128.0.0.1"};

void
MMConditionGeo::initLibrary(const std::string &path)
{
Expand All @@ -51,9 +76,23 @@ MMConditionGeo::initLibrary(const std::string &path)
if (MMDB_SUCCESS != status) {
Dbg(pi_dbg_ctl, "Cannot open %s - %s", path.c_str(), MMDB_strerror(status));
delete gMaxMindDB;
gMaxMindDB = nullptr;
return;
}
Dbg(pi_dbg_ctl, "Loaded %s", path.c_str());

// Probe the database schema at load time so we know which field paths to
// use for country lookups. Try a few well-known IPs until one hits.
for (auto *ip : probe_ips) {
int gai_error, mmdb_error;
MMDB_lookup_result_s result = MMDB_lookup_string(gMaxMindDB, ip, &gai_error, &mmdb_error);
if (gai_error == 0 && MMDB_SUCCESS == mmdb_error && result.found_entry) {
gMmdbSchema = detect_schema(&result.entry);
Dbg(pi_dbg_ctl, "Loaded %s (schema: %s)", path.c_str(), gMmdbSchema == MmdbSchema::FLAT ? "flat" : "nested");
return;
}
}

Dbg(pi_dbg_ctl, "Loaded %s (schema: defaulting to nested, no probe IPs matched)", path.c_str());
}

std::string
Expand All @@ -74,48 +113,37 @@ MMConditionGeo::get_geo_string(const sockaddr *addr) const
return ret;
}

MMDB_entry_data_list_s *entry_data_list = nullptr;
if (!result.found_entry) {
Dbg(pi_dbg_ctl, "No entry for this IP was found");
return ret;
}

int status = MMDB_get_entry_data_list(&result.entry, &entry_data_list);
if (MMDB_SUCCESS != status) {
Dbg(pi_dbg_ctl, "Error looking up entry data: %s", MMDB_strerror(status));
return ret;
}

if (entry_data_list == nullptr) {
Dbg(pi_dbg_ctl, "No data found");
return ret;
}
MMDB_entry_data_s entry_data;
int status;

const char *field_name;
switch (_geo_qual) {
case GEO_QUAL_COUNTRY:
field_name = "country_code";
if (gMmdbSchema == MmdbSchema::FLAT) {
status = MMDB_get_value(&result.entry, &entry_data, "country_code", NULL);
} else {
status = MMDB_get_value(&result.entry, &entry_data, "country", "iso_code", NULL);
}
break;
case GEO_QUAL_ASN_NAME:
field_name = "autonomous_system_organization";
status = MMDB_get_value(&result.entry, &entry_data, "autonomous_system_organization", NULL);
break;
default:
Dbg(pi_dbg_ctl, "Unsupported field %d", _geo_qual);
return ret;
break;
}

MMDB_entry_data_s entry_data;

status = MMDB_get_value(&result.entry, &entry_data, field_name, NULL);
if (MMDB_SUCCESS != status) {
Dbg(pi_dbg_ctl, "ERROR on get value asn value: %s", MMDB_strerror(status));
Dbg(pi_dbg_ctl, "Error looking up geo string field: %s", MMDB_strerror(status));
return ret;
}
ret = std::string(entry_data.utf8_string, entry_data.data_size);

if (nullptr != entry_data_list) {
MMDB_free_entry_data_list(entry_data_list);
if (entry_data.has_data && entry_data.type == MMDB_DATA_TYPE_UTF8_STRING) {
ret = std::string(entry_data.utf8_string, entry_data.data_size);
}

return ret;
Expand All @@ -139,45 +167,31 @@ MMConditionGeo::get_geo_int(const sockaddr *addr) const
return ret;
}

MMDB_entry_data_list_s *entry_data_list = nullptr;
if (!result.found_entry) {
Dbg(pi_dbg_ctl, "No entry for this IP was found");
return ret;
}

int status = MMDB_get_entry_data_list(&result.entry, &entry_data_list);
if (MMDB_SUCCESS != status) {
Dbg(pi_dbg_ctl, "Error looking up entry data: %s", MMDB_strerror(status));
return ret;
}

if (entry_data_list == nullptr) {
Dbg(pi_dbg_ctl, "No data found");
return ret;
}
MMDB_entry_data_s entry_data;
int status;

const char *field_name;
switch (_geo_qual) {
case GEO_QUAL_ASN:
field_name = "autonomous_system_number";
// GeoLite2-ASN / DBIP-ASN store this as a top-level uint32 field
status = MMDB_get_value(&result.entry, &entry_data, "autonomous_system_number", NULL);
break;
default:
Dbg(pi_dbg_ctl, "Unsupported field %d", _geo_qual);
return ret;
break;
}

MMDB_entry_data_s entry_data;

status = MMDB_get_value(&result.entry, &entry_data, field_name, NULL);
if (MMDB_SUCCESS != status) {
Dbg(pi_dbg_ctl, "ERROR on get value asn value: %s", MMDB_strerror(status));
Dbg(pi_dbg_ctl, "Error looking up geo int field: %s", MMDB_strerror(status));
return ret;
}
ret = entry_data.uint32;

if (nullptr != entry_data_list) {
MMDB_free_entry_data_list(entry_data_list);
if (entry_data.has_data && entry_data.type == MMDB_DATA_TYPE_UINT32) {
ret = entry_data.uint32;
}

return ret;
Expand Down
103 changes: 103 additions & 0 deletions plugins/header_rewrite/generate_test_mmdb.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
#!/usr/bin/env python3
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Generate test MMDB files for header_rewrite geo lookup unit tests.

Two schemas exist in the wild:

Nested (GeoLite2/GeoIP2/DBIP): country -> iso_code
Flat (vendor-specific): country_code (top-level)

This script generates one MMDB file for each schema so the C++ test
can verify that auto-detection works for both.

Requires: pip install mmdb-writer netaddr
"""

import os
import sys

try:
from mmdb_writer import MMDBWriter, MmdbU32
import netaddr
except ImportError:
print("SKIP: mmdb-writer or netaddr not installed (pip install mmdb-writer netaddr)", file=sys.stderr)
sys.exit(1)


def net(cidr):
    """Return a netaddr.IPSet containing the single network given by `cidr`."""
    network = netaddr.IPNetwork(cidr)
    return netaddr.IPSet([network])


def generate_flat(path):
    """Write a flat-schema test database to `path`.

    Every record keeps `country_code` at the top level, next to the ASN
    number and organization fields.
    """
    # (network, country code, AS number, AS organization)
    records = (
        ("8.8.8.0/24", "US", 15169, "GOOGLE"),
        ("1.2.3.0/24", "KR", 9286, "KINX"),
    )

    writer = MMDBWriter(ip_version=4, database_type="Test-Flat-GeoIP")
    for cidr, country_code, asn, organization in records:
        writer.insert_network(
            net(cidr), {
                "country_code": country_code,
                "autonomous_system_number": MmdbU32(asn),
                "autonomous_system_organization": organization,
            })
    writer.to_db_file(path)


def generate_nested(path):
    """Write a nested-schema test database to `path`.

    Every record keeps the country code under `country` -> `iso_code`, with
    an English display name under `country` -> `names` -> `en`. The ASN
    fields stay at the top level.
    """
    # (network, ISO code, English country name, AS number, AS organization)
    records = (
        ("8.8.8.0/24", "US", "United States", 15169, "GOOGLE"),
        ("1.2.3.0/24", "KR", "South Korea", 9286, "KINX"),
    )

    writer = MMDBWriter(ip_version=4, database_type="Test-Nested-GeoIP2")
    for cidr, iso_code, english_name, asn, organization in records:
        country = {
            "iso_code": iso_code,
            "names": {
                "en": english_name
            },
        }
        writer.insert_network(
            net(cidr), {
                "country": country,
                "autonomous_system_number": MmdbU32(asn),
                "autonomous_system_organization": organization,
            })
    writer.to_db_file(path)


if __name__ == "__main__":
    # The output directory is the first command-line argument; without one,
    # the files go to the current directory.
    outdir = "."
    if len(sys.argv) > 1:
        outdir = sys.argv[1]

    flat_path = os.path.join(outdir, "test_flat_geo.mmdb")
    nested_path = os.path.join(outdir, "test_nested_geo.mmdb")

    # Write both fixtures first, then report each file with its size.
    generate_flat(flat_path)
    generate_nested(nested_path)

    for generated in (flat_path, nested_path):
        print(f"Generated {generated} ({os.path.getsize(generated)} bytes)")
Loading