From 5340ed8e426c71384351fb2479e11cdbb7ab4d93 Mon Sep 17 00:00:00 2001 From: Graham Sedman Date: Sat, 23 May 2026 20:59:59 +0100 Subject: [PATCH 1/6] refactor: modernise CompileParseRules.cc for type safety and modern C++ - Replace C-style file I/O (fopen, fprintf, fclose) with std::ofstream for RAII-based file handling - Use fixed-width integer types (uint32_t, uint8_t) instead of unsigned int and char for portability and clarity - Refactor uint_to_binary function to use std::string instead of static buffer for thread safety - Add comprehensive Doxygen documentation for the file, functions, and arrays - Remove @section license License from file header to prevent Doxygen warnings about multiple use of section label, as @section is intended for major structured documentation sections, not repetitive boilerplate - Remove obsolete COMPILE_PARSE_RULES macro and ink_string.h dependency - Improve output formatting using std::setw, std::setfill, and std::hex for consistent alignment - Replace int loop variables with uint16_t for better type safety - Add static_cast for explicit type conversions --- src/tscore/CompileParseRules.cc | 369 +++++++++++++++++++++----------- 1 file changed, 249 insertions(+), 120 deletions(-) diff --git a/src/tscore/CompileParseRules.cc b/src/tscore/CompileParseRules.cc index 6921072df83..52a7be7ec78 100644 --- a/src/tscore/CompileParseRules.cc +++ b/src/tscore/CompileParseRules.cc @@ -1,179 +1,308 @@ -/** @file - - A brief file description - - @section license License +/** + * @file CompileParseRules.cc + * + * @brief Build-time utility for generating ParseRules character classification tables. + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * This standalone C++ program generates static lookup tables used by the + * Traffic Server runtime for O(1) character classification and case conversion. + * It is executed during the build process and is **not part of the runtime library**. + * + * The generated tables are written to the following files: + * - @c ParseRulesCType: Bitmask of character type flags (32-bit values). + * - @c ParseRulesCTypeToUpper: Uppercase conversion table (uint8_t values). + * - @c ParseRulesCTypeToLower: Lowercase conversion table (uint8_t values). + * + * These files are typically included as static data in @c ParseRules.cc. + * + * @note This tool uses modern C++ features, including: + * - @c std::string for type-safe string handling. + * - @c std::ofstream for RAII-based file I/O. + * - Fixed-width integer types (@c uint8_t, @c uint32_t) for portability. + * + * @see ParseRules.h for character classification function declarations. + * @see ParseRules.cc for runtime usage of generated tables. + */ - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. 
You may obtain a copy of the License at +#include +#include +#include +#include +#include +#include - http://www.apache.org/licenses/LICENSE-2.0 +#include "tscore/ParseRules.h" - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. +/** + * @brief Placeholder for the character type bitmask table. + * + * This array is initialized to zero and serves as a template for the generated + * @c ParseRulesCType table. It is **not used at runtime**. + * + * @note The actual populated table is generated by @c main() and written to + * the @c ParseRulesCType file. */ +const uint32_t parseRulesCType[256] = {0}; -#define COMPILE_PARSE_RULES +/** + * @brief Placeholder for the uppercase conversion table. + * + * This array is initialized to zero and serves as a template for the generated + * @c ParseRulesCTypeToUpper table. It is **not used at runtime**. + * + * @note The actual populated table is generated by @c main() and written to + * the @c ParseRulesCTypeToUpper file. + */ +const uint8_t parseRulesCTypeToUpper[256] = {0}; -#include "tscore/ParseRules.h" +/** + * @brief Placeholder for the lowercase conversion table. + * + * This array is initialized to zero and serves as a template for the generated + * @c ParseRulesCTypeToLower table. It is **not used at runtime**. + * + * @note The actual populated table is generated by @c main() and written to + * the @c ParseRulesCTypeToLower file. + */ +const uint8_t parseRulesCTypeToLower[256] = {0}; -const unsigned int parseRulesCType[256] = {0}; -const char parseRulesCTypeToUpper[256] = {0}; -const char parseRulesCTypeToLower[256] = {0}; +/** + * @brief Working array for character type bitmasks. + * + * Temporary storage for computed character classification bitmasks. 
+ * Populated by @c main() and written to the @c ParseRulesCType output file. + * + * @note Each entry is a 32-bit bitmask where each bit represents a character + * classification (e.g., @c is_alpha_BIT, @c is_digit_BIT). + */ +uint32_t tparseRulesCType[256]; -unsigned int tparseRulesCType[256]; -char tparseRulesCTypeToUpper[256]; -char tparseRulesCTypeToLower[256]; +/** + * @brief Working array for uppercase character conversion. + * + * Temporary storage for uppercase character mappings (0-255). + * Populated by @c main() and written to the @c ParseRulesCTypeToUpper output file. + */ +uint8_t tparseRulesCTypeToUpper[256]; -#include -#include -#include "tscore/ink_string.h" +/** + * @brief Working array for lowercase character conversion. + * + * Temporary storage for lowercase character mappings (0-255). + * Populated by @c main() and written to the @c ParseRulesCTypeToLower output file. + */ +uint8_t tparseRulesCTypeToLower[256]; -static char * -uint_to_binary(unsigned int u) +/** + * @brief Convert a 32-bit unsigned integer to its binary string representation. + * + * Creates a 32-character string representing the binary value of the input, + * with leading zeros. The string is in **big-endian format** (MSB first). + * + * @param u The 32-bit unsigned integer to convert. + * @return @c std::string containing the 32-character binary representation. + * Example: @c uint_to_binary(5) returns @c "00000000000000000000000000000101". + * + * @note This function is **thread-safe** because it returns a new @c std::string + * by value (no shared static buffer). + */ +std::string +uint_to_binary(uint32_t u) { - int i; - static char buf[33]; - for (i = 0; i < 32; i++) { - buf[i] = ((u & (1 << (31 - i))) ? '1' : '0'); + std::string buf(32, '0'); + for (uint8_t i = 0; i < 32; i++) { + if (u & (1 << (31 - i))) { + buf[i] = '1'; + } } - buf[32] = '\0'; - return (buf); + return buf; } +/** + * @brief Generates character classification lookup tables. 
+ * + * This function performs the following steps: + * + * 1. For each ASCII character (0-255): + * - Initializes the working arrays (@c tparseRulesCType, @c tparseRulesCTypeToUpper, + * @c tparseRulesCTypeToLower). + * - Tests the character against all @c ParseRules classification functions. + * - Sets the corresponding bit in @c tparseRulesCType[i] for each matching classification. + * - Stores the uppercase/lowercase conversion values in @c tparseRulesCTypeToUpper + * and @c tparseRulesCTypeToLower. + * + * 2. Writes three output files using @c std::ofstream (RAII): + * - @c ParseRulesCType: Contains hexadecimal bitmask values and their binary + * representations. Format: C-style array initialization. + * - @c ParseRulesCTypeToUpper: Contains uppercase conversion values for each + * character. Format: @c (uint8_t)X, + * - @c ParseRulesCTypeToLower: Contains lowercase conversion values for each + * character. Format: @c (uint8_t)X, + * + * @return 0 on successful completion. + * + * @note The classification functions from @c ParseRules.h include: + * - **Character types**: @c is_char, @c is_alpha, @c is_digit, @c is_alnum, + * @c is_ctl, @c is_ws, @c is_hex, @c is_pchar, @c is_token, @c is_uri, + * @c is_sep, @c is_empty. + * - **Case types**: @c is_upalpha, @c is_loalpha. + * - **Safety/encoding**: @c is_safe, @c is_unsafe, @c is_reserved, + * @c is_unreserved, @c is_national. + * - **Special categories**: @c is_punct, @c is_tspecials, @c is_end_of_url. + * - **Whitespace variants**: @c is_spcr, @c is_splf, @c is_wslfcr, @c is_eow. + * - **HTTP/MIME**: @c is_http_field_name, @c is_http_field_value, + * @c is_mime_sep, @c is_control. + * + * @see ParseRules.h for detailed descriptions of each classification function. + * @see ParseRules.cc for runtime usage of the generated tables. 
+ */ int main() { - int c; - for (c = 0; c < 256; c++) { - tparseRulesCType[c] = 0; - tparseRulesCTypeToLower[c] = ParseRules::ink_tolower(c); - tparseRulesCTypeToUpper[c] = ParseRules::ink_toupper(c); + for (uint16_t i = 0; i < 256; i++) { + tparseRulesCType[i] = 0; + tparseRulesCTypeToLower[i] = static_cast(ParseRules::ink_tolower(i)); + tparseRulesCTypeToUpper[i] = static_cast(ParseRules::ink_toupper(i)); - if (ParseRules::is_char(c)) { - tparseRulesCType[c] |= is_char_BIT; + if (ParseRules::is_char(i)) { + tparseRulesCType[i] |= is_char_BIT; } - if (ParseRules::is_upalpha(c)) { - tparseRulesCType[c] |= is_upalpha_BIT; + if (ParseRules::is_upalpha(i)) { + tparseRulesCType[i] |= is_upalpha_BIT; } - if (ParseRules::is_loalpha(c)) { - tparseRulesCType[c] |= is_loalpha_BIT; + if (ParseRules::is_loalpha(i)) { + tparseRulesCType[i] |= is_loalpha_BIT; } - if (ParseRules::is_alpha(c)) { - tparseRulesCType[c] |= is_alpha_BIT; + if (ParseRules::is_alpha(i)) { + tparseRulesCType[i] |= is_alpha_BIT; } - if (ParseRules::is_digit(c)) { - tparseRulesCType[c] |= is_digit_BIT; + if (ParseRules::is_digit(i)) { + tparseRulesCType[i] |= is_digit_BIT; } - if (ParseRules::is_ctl(c)) { - tparseRulesCType[c] |= is_ctl_BIT; + if (ParseRules::is_ctl(i)) { + tparseRulesCType[i] |= is_ctl_BIT; } - if (ParseRules::is_ws(c)) { - tparseRulesCType[c] |= is_ws_BIT; + if (ParseRules::is_ws(i)) { + tparseRulesCType[i] |= is_ws_BIT; } - if (ParseRules::is_hex(c)) { - tparseRulesCType[c] |= is_hex_BIT; + if (ParseRules::is_hex(i)) { + tparseRulesCType[i] |= is_hex_BIT; } - char cc = c; + + char cc = static_cast(i); + if (ParseRules::is_pchar(&cc)) { - tparseRulesCType[c] |= is_pchar_BIT; + tparseRulesCType[i] |= is_pchar_BIT; } - if (ParseRules::is_extra(c)) { - tparseRulesCType[c] |= is_extra_BIT; + if (ParseRules::is_extra(i)) { + tparseRulesCType[i] |= is_extra_BIT; } - if (ParseRules::is_safe(c)) { - tparseRulesCType[c] |= is_safe_BIT; + if (ParseRules::is_safe(i)) { + tparseRulesCType[i] |= 
is_safe_BIT; } - if (ParseRules::is_unsafe(c)) { - tparseRulesCType[c] |= is_unsafe_BIT; + if (ParseRules::is_unsafe(i)) { + tparseRulesCType[i] |= is_unsafe_BIT; } - if (ParseRules::is_national(c)) { - tparseRulesCType[c] |= is_national_BIT; + if (ParseRules::is_national(i)) { + tparseRulesCType[i] |= is_national_BIT; } - if (ParseRules::is_reserved(c)) { - tparseRulesCType[c] |= is_reserved_BIT; + if (ParseRules::is_reserved(i)) { + tparseRulesCType[i] |= is_reserved_BIT; } - if (ParseRules::is_unreserved(c)) { - tparseRulesCType[c] |= is_unreserved_BIT; + if (ParseRules::is_unreserved(i)) { + tparseRulesCType[i] |= is_unreserved_BIT; } - if (ParseRules::is_punct(c)) { - tparseRulesCType[c] |= is_punct_BIT; + if (ParseRules::is_punct(i)) { + tparseRulesCType[i] |= is_punct_BIT; } - if (ParseRules::is_end_of_url(c)) { - tparseRulesCType[c] |= is_end_of_url_BIT; + if (ParseRules::is_end_of_url(i)) { + tparseRulesCType[i] |= is_end_of_url_BIT; } - if (ParseRules::is_tspecials(c)) { - tparseRulesCType[c] |= is_tspecials_BIT; + if (ParseRules::is_tspecials(i)) { + tparseRulesCType[i] |= is_tspecials_BIT; } - if (ParseRules::is_spcr(c)) { - tparseRulesCType[c] |= is_spcr_BIT; + if (ParseRules::is_spcr(i)) { + tparseRulesCType[i] |= is_spcr_BIT; } - if (ParseRules::is_splf(c)) { - tparseRulesCType[c] |= is_splf_BIT; + if (ParseRules::is_splf(i)) { + tparseRulesCType[i] |= is_splf_BIT; } - if (ParseRules::is_wslfcr(c)) { - tparseRulesCType[c] |= is_wslfcr_BIT; + if (ParseRules::is_wslfcr(i)) { + tparseRulesCType[i] |= is_wslfcr_BIT; } - if (ParseRules::is_eow(c)) { - tparseRulesCType[c] |= is_eow_BIT; + if (ParseRules::is_eow(i)) { + tparseRulesCType[i] |= is_eow_BIT; } - if (ParseRules::is_token(c)) { - tparseRulesCType[c] |= is_token_BIT; + if (ParseRules::is_token(i)) { + tparseRulesCType[i] |= is_token_BIT; } - if (ParseRules::is_uri(c)) { - tparseRulesCType[c] |= is_uri_BIT; + if (ParseRules::is_uri(i)) { + tparseRulesCType[i] |= is_uri_BIT; } - if 
(ParseRules::is_sep(c)) { - tparseRulesCType[c] |= is_sep_BIT; + if (ParseRules::is_sep(i)) { + tparseRulesCType[i] |= is_sep_BIT; } - if (ParseRules::is_empty(c)) { - tparseRulesCType[c] |= is_empty_BIT; + if (ParseRules::is_empty(i)) { + tparseRulesCType[i] |= is_empty_BIT; } - if (ParseRules::is_alnum(c)) { - tparseRulesCType[c] |= is_alnum_BIT; + if (ParseRules::is_alnum(i)) { + tparseRulesCType[i] |= is_alnum_BIT; } - if (ParseRules::is_space(c)) { - tparseRulesCType[c] |= is_space_BIT; + if (ParseRules::is_space(i)) { + tparseRulesCType[i] |= is_space_BIT; } - if (ParseRules::is_control(c)) { - tparseRulesCType[c] |= is_control_BIT; + if (ParseRules::is_control(i)) { + tparseRulesCType[i] |= is_control_BIT; } - if (ParseRules::is_mime_sep(c)) { - tparseRulesCType[c] |= is_mime_sep_BIT; + if (ParseRules::is_mime_sep(i)) { + tparseRulesCType[i] |= is_mime_sep_BIT; } - if (ParseRules::is_http_field_name(c)) { - tparseRulesCType[c] |= is_http_field_name_BIT; + if (ParseRules::is_http_field_name(i)) { + tparseRulesCType[i] |= is_http_field_name_BIT; } - if (ParseRules::is_http_field_value(c)) { - tparseRulesCType[c] |= is_http_field_value_BIT; + if (ParseRules::is_http_field_value(i)) { + tparseRulesCType[i] |= is_http_field_value_BIT; } } - FILE *fp = fopen("ParseRulesCType", "w"); - for (c = 0; c < 256; c++) { - fprintf(fp, "/* %3d (%c) */\t", c, (isprint(c) ? c : '?')); - fprintf(fp, "0x%08X%c\t\t", tparseRulesCType[c], (c != 255 ? ',' : ' ')); - fprintf(fp, "/* [%s] */\n", uint_to_binary((tparseRulesCType[c]))); + // Write ParseRulesCType (bitmask table with binary representation) + std::ofstream fp("ParseRulesCType"); + for (uint16_t i = 0; i < 256; ++i) { + fp << "/* " << std::setw(3) << i << " (" << (isprint(i) ? static_cast(i) : '?') << ") */\t"; + fp << "0x" << std::hex << std::setw(8) << std::setfill('0') << tparseRulesCType[i] << (i != 255 ? 
",\t\t" : "\t\t"); + fp << "/* [" << uint_to_binary(tparseRulesCType[i]) << "] */\n"; } - fclose(fp); - fp = fopen("ParseRulesCTypeToUpper", "w"); - for (c = 0; c < 256; c++) { - fprintf(fp, "%d%c\n", tparseRulesCTypeToUpper[c], c != 255 ? ',' : ' '); + + // Write ParseRulesCTypeToUpper (uppercase conversion table) + { + std::ofstream fp("ParseRulesCTypeToUpper"); + for (uint16_t i = 0; i < 256; ++i) { + fp << "(uint8_t)" << static_cast(tparseRulesCTypeToUpper[i]) << (i != 255 ? ',' : ' ') << '\n'; + } } - fclose(fp); - fp = fopen("ParseRulesCTypeToLower", "w"); - for (c = 0; c < 256; c++) { - fprintf(fp, "%d%c\n", tparseRulesCTypeToLower[c], c != 255 ? ',' : ' '); + + // Write ParseRulesCTypeToLower (lowercase conversion table) + { + std::ofstream fp("ParseRulesCTypeToLower"); + for (uint16_t i = 0; i < 256; ++i) { + fp << "(uint8_t)" << static_cast(tparseRulesCTypeToLower[i]) << (i != 255 ? ',' : ' ') << '\n'; + } } - fclose(fp); return (0); } From 7a78aac4f8f5d9ef6c9291ecab27decd241ac50f Mon Sep 17 00:00:00 2001 From: Graham Sedman Date: Mon, 25 May 2026 10:43:57 +0100 Subject: [PATCH 2/6] fix: use unsigned literal in bit shift to avoid undefined behaviour - Replace `1` with `1U` in bit shift operation to prevent undefined behaviour when shifting into sign bit of signed integer - Shifting a signed integer (e.g., `1 << 31`) into its sign bit invokes undefined behaviour per the C++ standard - Using an unsigned literal (`1U`) ensures well-defined behaviour for all shift amounts --- src/tscore/CompileParseRules.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tscore/CompileParseRules.cc b/src/tscore/CompileParseRules.cc index 52a7be7ec78..531170a15b8 100644 --- a/src/tscore/CompileParseRules.cc +++ b/src/tscore/CompileParseRules.cc @@ -126,7 +126,7 @@ uint_to_binary(uint32_t u) { std::string buf(32, '0'); for (uint8_t i = 0; i < 32; i++) { - if (u & (1 << (31 - i))) { + if (u & (1U << (31 - i))) { buf[i] = '1'; } } From 
a653e16836910f790ed85739edf778a3b0c91e32 Mon Sep 17 00:00:00 2001 From: Graham Sedman Date: Sun, 24 May 2026 21:35:48 +0100 Subject: [PATCH 3/6] fix: use unsigned char for byte values to avoid implementation-defined behaviour - Change `char cc` to `unsigned char cc` to ensure consistent handling of byte values (0-255) - `char` can be signed or unsigned depending on the platform, causing implementation-defined behaviour for values > 127 - Using `unsigned char` guarantees correct interpretation of all byte values --- src/tscore/CompileParseRules.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tscore/CompileParseRules.cc b/src/tscore/CompileParseRules.cc index 531170a15b8..e883b68ca79 100644 --- a/src/tscore/CompileParseRules.cc +++ b/src/tscore/CompileParseRules.cc @@ -204,7 +204,7 @@ main() tparseRulesCType[i] |= is_hex_BIT; } - char cc = static_cast(i); + unsigned char cc = static_cast(i); if (ParseRules::is_pchar(&cc)) { tparseRulesCType[i] |= is_pchar_BIT; From 35b39b704f30ec9e7cc767b515f1ca9a0ce2fbdb Mon Sep 17 00:00:00 2001 From: Graham Sedman Date: Sun, 24 May 2026 20:25:54 +0100 Subject: [PATCH 4/6] refactor: adopt Mozilla style guide and add file error handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Rename functions to PascalCase (uint_to_binary → UintToBinary) - Add `g` prefix to global variables (example: parseRulesCType → gParseRulesCType) - Rename ParseRules classification functions and constants to PascalCase (is_* → Is*, *_BIT → IS_*) - Add file existence checks before writing with error messages to stderr - Add `#include <iostream>` for std::cerr - Rename `fp` to `outputFile` for improved clarity - Change `char` to `unsigned char` for byte value handling - Update all Doxygen comments to reflect new naming conventions --- src/tscore/CompileParseRules.cc | 239 +++++++++++++++++--------------- 1 file changed, 126 insertions(+), 113 deletions(-) diff --git
a/src/tscore/CompileParseRules.cc b/src/tscore/CompileParseRules.cc index e883b68ca79..a8d38591bf3 100644 --- a/src/tscore/CompileParseRules.cc +++ b/src/tscore/CompileParseRules.cc @@ -24,9 +24,9 @@ * It is executed during the build process and is **not part of the runtime library**. * * The generated tables are written to the following files: - * - @c ParseRulesCType: Bitmask of character type flags (32-bit values). - * - @c ParseRulesCTypeToUpper: Uppercase conversion table (uint8_t values). - * - @c ParseRulesCTypeToLower: Lowercase conversion table (uint8_t values). + * - @c gParseRulesCType: Bitmask of character type flags (32-bit values). + * - @c gParseRulesCTypeToUpper: Uppercase conversion table (uint8_t values). + * - @c gParseRulesCTypeToLower: Lowercase conversion table (uint8_t values). * * These files are typically included as static data in @c ParseRules.cc. * @@ -44,6 +44,7 @@ #include #include #include +#include #include #include "tscore/ParseRules.h" @@ -52,61 +53,61 @@ * @brief Placeholder for the character type bitmask table. * * This array is initialized to zero and serves as a template for the generated - * @c ParseRulesCType table. It is **not used at runtime**. + * @c gParseRulesCType table. It is **not used at runtime**. * * @note The actual populated table is generated by @c main() and written to - * the @c ParseRulesCType file. + * the @c gParseRulesCType file. */ -const uint32_t parseRulesCType[256] = {0}; +const uint32_t gParseRulesCType[256] = {0}; /** * @brief Placeholder for the uppercase conversion table. * * This array is initialized to zero and serves as a template for the generated - * @c ParseRulesCTypeToUpper table. It is **not used at runtime**. + * @c gParseRulesCTypeToUpper table. It is **not used at runtime**. * * @note The actual populated table is generated by @c main() and written to - * the @c ParseRulesCTypeToUpper file. + * the @c gParseRulesCTypeToUpper file. 
*/ -const uint8_t parseRulesCTypeToUpper[256] = {0}; +const uint8_t gParseRulesCTypeToUpper[256] = {0}; /** * @brief Placeholder for the lowercase conversion table. * * This array is initialized to zero and serves as a template for the generated - * @c ParseRulesCTypeToLower table. It is **not used at runtime**. + * @c gParseRulesCTypeToLower table. It is **not used at runtime**. * * @note The actual populated table is generated by @c main() and written to - * the @c ParseRulesCTypeToLower file. + * the @c gParseRulesCTypeToLower file. */ -const uint8_t parseRulesCTypeToLower[256] = {0}; +const uint8_t gParseRulesCTypeToLower[256] = {0}; /** * @brief Working array for character type bitmasks. * * Temporary storage for computed character classification bitmasks. - * Populated by @c main() and written to the @c ParseRulesCType output file. + * Populated by @c main() and written to the @c gParseRulesCType output file. * * @note Each entry is a 32-bit bitmask where each bit represents a character * classification (e.g., @c is_alpha_BIT, @c is_digit_BIT). */ -uint32_t tparseRulesCType[256]; +uint32_t gTparseRulesCType[256]; /** * @brief Working array for uppercase character conversion. * * Temporary storage for uppercase character mappings (0-255). - * Populated by @c main() and written to the @c ParseRulesCTypeToUpper output file. + * Populated by @c main() and written to the @c gParseRulesCTypeToUpper output file. */ -uint8_t tparseRulesCTypeToUpper[256]; +uint8_t gTparseRulesCTypeToUpper[256]; /** * @brief Working array for lowercase character conversion. * * Temporary storage for lowercase character mappings (0-255). - * Populated by @c main() and written to the @c ParseRulesCTypeToLower output file. + * Populated by @c main() and written to the @c gParseRulesCTypeToLower output file. */ -uint8_t tparseRulesCTypeToLower[256]; +uint8_t gTparseRulesCTypeToLower[256]; /** * @brief Convert a 32-bit unsigned integer to its binary string representation. 
@@ -116,13 +117,13 @@ uint8_t tparseRulesCTypeToLower[256]; * * @param u The 32-bit unsigned integer to convert. * @return @c std::string containing the 32-character binary representation. - * Example: @c uint_to_binary(5) returns @c "00000000000000000000000000000101". + * Example: @c UintToBinary(5) returns @c "00000000000000000000000000000101". * * @note This function is **thread-safe** because it returns a new @c std::string * by value (no shared static buffer). */ std::string -uint_to_binary(uint32_t u) +UintToBinary(uint32_t u) { std::string buf(32, '0'); for (uint8_t i = 0; i < 32; i++) { @@ -139,34 +140,34 @@ uint_to_binary(uint32_t u) * This function performs the following steps: * * 1. For each ASCII character (0-255): - * - Initializes the working arrays (@c tparseRulesCType, @c tparseRulesCTypeToUpper, - * @c tparseRulesCTypeToLower). + * - Initializes the working arrays (@c gTparseRulesCType, @c gTparseRulesCTypeToUpper, + * @c gTparseRulesCTypeToLower). * - Tests the character against all @c ParseRules classification functions. - * - Sets the corresponding bit in @c tparseRulesCType[i] for each matching classification. - * - Stores the uppercase/lowercase conversion values in @c tparseRulesCTypeToUpper - * and @c tparseRulesCTypeToLower. + * - Sets the corresponding bit in @c gTparseRulesCType[i] for each matching classification. + * - Stores the uppercase/lowercase conversion values in @c gTparseRulesCTypeToUpper + * and @c gTparseRulesCTypeToLower. * * 2. Writes three output files using @c std::ofstream (RAII): - * - @c ParseRulesCType: Contains hexadecimal bitmask values and their binary + * - @c gParseRulesCType: Contains hexadecimal bitmask values and their binary * representations. Format: C-style array initialization. - * - @c ParseRulesCTypeToUpper: Contains uppercase conversion values for each + * - @c gParseRulesCTypeToUpper: Contains uppercase conversion values for each * character. 
Format: @c (uint8_t)X, - * - @c ParseRulesCTypeToLower: Contains lowercase conversion values for each + * - @c gParseRulesCTypeToLower: Contains lowercase conversion values for each * character. Format: @c (uint8_t)X, * * @return 0 on successful completion. * * @note The classification functions from @c ParseRules.h include: - * - **Character types**: @c is_char, @c is_alpha, @c is_digit, @c is_alnum, - * @c is_ctl, @c is_ws, @c is_hex, @c is_pchar, @c is_token, @c is_uri, - * @c is_sep, @c is_empty. - * - **Case types**: @c is_upalpha, @c is_loalpha. - * - **Safety/encoding**: @c is_safe, @c is_unsafe, @c is_reserved, - * @c is_unreserved, @c is_national. - * - **Special categories**: @c is_punct, @c is_tspecials, @c is_end_of_url. - * - **Whitespace variants**: @c is_spcr, @c is_splf, @c is_wslfcr, @c is_eow. - * - **HTTP/MIME**: @c is_http_field_name, @c is_http_field_value, - * @c is_mime_sep, @c is_control. + * - **Character types**: @c IsChar, @c IsAlpha, @c IsDigit, @c IsAlnum, + * @c IsCtl, @c IsWs, @c IsHex, @c IsPchar, @c IsToken, @c IsUri, + * @c IsSep, @c IsEmpty. + * - **Case types**: @c IsUpalpha, @c IsLoalpha. + * - **Safety/encoding**: @c IsSafe, @c IsUnsafe, @c IsReserved, + * @c IsUnreserved, @c IsNational. + * - **Special categories**: @c IsPunct, @c IsTspecials, @c IsEndOfUrl. + * - **Whitespace variants**: @c IsSpcr, @c IsSplf, @c IsWslfcr, @c IsEow. + * - **HTTP/MIME**: @c IsHttpFieldName, @c IsHttpFieldValue, + * @c IsMimeSep, @c IsControl. * * @see ParseRules.h for detailed descriptions of each classification function. * @see ParseRules.cc for runtime usage of the generated tables. 
@@ -175,132 +176,144 @@ int main() { for (uint16_t i = 0; i < 256; i++) { - tparseRulesCType[i] = 0; - tparseRulesCTypeToLower[i] = static_cast(ParseRules::ink_tolower(i)); - tparseRulesCTypeToUpper[i] = static_cast(ParseRules::ink_toupper(i)); + gTparseRulesCType[i] = 0; + gTparseRulesCTypeToLower[i] = static_cast(ParseRules::ink_tolower(i)); + gTparseRulesCTypeToUpper[i] = static_cast(ParseRules::ink_toupper(i)); - if (ParseRules::is_char(i)) { - tparseRulesCType[i] |= is_char_BIT; + if (ParseRules::IsChar(index)) { + gTparseRulesCType[index] |= IS_CHAR_BIT; } - if (ParseRules::is_upalpha(i)) { - tparseRulesCType[i] |= is_upalpha_BIT; + if (ParseRules::IsUpalpha(index)) { + gTparseRulesCType[index] |= IS_UPALPHA_BIT; } - if (ParseRules::is_loalpha(i)) { - tparseRulesCType[i] |= is_loalpha_BIT; + if (ParseRules::IsLoalpha(index)) { + gTparseRulesCType[index] |= IS_LOALPHA_BIT; } - if (ParseRules::is_alpha(i)) { - tparseRulesCType[i] |= is_alpha_BIT; + if (ParseRules::IsAlpha(index)) { + gTparseRulesCType[index] |= IS_ALPHA_BIT; } - if (ParseRules::is_digit(i)) { - tparseRulesCType[i] |= is_digit_BIT; + if (ParseRules::IsDigit(index)) { + gTparseRulesCType[index] |= IS_DIGIT_BIT; } - if (ParseRules::is_ctl(i)) { - tparseRulesCType[i] |= is_ctl_BIT; + if (ParseRules::IsCtl(index)) { + gTparseRulesCType[index] |= IS_CTL_BIT; } - if (ParseRules::is_ws(i)) { - tparseRulesCType[i] |= is_ws_BIT; + if (ParseRules::IsWs(index)) { + gTparseRulesCType[index] |= IS_WS_BIT; } - if (ParseRules::is_hex(i)) { - tparseRulesCType[i] |= is_hex_BIT; + if (ParseRules::IsHex(index)) { + gTparseRulesCType[index] |= IS_HEX_BIT; } unsigned char cc = static_cast(i); - if (ParseRules::is_pchar(&cc)) { - tparseRulesCType[i] |= is_pchar_BIT; + if (ParseRules::IsPchar(&currentChar)) { + gTparseRulesCType[index] |= IS_PCHAR_BIT; } - if (ParseRules::is_extra(i)) { - tparseRulesCType[i] |= is_extra_BIT; + if (ParseRules::IsExtra(index)) { + gTparseRulesCType[index] |= IS_EXTRA_BIT; } - if
(ParseRules::is_safe(i)) { - tparseRulesCType[i] |= is_safe_BIT; + if (ParseRules::IsSafe(index)) { + gTparseRulesCType[index] |= IS_SAFE_BIT; } - if (ParseRules::is_unsafe(i)) { - tparseRulesCType[i] |= is_unsafe_BIT; + if (ParseRules::IsUnsafe(index)) { + gTparseRulesCType[index] |= IS_UNSAFE_BIT; } - if (ParseRules::is_national(i)) { - tparseRulesCType[i] |= is_national_BIT; + if (ParseRules::IsNational(index)) { + gTparseRulesCType[index] |= IS_NATIONAL_BIT; } - if (ParseRules::is_reserved(i)) { - tparseRulesCType[i] |= is_reserved_BIT; + if (ParseRules::IsReserved(index)) { + gTparseRulesCType[index] |= IS_RESERVED_BIT; } - if (ParseRules::is_unreserved(i)) { - tparseRulesCType[i] |= is_unreserved_BIT; + if (ParseRules::IsUnreserved(index)) { + gTparseRulesCType[index] |= IS_UNRESERVED_BIT; } - if (ParseRules::is_punct(i)) { - tparseRulesCType[i] |= is_punct_BIT; + if (ParseRules::IsPunct(index)) { + gTparseRulesCType[index] |= IS_PUNCT_BIT; } - if (ParseRules::is_end_of_url(i)) { - tparseRulesCType[i] |= is_end_of_url_BIT; + if (ParseRules::IsEndOfUrl(index)) { + gTparseRulesCType[index] |= IS_END_OF_URL_BIT; } - if (ParseRules::is_tspecials(i)) { - tparseRulesCType[i] |= is_tspecials_BIT; + if (ParseRules::IsTspecials(index)) { + gTparseRulesCType[index] |= IS_TSPECIALS_BIT; } - if (ParseRules::is_spcr(i)) { - tparseRulesCType[i] |= is_spcr_BIT; + if (ParseRules::IsSpcr(index)) { + gTparseRulesCType[index] |= IS_SPCR_BIT; } - if (ParseRules::is_splf(i)) { - tparseRulesCType[i] |= is_splf_BIT; + if (ParseRules::IsSplf(index)) { + gTparseRulesCType[index] |= IS_SPLF_BIT; } - if (ParseRules::is_wslfcr(i)) { - tparseRulesCType[i] |= is_wslfcr_BIT; + if (ParseRules::IsWslfcr(index)) { + gTparseRulesCType[index] |= IS_WSLFCR_BIT; } - if (ParseRules::is_eow(i)) { - tparseRulesCType[i] |= is_eow_BIT; + if (ParseRules::IsEow(index)) { + gTparseRulesCType[index] |= IS_EOW_BIT; } - if (ParseRules::is_token(i)) { - tparseRulesCType[i] |= is_token_BIT; + if 
(ParseRules::IsToken(index)) { + gTparseRulesCType[index] |= IS_TOKEN_BIT; } - if (ParseRules::is_uri(i)) { - tparseRulesCType[i] |= is_uri_BIT; + if (ParseRules::IsUri(index)) { + gTparseRulesCType[index] |= IS_URI_BIT; } - if (ParseRules::is_sep(i)) { - tparseRulesCType[i] |= is_sep_BIT; + if (ParseRules::IsSep(index)) { + gTparseRulesCType[index] |= IS_SEP_BIT; } - if (ParseRules::is_empty(i)) { - tparseRulesCType[i] |= is_empty_BIT; + if (ParseRules::IsEmpty(index)) { + gTparseRulesCType[index] |= IS_EMPTY_BIT; } - if (ParseRules::is_alnum(i)) { - tparseRulesCType[i] |= is_alnum_BIT; + if (ParseRules::IsAlnum(index)) { + gTparseRulesCType[index] |= IS_ALNUM_BIT; } - if (ParseRules::is_space(i)) { - tparseRulesCType[i] |= is_space_BIT; + if (ParseRules::IsSpace(index)) { + gTparseRulesCType[index] |= IS_SPACE_BIT; } - if (ParseRules::is_control(i)) { - tparseRulesCType[i] |= is_control_BIT; + if (ParseRules::IsControl(index)) { + gTparseRulesCType[index] |= IS_CONTROL_BIT; } - if (ParseRules::is_mime_sep(i)) { - tparseRulesCType[i] |= is_mime_sep_BIT; + if (ParseRules::IsMimeSep(index)) { + gTparseRulesCType[index] |= IS_MIME_SEP_BIT; } - if (ParseRules::is_http_field_name(i)) { - tparseRulesCType[i] |= is_http_field_name_BIT; + if (ParseRules::IsHttpFieldName(index)) { + gTparseRulesCType[index] |= IS_HTTP_FIELD_NAME_BIT; } - if (ParseRules::is_http_field_value(i)) { - tparseRulesCType[i] |= is_http_field_value_BIT; + if (ParseRules::IsHttpFieldValue(index)) { + gTparseRulesCType[index] |= IS_HTTP_FIELD_VALUE_BIT; } } // Write ParseRulesCType (bitmask table with binary representation) - std::ofstream fp("ParseRulesCType"); + std::ofstream outputFile("ParseRulesCType"); + if (!outputFile) { + std::cerr << "Error: Failed to open ParseRulesCType for writing." << std::endl; + return 1; + } for (uint16_t i = 0; i < 256; ++i) { - fp << "/* " << std::setw(3) << i << " (" << (isprint(i) ? 
static_cast(i) : '?') << ") */\t"; - fp << "0x" << std::hex << std::setw(8) << std::setfill('0') << tparseRulesCType[i] << (i != 255 ? ",\t\t" : "\t\t"); - fp << "/* [" << uint_to_binary(tparseRulesCType[i]) << "] */\n"; + outputFile << "/* " << std::setw(3) << i << " (" << (isprint(i) ? static_cast(i) : '?') << ") */\t"; + outputFile << "0x" << std::hex << std::setw(8) << std::setfill('0') << gTparseRulesCType[i] << (i != 255 ? ",\t\t" : "\t\t"); + outputFile << "/* [" << uint_to_binary(gTparseRulesCType[i]) << "] */\n"; } // Write ParseRulesCTypeToUpper (uppercase conversion table) { - std::ofstream fp("ParseRulesCTypeToUpper"); + std::ofstream outputFile("ParseRulesCTypeToUpper"); + if (!outputFile) { + std::cerr << "Error: Failed to open ParseRulesCTypeToUpper for writing." << std::endl; + return 1; + } for (uint16_t i = 0; i < 256; ++i) { - fp << "(uint8_t)" << static_cast(tparseRulesCTypeToUpper[i]) << (i != 255 ? ',' : ' ') << '\n'; + outputFile << "(uint8_t)" << static_cast(gTparseRulesCTypeToUpper[i]) << (i != 255 ? ',' : ' ') << '\n'; } } // Write ParseRulesCTypeToLower (lowercase conversion table) { - std::ofstream fp("ParseRulesCTypeToLower"); + std::ofstream outputFile("ParseRulesCTypeToLower"); + if (!outputFile) { + std::cerr << "Error: Failed to open ParseRulesCTypeToLower for writing." << std::endl; + return 1; + } for (uint16_t i = 0; i < 256; ++i) { - fp << "(uint8_t)" << static_cast(tparseRulesCTypeToLower[i]) << (i != 255 ? ',' : ' ') << '\n'; + fp << "(uint8_t)" << static_cast(gTparseRulesCTypeToLower[i]) << (i != 255 ? 
',' : ' ') << '\n'; } } From 77d846b5abf05965ee127ccf275f3bc476617666 Mon Sep 17 00:00:00 2001 From: Graham Sedman Date: Sun, 24 May 2026 22:37:02 +0100 Subject: [PATCH 5/6] fix: correct inconsistent variable names and use PascalCase functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Replace `index` with `i` to match loop variable - Replace `currentChar` with `cc` to match variable declaration - Use PascalCase for ParseRules functions (ink_tolower → InkTolower, ink_toupper → InkToupper) - Use PascalCase for UintToBinary function - Replace `fp` with `outputFile` in last file writing block for consistency --- src/tscore/CompileParseRules.cc | 136 ++++++++++++++++---------------- 1 file changed, 68 insertions(+), 68 deletions(-) diff --git a/src/tscore/CompileParseRules.cc b/src/tscore/CompileParseRules.cc index a8d38591bf3..a0a6f9d13aa 100644 --- a/src/tscore/CompileParseRules.cc +++ b/src/tscore/CompileParseRules.cc @@ -177,107 +177,107 @@ main() { for (uint16_t i = 0; i < 256; i++) { gTparseRulesCType[i] = 0; - gTparseRulesCTypeToLower[i] = static_cast(ParseRules::ink_tolower(i)); - gTparseRulesCTypeToUpper[i] = static_cast(ParseRules::ink_toupper(i)); + gTparseRulesCTypeToLower[i] = static_cast(ParseRules::InkTolower(i)); + gTparseRulesCTypeToUpper[i] = static_cast(ParseRules::InkToupper(i)); - if (ParseRules::IsChar(index)) { - gTparseRulesCType[index] |= IS_CHAR_BIT; + if (ParseRules::IsChar(i)) { + gTparseRulesCType[i] |= IS_CHAR_BIT; } - if (ParseRules::IsUpalpha(index)) { - gTparseRulesCType[index] |= IS_UPALPHA_BIT; + if (ParseRules::IsUpalpha(i)) { + gTparseRulesCType[i] |= IS_UPALPHA_BIT; } - if (ParseRules::IsLoalpha(index)) { - gTparseRulesCType[index] |= IS_LOALPHA_BIT; + if (ParseRules::IsLoalpha(i)) { + gTparseRulesCType[i] |= IS_LOALPHA_BIT; } - if (ParseRules::IsAlpha(index)) { - gTparseRulesCType[index] |= IS_ALPHA_BIT; + if (ParseRules::IsAlpha(i)) { + gTparseRulesCType[i] |= IS_ALPHA_BIT; } - if 
(ParseRules::IsDigit(index)) { - gTparseRulesCType[index] |= IS_DIGIT_BIT; + if (ParseRules::IsDigit(i)) { + gTparseRulesCType[i] |= IS_DIGIT_BIT; } - if (ParseRules::IsCtl(index)) { - gTparseRulesCType[index] |= IS_CTL_BIT; + if (ParseRules::IsCtl(i)) { + gTparseRulesCType[i] |= IS_CTL_BIT; } - if (ParseRules::IsWs(index)) { - gTparseRulesCType[index] |= IS_WS_BIT; + if (ParseRules::IsWs(i)) { + gTparseRulesCType[i] |= IS_WS_BIT; } - if (ParseRules::IsHex(index)) { - gTparseRulesCType[index] |= IS_HEX_BIT; + if (ParseRules::IsHex(i)) { + gTparseRulesCType[i] |= IS_HEX_BIT; } unsigned char cc = static_cast(i); - if (ParseRules::IsPchar(&currentChar)) { - gTparseRulesCType[index] |= IS_PCHAR_BIT; + if (ParseRules::IsPchar(&cc)) { + gTparseRulesCType[i] |= IS_PCHAR_BIT; } - if (ParseRules::IsExtra(index)) { - gTparseRulesCType[index] |= IS_EXTRA_BIT; + if (ParseRules::IsExtra(i)) { + gTparseRulesCType[i] |= IS_EXTRA_BIT; } - if (ParseRules::IsSafe(index)) { - gTparseRulesCType[index] |= IS_SAFE_BIT; + if (ParseRules::IsSafe(i)) { + gTparseRulesCType[i] |= IS_SAFE_BIT; } - if (ParseRules::IsUnsafe(index)) { - gTparseRulesCType[index] |= IS_UNSAFE_BIT; + if (ParseRules::IsUnsafe(i)) { + gTparseRulesCType[i] |= IS_UNSAFE_BIT; } - if (ParseRules::IsNational(index)) { - gTparseRulesCType[index] |= IS_NATIONAL_BIT; + if (ParseRules::IsNational(i)) { + gTparseRulesCType[i] |= IS_NATIONAL_BIT; } - if (ParseRules::IsReserved(index)) { - gTparseRulesCType[index] |= IS_RESERVED_BIT; + if (ParseRules::IsReserved(i)) { + gTparseRulesCType[i] |= IS_RESERVED_BIT; } - if (ParseRules::IsUnreserved(index)) { - gTparseRulesCType[index] |= IS_UNRESERVED_BIT; + if (ParseRules::IsUnreserved(i)) { + gTparseRulesCType[i] |= IS_UNRESERVED_BIT; } - if (ParseRules::IsPunct(index)) { - gTparseRulesCType[index] |= IS_PUNCT_BIT; + if (ParseRules::IsPunct(i)) { + gTparseRulesCType[i] |= IS_PUNCT_BIT; } - if (ParseRules::IsEndOfUrl(index)) { - gTparseRulesCType[index] |= IS_END_OF_URL_BIT; + if
(ParseRules::IsEndOfUrl(i)) { + gTparseRulesCType[i] |= IS_END_OF_URL_BIT; } - if (ParseRules::IsTspecials(index)) { - gTparseRulesCType[index] |= IS_TSPECIALS_BIT; + if (ParseRules::IsTspecials(i)) { + gTparseRulesCType[i] |= IS_TSPECIALS_BIT; } - if (ParseRules::IsSpcr(index)) { - gTparseRulesCType[index] |= IS_SPCR_BIT; + if (ParseRules::IsSpcr(i)) { + gTparseRulesCType[i] |= IS_SPCR_BIT; } - if (ParseRules::IsSplf(index)) { - gTparseRulesCType[index] |= IS_SPLF_BIT; + if (ParseRules::IsSplf(i)) { + gTparseRulesCType[i] |= IS_SPLF_BIT; } - if (ParseRules::IsWslfcr(index)) { - gTparseRulesCType[index] |= IS_WSLFCR_BIT; + if (ParseRules::IsWslfcr(i)) { + gTparseRulesCType[i] |= IS_WSLFCR_BIT; } - if (ParseRules::IsEow(index)) { - gTparseRulesCType[index] |= IS_EOW_BIT; + if (ParseRules::IsEow(i)) { + gTparseRulesCType[i] |= IS_EOW_BIT; } - if (ParseRules::IsToken(index)) { - gTparseRulesCType[index] |= IS_TOKEN_BIT; + if (ParseRules::IsToken(i)) { + gTparseRulesCType[i] |= IS_TOKEN_BIT; } - if (ParseRules::IsUri(index)) { - gTparseRulesCType[index] |= IS_URI_BIT; + if (ParseRules::IsUri(i)) { + gTparseRulesCType[i] |= IS_URI_BIT; } - if (ParseRules::IsSep(index)) { - gTparseRulesCType[index] |= IS_SEP_BIT; + if (ParseRules::IsSep(i)) { + gTparseRulesCType[i] |= IS_SEP_BIT; } - if (ParseRules::IsEmpty(index)) { - gTparseRulesCType[index] |= IS_EMPTY_BIT; + if (ParseRules::IsEmpty(i)) { + gTparseRulesCType[i] |= IS_EMPTY_BIT; } - if (ParseRules::IsAlnum(index)) { - gTparseRulesCType[index] |= IS_ALNUM_BIT; + if (ParseRules::IsAlnum(i)) { + gTparseRulesCType[i] |= IS_ALNUM_BIT; } - if (ParseRules::IsSpace(index)) { - gTparseRulesCType[index] |= IS_SPACE_BIT; + if (ParseRules::IsSpace(i)) { + gTparseRulesCType[i] |= IS_SPACE_BIT; } - if (ParseRules::IsControl(index)) { - gTparseRulesCType[index] |= IS_CONTROL_BIT; + if (ParseRules::IsControl(i)) { + gTparseRulesCType[i] |= IS_CONTROL_BIT; } - if (ParseRules::IsMimeSep(index)) { - gTparseRulesCType[index] |= 
IS_MIME_SEP_BIT; + if (ParseRules::IsMimeSep(i)) { + gTparseRulesCType[i] |= IS_MIME_SEP_BIT; } - if (ParseRules::IsHttpFieldName(index)) { - gTparseRulesCType[index] |= IS_HTTP_FIELD_NAME_BIT; + if (ParseRules::IsHttpFieldName(i)) { + gTparseRulesCType[i] |= IS_HTTP_FIELD_NAME_BIT; } - if (ParseRules::IsHttpFieldValue(index)) { - gTparseRulesCType[index] |= IS_HTTP_FIELD_VALUE_BIT; + if (ParseRules::IsHttpFieldValue(i)) { + gTparseRulesCType[i] |= IS_HTTP_FIELD_VALUE_BIT; } } @@ -290,7 +290,7 @@ main() for (uint16_t i = 0; i < 256; ++i) { outputFile << "/* " << std::setw(3) << i << " (" << (isprint(i) ? static_cast(i) : '?') << ") */\t"; outputFile << "0x" << std::hex << std::setw(8) << std::setfill('0') << gTparseRulesCType[i] << (i != 255 ? ",\t\t" : "\t\t"); - outputFile << "/* [" << uint_to_binary(gTparseRulesCType[i]) << "] */\n"; + outputFile << "/* [" << UintToBinary(gTparseRulesCType[i]) << "] */\n"; } // Write ParseRulesCTypeToUpper (uppercase conversion table) @@ -313,7 +313,7 @@ main() return 1; } for (uint16_t i = 0; i < 256; ++i) { - fp << "(uint8_t)" << static_cast(gTparseRulesCTypeToLower[i]) << (i != 255 ? ',' : ' ') << '\n'; + outputFile << "(uint8_t)" << static_cast(gTparseRulesCTypeToLower[i]) << (i != 255 ? 
',' : ' ') << '\n'; } } From 2ce20791ab2f99f1556f8d93dd6bac700f81c511 Mon Sep 17 00:00:00 2001 From: Graham Sedman Date: Mon, 25 May 2026 11:35:03 +0100 Subject: [PATCH 6/6] style: separate license from Doxygen comment block for Rat compliance - Split file header into separate C-style comment block for license and Doxygen block for documentation - Required to pass Apache Release Audit Tool (Rat) - Fix formatting: indent Apache license URL for consistency - Fix formatting: remove extra space before "Fixed-width integer types" --- src/tscore/CompileParseRules.cc | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/src/tscore/CompileParseRules.cc b/src/tscore/CompileParseRules.cc index a0a6f9d13aa..ab0ef6729db 100644 --- a/src/tscore/CompileParseRules.cc +++ b/src/tscore/CompileParseRules.cc @@ -1,15 +1,13 @@ /** * @file CompileParseRules.cc * - * @brief Build-time utility for generating ParseRules character classification tables. - * * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file + * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file + * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at + * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * @@ -18,22 +16,26 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. + */ + +/** + * @brief Build-time utility for generating ParseRules character classification tables. 
* * This standalone C++ program generates static lookup tables used by the * Traffic Server runtime for O(1) character classification and case conversion. * It is executed during the build process and is **not part of the runtime library**. * * The generated tables are written to the following files: - * - @c gParseRulesCType: Bitmask of character type flags (32-bit values). - * - @c gParseRulesCTypeToUpper: Uppercase conversion table (uint8_t values). - * - @c gParseRulesCTypeToLower: Lowercase conversion table (uint8_t values). + * - @c ParseRulesCType: Bitmask of character type flags (32-bit values). + * - @c ParseRulesCTypeToUpper: Uppercase conversion table (uint8_t values). + * - @c ParseRulesCTypeToLower: Lowercase conversion table (uint8_t values). * * These files are typically included as static data in @c ParseRules.cc. * * @note This tool uses modern C++ features, including: * - @c std::string for type-safe string handling. * - @c std::ofstream for RAII-based file I/O. - * - Fixed-width integer types (@c uint8_t, @c uint32_t) for portability. + * - Fixed-width integer types (@c uint8_t, @c uint32_t) for portability. * * @see ParseRules.h for character classification function declarations. * @see ParseRules.cc for runtime usage of generated tables.