diff --git a/src/tscore/CompileParseRules.cc b/src/tscore/CompileParseRules.cc index 6921072df83..ab0ef6729db 100644 --- a/src/tscore/CompileParseRules.cc +++ b/src/tscore/CompileParseRules.cc @@ -1,179 +1,323 @@ -/** @file - - A brief file description +/** + * @file CompileParseRules.cc + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ - @section license License +/** + * @brief Build-time utility for generating ParseRules character classification tables. + * + * This standalone C++ program generates static lookup tables used by the + * Traffic Server runtime for O(1) character classification and case conversion. + * It is executed during the build process and is **not part of the runtime library**. + * + * The generated tables are written to the following files: + * - @c ParseRulesCType: Bitmask of character type flags (32-bit values). + * - @c ParseRulesCTypeToUpper: Uppercase conversion table (uint8_t values). + * - @c ParseRulesCTypeToLower: Lowercase conversion table (uint8_t values). + * + * These files are typically included as static data in @c ParseRules.cc. + * + * @note This tool uses modern C++ features, including: + * - @c std::string for type-safe string handling. + * - @c std::ofstream for RAII-based file I/O. + * - Fixed-width integer types (@c uint8_t, @c uint32_t) for portability. + * + * @see ParseRules.h for character classification function declarations. + * @see ParseRules.cc for runtime usage of generated tables. + */ - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at +#include +#include +#include +#include +#include +#include +#include - http://www.apache.org/licenses/LICENSE-2.0 +#include "tscore/ParseRules.h" - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. +/** + * @brief Placeholder for the character type bitmask table. + * + * This array is initialized to zero and serves as a template for the generated + * @c gParseRulesCType table. It is **not used at runtime**. + * + * @note The actual populated table is generated by @c main() and written to + * the @c gParseRulesCType file. */ +const uint32_t gParseRulesCType[256] = {0}; -#define COMPILE_PARSE_RULES +/** + * @brief Placeholder for the uppercase conversion table. + * + * This array is initialized to zero and serves as a template for the generated + * @c gParseRulesCTypeToUpper table. It is **not used at runtime**. + * + * @note The actual populated table is generated by @c main() and written to + * the @c gParseRulesCTypeToUpper file. + */ +const uint8_t gParseRulesCTypeToUpper[256] = {0}; -#include "tscore/ParseRules.h" +/** + * @brief Placeholder for the lowercase conversion table. + * + * This array is initialized to zero and serves as a template for the generated + * @c gParseRulesCTypeToLower table. It is **not used at runtime**. + * + * @note The actual populated table is generated by @c main() and written to + * the @c gParseRulesCTypeToLower file. + */ +const uint8_t gParseRulesCTypeToLower[256] = {0}; -const unsigned int parseRulesCType[256] = {0}; -const char parseRulesCTypeToUpper[256] = {0}; -const char parseRulesCTypeToLower[256] = {0}; +/** + * @brief Working array for character type bitmasks. + * + * Temporary storage for computed character classification bitmasks. + * Populated by @c main() and written to the @c gParseRulesCType output file. + * + * @note Each entry is a 32-bit bitmask where each bit represents a character + * classification (e.g., @c is_alpha_BIT, @c is_digit_BIT). + */ +uint32_t gTparseRulesCType[256]; -unsigned int tparseRulesCType[256]; -char tparseRulesCTypeToUpper[256]; -char tparseRulesCTypeToLower[256]; +/** + * @brief Working array for uppercase character conversion. + * + * Temporary storage for uppercase character mappings (0-255). + * Populated by @c main() and written to the @c gParseRulesCTypeToUpper output file. + */ +uint8_t gTparseRulesCTypeToUpper[256]; -#include -#include -#include "tscore/ink_string.h" +/** + * @brief Working array for lowercase character conversion. + * + * Temporary storage for lowercase character mappings (0-255). + * Populated by @c main() and written to the @c gParseRulesCTypeToLower output file. + */ +uint8_t gTparseRulesCTypeToLower[256]; -static char * -uint_to_binary(unsigned int u) +/** + * @brief Convert a 32-bit unsigned integer to its binary string representation. + * + * Creates a 32-character string representing the binary value of the input, + * with leading zeros. The string is in **big-endian format** (MSB first). + * + * @param u The 32-bit unsigned integer to convert. + * @return @c std::string containing the 32-character binary representation. + * Example: @c UintToBinary(5) returns @c "00000000000000000000000000000101". + * + * @note This function is **thread-safe** because it returns a new @c std::string + * by value (no shared static buffer). + */ +std::string +UintToBinary(uint32_t u) { - int i; - static char buf[33]; - for (i = 0; i < 32; i++) { - buf[i] = ((u & (1 << (31 - i))) ? '1' : '0'); + std::string buf(32, '0'); + for (uint8_t i = 0; i < 32; i++) { + if (u & (1U << (31 - i))) { + buf[i] = '1'; + } } - buf[32] = '\0'; - return (buf); + return buf; } +/** + * @brief Generates character classification lookup tables. + * + * This function performs the following steps: + * + * 1. For each ASCII character (0-255): + * - Initializes the working arrays (@c gTparseRulesCType, @c gTparseRulesCTypeToUpper, + * @c gTparseRulesCTypeToLower). + * - Tests the character against all @c ParseRules classification functions. + * - Sets the corresponding bit in @c gTparseRulesCType[i] for each matching classification. + * - Stores the uppercase/lowercase conversion values in @c gTparseRulesCTypeToUpper + * and @c gTparseRulesCTypeToLower. + * + * 2. Writes three output files using @c std::ofstream (RAII): + * - @c gParseRulesCType: Contains hexadecimal bitmask values and their binary + * representations. Format: C-style array initialization. + * - @c gParseRulesCTypeToUpper: Contains uppercase conversion values for each + * character. Format: @c (uint8_t)X, + * - @c gParseRulesCTypeToLower: Contains lowercase conversion values for each + * character. Format: @c (uint8_t)X, + * + * @return 0 on successful completion. + * + * @note The classification functions from @c ParseRules.h include: + * - **Character types**: @c IsChar, @c IsAlpha, @c IsDigit, @c IsAlnum, + * @c IsCtl, @c IsWs, @c IsHex, @c IsPchar, @c IsToken, @c IsUri, + * @c IsSep, @c IsEmpty. + * - **Case types**: @c IsUpalpha, @c IsLoalpha. + * - **Safety/encoding**: @c IsSafe, @c IsUnsafe, @c IsReserved, + * @c IsUnreserved, @c IsNational. + * - **Special categories**: @c IsPunct, @c IsTspecials, @c IsEndOfUrl. + * - **Whitespace variants**: @c IsSpcr, @c IsSplf, @c IsWslfcr, @c IsEow. + * - **HTTP/MIME**: @c IsHttpFieldName, @c IsHttpFieldValue, + * @c IsMimeSep, @c IsControl. + * + * @see ParseRules.h for detailed descriptions of each classification function. + * @see ParseRules.cc for runtime usage of the generated tables. + */ int main() { - int c; - for (c = 0; c < 256; c++) { - tparseRulesCType[c] = 0; - tparseRulesCTypeToLower[c] = ParseRules::ink_tolower(c); - tparseRulesCTypeToUpper[c] = ParseRules::ink_toupper(c); + for (uint16_t i = 0; i < 256; i++) { + gTparseRulesCType[i] = 0; + gTparseRulesCTypeToLower[i] = static_cast(ParseRules::InkTolower(i)); + gTparseRulesCTypeToUpper[i] = static_cast(ParseRules::InkToupper(i)); - if (ParseRules::is_char(c)) { - tparseRulesCType[c] |= is_char_BIT; + if (ParseRules::IsChar(i)) { + gTparseRulesCType[i] |= IS_CHAR_BIT; } - if (ParseRules::is_upalpha(c)) { - tparseRulesCType[c] |= is_upalpha_BIT; + if (ParseRules::IsUpalpha(i)) { + gTparseRulesCType[i] |= IS_UPALPHA_BIT; } - if (ParseRules::is_loalpha(c)) { - tparseRulesCType[c] |= is_loalpha_BIT; + if (ParseRules::IsLoalpha(i)) { + gTparseRulesCType[i] |= IS_LOALPHA_BIT; } - if (ParseRules::is_alpha(c)) { - tparseRulesCType[c] |= is_alpha_BIT; + if (ParseRules::IsAlpha(i)) { + gTparseRulesCType[i] |= IS_ALPHA_BIT; } - if (ParseRules::is_digit(c)) { - tparseRulesCType[c] |= is_digit_BIT; + if (ParseRules::IsDigit(i)) { + gTparseRulesCType[i] |= IS_DIGIT_BIT; } - if (ParseRules::is_ctl(c)) { - tparseRulesCType[c] |= is_ctl_BIT; + if (ParseRules::IsCtl(i)) { + gTparseRulesCType[i] |= IS_CTL_BIT; } - if (ParseRules::is_ws(c)) { - tparseRulesCType[c] |= is_ws_BIT; + if (ParseRules::IsWs(i)) { + gTparseRulesCType[i] |= IS_WS_BIT; } - if (ParseRules::is_hex(c)) { - tparseRulesCType[c] |= is_hex_BIT; + if (ParseRules::IsHex(i)) { + gTparseRulesCType[i] |= IS_HEX_BIT; } - char cc = c; - if (ParseRules::is_pchar(&cc)) { - tparseRulesCType[c] |= is_pchar_BIT; + + unsigned char cc = static_cast(i); + + if (ParseRules::IsPchar(&cc)) { + gTparseRulesCType[i] |= IS_PCHAR_BIT; } - if (ParseRules::is_extra(c)) { - tparseRulesCType[c] |= is_extra_BIT; + if (ParseRules::IsExtra(i)) { + gTparseRulesCType[i] |= IS_EXTRA_BIT; } - if (ParseRules::is_safe(c)) { - tparseRulesCType[c] |= is_safe_BIT; + if (ParseRules::IsSafe(i)) { + gTparseRulesCType[i] |= IS_SAFE_BIT; } - if (ParseRules::is_unsafe(c)) { - tparseRulesCType[c] |= is_unsafe_BIT; + if (ParseRules::IsUnsafe(i)) { + gTparseRulesCType[i] |= IS_UNSAFE_BIT; } - if (ParseRules::is_national(c)) { - tparseRulesCType[c] |= is_national_BIT; + if (ParseRules::IsNational(i)) { + gTparseRulesCType[i] |= IS_NATIONAL_BIT; } - if (ParseRules::is_reserved(c)) { - tparseRulesCType[c] |= is_reserved_BIT; + if (ParseRules::IsReserved(i)) { + gTparseRulesCType[i] |= IS_RESERVED_BIT; } - if (ParseRules::is_unreserved(c)) { - tparseRulesCType[c] |= is_unreserved_BIT; + if (ParseRules::IsUnreserved(i)) { + gTparseRulesCType[i] |= IS_UNRESERVED_BIT; } - if (ParseRules::is_punct(c)) { - tparseRulesCType[c] |= is_punct_BIT; + if (ParseRules::IsPunct(i)) { + gTparseRulesCType[i] |= IS_PUNCT_BIT; } - if (ParseRules::is_end_of_url(c)) { - tparseRulesCType[c] |= is_end_of_url_BIT; + if (ParseRules::IsEndOfUrl(i)) { + gTparseRulesCType[i] |= IS_END_OF_URL_BIT; } - if (ParseRules::is_tspecials(c)) { - tparseRulesCType[c] |= is_tspecials_BIT; + if (ParseRules::IsTspecials(i)) { + gTparseRulesCType[i] |= IS_TSPECIALS_BIT; } - if (ParseRules::is_spcr(c)) { - tparseRulesCType[c] |= is_spcr_BIT; + if (ParseRules::IsSpcr(i)) { + gTparseRulesCType[i] |= IS_SPCR_BIT; } - if (ParseRules::is_splf(c)) { - tparseRulesCType[c] |= is_splf_BIT; + if (ParseRules::IsSplf(i)) { + gTparseRulesCType[i] |= IS_SPLF_BIT; } - if (ParseRules::is_wslfcr(c)) { - tparseRulesCType[c] |= is_wslfcr_BIT; + if (ParseRules::IsWslfcr(i)) { + gTparseRulesCType[i] |= IS_WSLFCR_BIT; } - if (ParseRules::is_eow(c)) { - tparseRulesCType[c] |= is_eow_BIT; + if (ParseRules::IsEow(i)) { + gTparseRulesCType[i] |= IS_EOW_BIT; } - if (ParseRules::is_token(c)) { - tparseRulesCType[c] |= is_token_BIT; + if (ParseRules::IsToken(i)) { + gTparseRulesCType[i] |= IS_TOKEN_BIT; } - if (ParseRules::is_uri(c)) { - tparseRulesCType[c] |= is_uri_BIT; + if (ParseRules::IsUri(i)) { + gTparseRulesCType[i] |= IS_URI_BIT; } - if (ParseRules::is_sep(c)) { - tparseRulesCType[c] |= is_sep_BIT; + if (ParseRules::IsSep(i)) { + gTparseRulesCType[i] |= IS_SEP_BIT; } - if (ParseRules::is_empty(c)) { - tparseRulesCType[c] |= is_empty_BIT; + if (ParseRules::IsEmpty(i)) { + gTparseRulesCType[i] |= IS_EMPTY_BIT; } - if (ParseRules::is_alnum(c)) { - tparseRulesCType[c] |= is_alnum_BIT; + if (ParseRules::IsAlnum(i)) { + gTparseRulesCType[i] |= IS_ALNUM_BIT; } - if (ParseRules::is_space(c)) { - tparseRulesCType[c] |= is_space_BIT; + if (ParseRules::IsSpace(i)) { + gTparseRulesCType[i] |= IS_SPACE_BIT; } - if (ParseRules::is_control(c)) { - tparseRulesCType[c] |= is_control_BIT; + if (ParseRules::IsControl(i)) { + gTparseRulesCType[i] |= IS_CONTROL_BIT; } - if (ParseRules::is_mime_sep(c)) { - tparseRulesCType[c] |= is_mime_sep_BIT; + if (ParseRules::IsMimeSep(i)) { + gTparseRulesCType[i] |= IS_MIME_SEP_BIT; } - if (ParseRules::is_http_field_name(c)) { - tparseRulesCType[c] |= is_http_field_name_BIT; + if (ParseRules::IsHttpFieldName(i)) { + gTparseRulesCType[i] |= IS_HTTP_FIELD_NAME_BIT; } - if (ParseRules::is_http_field_value(c)) { - tparseRulesCType[c] |= is_http_field_value_BIT; + if (ParseRules::IsHttpFieldValue(i)) { + gTparseRulesCType[i] |= IS_HTTP_FIELD_VALUE_BIT; } } - FILE *fp = fopen("ParseRulesCType", "w"); - for (c = 0; c < 256; c++) { - fprintf(fp, "/* %3d (%c) */\t", c, (isprint(c) ? c : '?')); - fprintf(fp, "0x%08X%c\t\t", tparseRulesCType[c], (c != 255 ? ',' : ' ')); - fprintf(fp, "/* [%s] */\n", uint_to_binary((tparseRulesCType[c]))); + // Write ParseRulesCType (bitmask table with binary representation) + std::ofstream outputFile("ParseRulesCType"); + if (!outputFile) { + std::cerr << "Error: Failed to open ParseRulesCType for writing." << std::endl; + return 1; } - fclose(fp); - fp = fopen("ParseRulesCTypeToUpper", "w"); - for (c = 0; c < 256; c++) { - fprintf(fp, "%d%c\n", tparseRulesCTypeToUpper[c], c != 255 ? ',' : ' '); + for (uint16_t i = 0; i < 256; ++i) { + outputFile << "/* " << std::setw(3) << i << " (" << (isprint(i) ? static_cast(i) : '?') << ") */\t"; + outputFile << "0x" << std::hex << std::setw(8) << std::setfill('0') << gTparseRulesCType[i] << (i != 255 ? ",\t\t" : "\t\t"); + outputFile << "/* [" << UintToBinary(gTparseRulesCType[i]) << "] */\n"; } - fclose(fp); - fp = fopen("ParseRulesCTypeToLower", "w"); - for (c = 0; c < 256; c++) { - fprintf(fp, "%d%c\n", tparseRulesCTypeToLower[c], c != 255 ? ',' : ' '); + + // Write ParseRulesCTypeToUpper (uppercase conversion table) + { + std::ofstream outputFile("ParseRulesCTypeToUpper"); + if (!outputFile) { + std::cerr << "Error: Failed to open ParseRulesCTypeToUpper for writing." << std::endl; + return 1; + } + for (uint16_t i = 0; i < 256; ++i) { + outputFile << "(uint8_t)" << static_cast(gTparseRulesCTypeToUpper[i]) << (i != 255 ? ',' : ' ') << '\n'; + } + } + + // Write ParseRulesCTypeToLower (lowercase conversion table) + { + std::ofstream outputFile("ParseRulesCTypeToLower"); + if (!outputFile) { + std::cerr << "Error: Failed to open ParseRulesCTypeToLower for writing." << std::endl; + return 1; + } + for (uint16_t i = 0; i < 256; ++i) { + outputFile << "(uint8_t)" << static_cast(gTparseRulesCTypeToLower[i]) << (i != 255 ? ',' : ' ') << '\n'; + } } - fclose(fp); return (0); }