diff --git a/src/tscore/CompileParseRules.cc b/src/tscore/CompileParseRules.cc index 6921072df83..52a7be7ec78 100644 --- a/src/tscore/CompileParseRules.cc +++ b/src/tscore/CompileParseRules.cc @@ -1,179 +1,308 @@ -/** @file - - A brief file description - - @section license License +/** + * @file CompileParseRules.cc + * + * @brief Build-time utility for generating ParseRules character classification tables. + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * This standalone C++ program generates static lookup tables used by the + * Traffic Server runtime for O(1) character classification and case conversion. + * It is executed during the build process and is **not part of the runtime library**. + * + * The generated tables are written to the following files: + * - @c ParseRulesCType: Bitmask of character type flags (32-bit values). + * - @c ParseRulesCTypeToUpper: Uppercase conversion table (uint8_t values). + * - @c ParseRulesCTypeToLower: Lowercase conversion table (uint8_t values). + * + * These files are typically included as static data in @c ParseRules.cc. + * + * @note This tool uses modern C++ features, including: + * - @c std::string for type-safe string handling. + * - @c std::ofstream for RAII-based file I/O. + * - Fixed-width integer types (@c uint8_t, @c uint32_t) for portability. + * + * @see ParseRules.h for character classification function declarations. + * @see ParseRules.cc for runtime usage of generated tables. + */ - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at +#include +#include +#include +#include +#include +#include - http://www.apache.org/licenses/LICENSE-2.0 +#include "tscore/ParseRules.h" - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. +/** + * @brief Placeholder for the character type bitmask table. + * + * This array is initialized to zero and serves as a template for the generated + * @c ParseRulesCType table. It is **not used at runtime**. + * + * @note The actual populated table is generated by @c main() and written to + * the @c ParseRulesCType file. */ +const uint32_t parseRulesCType[256] = {0}; -#define COMPILE_PARSE_RULES +/** + * @brief Placeholder for the uppercase conversion table. + * + * This array is initialized to zero and serves as a template for the generated + * @c ParseRulesCTypeToUpper table. It is **not used at runtime**. + * + * @note The actual populated table is generated by @c main() and written to + * the @c ParseRulesCTypeToUpper file. + */ +const uint8_t parseRulesCTypeToUpper[256] = {0}; -#include "tscore/ParseRules.h" +/** + * @brief Placeholder for the lowercase conversion table. + * + * This array is initialized to zero and serves as a template for the generated + * @c ParseRulesCTypeToLower table. It is **not used at runtime**. + * + * @note The actual populated table is generated by @c main() and written to + * the @c ParseRulesCTypeToLower file. + */ +const uint8_t parseRulesCTypeToLower[256] = {0}; -const unsigned int parseRulesCType[256] = {0}; -const char parseRulesCTypeToUpper[256] = {0}; -const char parseRulesCTypeToLower[256] = {0}; +/** + * @brief Working array for character type bitmasks. + * + * Temporary storage for computed character classification bitmasks. + * Populated by @c main() and written to the @c ParseRulesCType output file. + * + * @note Each entry is a 32-bit bitmask where each bit represents a character + * classification (e.g., @c is_alpha_BIT, @c is_digit_BIT). + */ +uint32_t tparseRulesCType[256]; -unsigned int tparseRulesCType[256]; -char tparseRulesCTypeToUpper[256]; -char tparseRulesCTypeToLower[256]; +/** + * @brief Working array for uppercase character conversion. + * + * Temporary storage for uppercase character mappings (0-255). + * Populated by @c main() and written to the @c ParseRulesCTypeToUpper output file. + */ +uint8_t tparseRulesCTypeToUpper[256]; -#include -#include -#include "tscore/ink_string.h" +/** + * @brief Working array for lowercase character conversion. + * + * Temporary storage for lowercase character mappings (0-255). + * Populated by @c main() and written to the @c ParseRulesCTypeToLower output file. + */ +uint8_t tparseRulesCTypeToLower[256]; -static char * -uint_to_binary(unsigned int u) +/** + * @brief Convert a 32-bit unsigned integer to its binary string representation. + * + * Creates a 32-character string representing the binary value of the input, + * with leading zeros. The string is in **big-endian format** (MSB first). + * + * @param u The 32-bit unsigned integer to convert. + * @return @c std::string containing the 32-character binary representation. + * Example: @c uint_to_binary(5) returns @c "00000000000000000000000000000101". + * + * @note This function is **thread-safe** because it returns a new @c std::string + * by value (no shared static buffer). + */ +std::string +uint_to_binary(uint32_t u) { - int i; - static char buf[33]; - for (i = 0; i < 32; i++) { - buf[i] = ((u & (1 << (31 - i))) ? '1' : '0'); + std::string buf(32, '0'); + for (uint8_t i = 0; i < 32; i++) { + if (u & (1 << (31 - i))) { + buf[i] = '1'; + } } - buf[32] = '\0'; - return (buf); + return buf; } +/** + * @brief Generates character classification lookup tables. + * + * This function performs the following steps: + * + * 1. For each ASCII character (0-255): + * - Initializes the working arrays (@c tparseRulesCType, @c tparseRulesCTypeToUpper, + * @c tparseRulesCTypeToLower). + * - Tests the character against all @c ParseRules classification functions. + * - Sets the corresponding bit in @c tparseRulesCType[i] for each matching classification. + * - Stores the uppercase/lowercase conversion values in @c tparseRulesCTypeToUpper + * and @c tparseRulesCTypeToLower. + * + * 2. Writes three output files using @c std::ofstream (RAII): + * - @c ParseRulesCType: Contains hexadecimal bitmask values and their binary + * representations. Format: C-style array initialization. + * - @c ParseRulesCTypeToUpper: Contains uppercase conversion values for each + * character. Format: @c (uint8_t)X, + * - @c ParseRulesCTypeToLower: Contains lowercase conversion values for each + * character. Format: @c (uint8_t)X, + * + * @return 0 on successful completion. + * + * @note The classification functions from @c ParseRules.h include: + * - **Character types**: @c is_char, @c is_alpha, @c is_digit, @c is_alnum, + * @c is_ctl, @c is_ws, @c is_hex, @c is_pchar, @c is_token, @c is_uri, + * @c is_sep, @c is_empty. + * - **Case types**: @c is_upalpha, @c is_loalpha. + * - **Safety/encoding**: @c is_safe, @c is_unsafe, @c is_reserved, + * @c is_unreserved, @c is_national. + * - **Special categories**: @c is_punct, @c is_tspecials, @c is_end_of_url. + * - **Whitespace variants**: @c is_spcr, @c is_splf, @c is_wslfcr, @c is_eow. + * - **HTTP/MIME**: @c is_http_field_name, @c is_http_field_value, + * @c is_mime_sep, @c is_control. + * + * @see ParseRules.h for detailed descriptions of each classification function. + * @see ParseRules.cc for runtime usage of the generated tables. + */ int main() { - int c; - for (c = 0; c < 256; c++) { - tparseRulesCType[c] = 0; - tparseRulesCTypeToLower[c] = ParseRules::ink_tolower(c); - tparseRulesCTypeToUpper[c] = ParseRules::ink_toupper(c); + for (uint16_t i = 0; i < 256; i++) { + tparseRulesCType[i] = 0; + tparseRulesCTypeToLower[i] = static_cast(ParseRules::ink_tolower(i)); + tparseRulesCTypeToUpper[i] = static_cast(ParseRules::ink_toupper(i)); - if (ParseRules::is_char(c)) { - tparseRulesCType[c] |= is_char_BIT; + if (ParseRules::is_char(i)) { + tparseRulesCType[i] |= is_char_BIT; } - if (ParseRules::is_upalpha(c)) { - tparseRulesCType[c] |= is_upalpha_BIT; + if (ParseRules::is_upalpha(i)) { + tparseRulesCType[i] |= is_upalpha_BIT; } - if (ParseRules::is_loalpha(c)) { - tparseRulesCType[c] |= is_loalpha_BIT; + if (ParseRules::is_loalpha(i)) { + tparseRulesCType[i] |= is_loalpha_BIT; } - if (ParseRules::is_alpha(c)) { - tparseRulesCType[c] |= is_alpha_BIT; + if (ParseRules::is_alpha(i)) { + tparseRulesCType[i] |= is_alpha_BIT; } - if (ParseRules::is_digit(c)) { - tparseRulesCType[c] |= is_digit_BIT; + if (ParseRules::is_digit(i)) { + tparseRulesCType[i] |= is_digit_BIT; } - if (ParseRules::is_ctl(c)) { - tparseRulesCType[c] |= is_ctl_BIT; + if (ParseRules::is_ctl(i)) { + tparseRulesCType[i] |= is_ctl_BIT; } - if (ParseRules::is_ws(c)) { - tparseRulesCType[c] |= is_ws_BIT; + if (ParseRules::is_ws(i)) { + tparseRulesCType[i] |= is_ws_BIT; } - if (ParseRules::is_hex(c)) { - tparseRulesCType[c] |= is_hex_BIT; + if (ParseRules::is_hex(i)) { + tparseRulesCType[i] |= is_hex_BIT; } - char cc = c; + + char cc = static_cast(i); + if (ParseRules::is_pchar(&cc)) { - tparseRulesCType[c] |= is_pchar_BIT; + tparseRulesCType[i] |= is_pchar_BIT; } - if (ParseRules::is_extra(c)) { - tparseRulesCType[c] |= is_extra_BIT; + if (ParseRules::is_extra(i)) { + tparseRulesCType[i] |= is_extra_BIT; } - if (ParseRules::is_safe(c)) { - tparseRulesCType[c] |= is_safe_BIT; + if (ParseRules::is_safe(i)) { + tparseRulesCType[i] |= is_safe_BIT; } - if (ParseRules::is_unsafe(c)) { - tparseRulesCType[c] |= is_unsafe_BIT; + if (ParseRules::is_unsafe(i)) { + tparseRulesCType[i] |= is_unsafe_BIT; } - if (ParseRules::is_national(c)) { - tparseRulesCType[c] |= is_national_BIT; + if (ParseRules::is_national(i)) { + tparseRulesCType[i] |= is_national_BIT; } - if (ParseRules::is_reserved(c)) { - tparseRulesCType[c] |= is_reserved_BIT; + if (ParseRules::is_reserved(i)) { + tparseRulesCType[i] |= is_reserved_BIT; } - if (ParseRules::is_unreserved(c)) { - tparseRulesCType[c] |= is_unreserved_BIT; + if (ParseRules::is_unreserved(i)) { + tparseRulesCType[i] |= is_unreserved_BIT; } - if (ParseRules::is_punct(c)) { - tparseRulesCType[c] |= is_punct_BIT; + if (ParseRules::is_punct(i)) { + tparseRulesCType[i] |= is_punct_BIT; } - if (ParseRules::is_end_of_url(c)) { - tparseRulesCType[c] |= is_end_of_url_BIT; + if (ParseRules::is_end_of_url(i)) { + tparseRulesCType[i] |= is_end_of_url_BIT; } - if (ParseRules::is_tspecials(c)) { - tparseRulesCType[c] |= is_tspecials_BIT; + if (ParseRules::is_tspecials(i)) { + tparseRulesCType[i] |= is_tspecials_BIT; } - if (ParseRules::is_spcr(c)) { - tparseRulesCType[c] |= is_spcr_BIT; + if (ParseRules::is_spcr(i)) { + tparseRulesCType[i] |= is_spcr_BIT; } - if (ParseRules::is_splf(c)) { - tparseRulesCType[c] |= is_splf_BIT; + if (ParseRules::is_splf(i)) { + tparseRulesCType[i] |= is_splf_BIT; } - if (ParseRules::is_wslfcr(c)) { - tparseRulesCType[c] |= is_wslfcr_BIT; + if (ParseRules::is_wslfcr(i)) { + tparseRulesCType[i] |= is_wslfcr_BIT; } - if (ParseRules::is_eow(c)) { - tparseRulesCType[c] |= is_eow_BIT; + if (ParseRules::is_eow(i)) { + tparseRulesCType[i] |= is_eow_BIT; } - if (ParseRules::is_token(c)) { - tparseRulesCType[c] |= is_token_BIT; + if (ParseRules::is_token(i)) { + tparseRulesCType[i] |= is_token_BIT; } - if (ParseRules::is_uri(c)) { - tparseRulesCType[c] |= is_uri_BIT; + if (ParseRules::is_uri(i)) { + tparseRulesCType[i] |= is_uri_BIT; } - if (ParseRules::is_sep(c)) { - tparseRulesCType[c] |= is_sep_BIT; + if (ParseRules::is_sep(i)) { + tparseRulesCType[i] |= is_sep_BIT; } - if (ParseRules::is_empty(c)) { - tparseRulesCType[c] |= is_empty_BIT; + if (ParseRules::is_empty(i)) { + tparseRulesCType[i] |= is_empty_BIT; } - if (ParseRules::is_alnum(c)) { - tparseRulesCType[c] |= is_alnum_BIT; + if (ParseRules::is_alnum(i)) { + tparseRulesCType[i] |= is_alnum_BIT; } - if (ParseRules::is_space(c)) { - tparseRulesCType[c] |= is_space_BIT; + if (ParseRules::is_space(i)) { + tparseRulesCType[i] |= is_space_BIT; } - if (ParseRules::is_control(c)) { - tparseRulesCType[c] |= is_control_BIT; + if (ParseRules::is_control(i)) { + tparseRulesCType[i] |= is_control_BIT; } - if (ParseRules::is_mime_sep(c)) { - tparseRulesCType[c] |= is_mime_sep_BIT; + if (ParseRules::is_mime_sep(i)) { + tparseRulesCType[i] |= is_mime_sep_BIT; } - if (ParseRules::is_http_field_name(c)) { - tparseRulesCType[c] |= is_http_field_name_BIT; + if (ParseRules::is_http_field_name(i)) { + tparseRulesCType[i] |= is_http_field_name_BIT; } - if (ParseRules::is_http_field_value(c)) { - tparseRulesCType[c] |= is_http_field_value_BIT; + if (ParseRules::is_http_field_value(i)) { + tparseRulesCType[i] |= is_http_field_value_BIT; } } - FILE *fp = fopen("ParseRulesCType", "w"); - for (c = 0; c < 256; c++) { - fprintf(fp, "/* %3d (%c) */\t", c, (isprint(c) ? c : '?')); - fprintf(fp, "0x%08X%c\t\t", tparseRulesCType[c], (c != 255 ? ',' : ' ')); - fprintf(fp, "/* [%s] */\n", uint_to_binary((tparseRulesCType[c]))); + // Write ParseRulesCType (bitmask table with binary representation) + std::ofstream fp("ParseRulesCType"); + for (uint16_t i = 0; i < 256; ++i) { + fp << "/* " << std::setw(3) << i << " (" << (isprint(i) ? static_cast(i) : '?') << ") */\t"; + fp << "0x" << std::hex << std::setw(8) << std::setfill('0') << tparseRulesCType[i] << (i != 255 ? ",\t\t" : "\t\t"); + fp << "/* [" << uint_to_binary(tparseRulesCType[i]) << "] */\n"; } - fclose(fp); - fp = fopen("ParseRulesCTypeToUpper", "w"); - for (c = 0; c < 256; c++) { - fprintf(fp, "%d%c\n", tparseRulesCTypeToUpper[c], c != 255 ? ',' : ' '); + + // Write ParseRulesCTypeToUpper (uppercase conversion table) + { + std::ofstream fp("ParseRulesCTypeToUpper"); + for (uint16_t i = 0; i < 256; ++i) { + fp << "(uint8_t)" << static_cast(tparseRulesCTypeToUpper[i]) << (i != 255 ? ',' : ' ') << '\n'; + } } - fclose(fp); - fp = fopen("ParseRulesCTypeToLower", "w"); - for (c = 0; c < 256; c++) { - fprintf(fp, "%d%c\n", tparseRulesCTypeToLower[c], c != 255 ? ',' : ' '); + + // Write ParseRulesCTypeToLower (lowercase conversion table) + { + std::ofstream fp("ParseRulesCTypeToLower"); + for (uint16_t i = 0; i < 256; ++i) { + fp << "(uint8_t)" << static_cast(tparseRulesCTypeToLower[i]) << (i != 255 ? ',' : ' ') << '\n'; + } } - fclose(fp); return (0); }