diff --git a/Makefile.pre.in b/Makefile.pre.in index b9914369ad1bedc..36c58d4a7261bc1 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -621,13 +621,16 @@ LIBEXPAT_HEADERS= \ Modules/expat/expat.h \ Modules/expat/expat_config.h \ Modules/expat/expat_external.h \ + Modules/expat/fallthrough.h \ Modules/expat/iasciitab.h \ Modules/expat/internal.h \ Modules/expat/latin1tab.h \ + Modules/expat/memory_sanitizer.h \ Modules/expat/nametab.h \ Modules/expat/pyexpatns.h \ Modules/expat/siphash.h \ Modules/expat/utf8tab.h \ + Modules/expat/xcsinc.c \ Modules/expat/xmlrole.h \ Modules/expat/xmltok.h \ Modules/expat/xmltok_impl.h \ diff --git a/Misc/NEWS.d/next/Security/2026-06-25-20-44-20.gh-issue-152216.VxiK5y.rst b/Misc/NEWS.d/next/Security/2026-06-25-20-44-20.gh-issue-152216.VxiK5y.rst new file mode 100644 index 000000000000000..2769048e001d694 --- /dev/null +++ b/Misc/NEWS.d/next/Security/2026-06-25-20-44-20.gh-issue-152216.VxiK5y.rst @@ -0,0 +1 @@ +Update bundled `libexpat `_ to version 2.8.2. diff --git a/Misc/sbom.spdx.json b/Misc/sbom.spdx.json index 5791592dc77a84f..fbc7ca631d70e29 100644 --- a/Misc/sbom.spdx.json +++ b/Misc/sbom.spdx.json @@ -48,11 +48,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "58101ef0951568acadd3117033bef084fea24cc1" + "checksumValue": "00c5e72b384f5c305be613729184ad37bb491238" }, { "algorithm": "SHA256", - "checksumValue": "52d756026bf09befdb211c453e2009a646d6c6b519e6885e971b2550396619fb" + "checksumValue": "eb43180fbdca40e36d9558060e6e654ef4c451ca656ad679e9e1269eb45456b3" } ], "fileName": "Modules/expat/expat.h" @@ -71,6 +71,20 @@ ], "fileName": "Modules/expat/expat_external.h" }, + { + "SPDXID": "SPDXRef-FILE-Modules-expat-fallthrough.h", + "checksums": [ + { + "algorithm": "SHA1", + "checksumValue": "69db5031480c50a1e35a51190bd64fcb816b6caf" + }, + { + "algorithm": "SHA256", + "checksumValue": "3dac2e4fdec819ede1b081ef776f2421c98ab509f69d5647a21d63d651179df2" + } + ], + "fileName": "Modules/expat/fallthrough.h" + }, { "SPDXID": "SPDXRef-FILE-Modules-expat-iasciitab.h", "checksums": [ @@ -113,6 +127,20 @@ ], "fileName": "Modules/expat/latin1tab.h" }, + { + "SPDXID": "SPDXRef-FILE-Modules-expat-memory-sanitizer.h", + "checksums": [ + { + "algorithm": "SHA1", + "checksumValue": "abdf1b9e9642176466ecbb77d5f48e502d01d0d8" + }, + { + "algorithm": "SHA256", + "checksumValue": "10c66f1cf9dc28608d57cb19adc8c7c654800e8b5b1b9be176eaed802deca2d9" + } + ], + "fileName": "Modules/expat/memory_sanitizer.h" + }, { "SPDXID": "SPDXRef-FILE-Modules-expat-nametab.h", "checksums": [ @@ -132,11 +160,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "aca27f46d9fd387b63ce7ff2e4f172cad130b39b" + "checksumValue": "6648f9c12a8d6f77a6eab5ef5c7caa976e7549aa" }, { "algorithm": "SHA256", - "checksumValue": "f537add526ecda8389503b7ef45fb52b6217e4dc171dcc3a8dc6903ff6134726" + "checksumValue": "c1be28dd62282e668d4daedfe756edb68b7da165c442202bb06a9e597cd5c289" } ], "fileName": "Modules/expat/siphash.h" @@ -169,16 +197,30 @@ ], "fileName": "Modules/expat/winconfig.h" }, + { + "SPDXID": "SPDXRef-FILE-Modules-expat-xcsinc.c", + "checksums": [ + { + "algorithm": "SHA1", + "checksumValue": "69fa7a25f555164f089fdcf290f3af6a1731621f" + }, + { + "algorithm": "SHA256", + "checksumValue": "071345c5ccfbe2e46a02839331ee60ee20048348b4408964bff48c4355473b9e" + } + ], + "fileName": "Modules/expat/xcsinc.c" + }, { "SPDXID": "SPDXRef-FILE-Modules-expat-xmlparse.c", "checksums": [ { "algorithm": "SHA1", - "checksumValue": "1dad2ab196cdbe37572674c465bd9187fdbe4495" + "checksumValue": "a5a5dd44ee8eeb73f42fa42ca1499cd282ddf6a6" }, { "algorithm": "SHA256", - "checksumValue": "740137e670d2f3b7269364ffb6f60064e6560091850c5d6f2c3bb1b8ca6e3dd1" + "checksumValue": "5d2c99b576744edd9545cefe613e0577617f57e16c86387edc13d45710dc97d2" } ], "fileName": "Modules/expat/xmlparse.c" @@ -216,11 +258,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "63e4766a09e63760c6518670509198f8d638f4ad" + "checksumValue": "08dba893b34338e64b66f1044f542aa6760dbab1" }, { "algorithm": "SHA256", - "checksumValue": "0ad3f915f2748dc91bf4e4b4a50cf40bf2c95769d0eca7e3b293a230d82bb779" + "checksumValue": "0c842b1876503e699517994317805dd2298d9e11c94dcc87c2ecb6c2b8e00bc0" } ], "fileName": "Modules/expat/xmltok.c" @@ -244,11 +286,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "7756f7c0d3625ae7dde6cf7d386685ffacb57c7e" + "checksumValue": "3ccb9335589c3ff60aae50ac79b2cdac57999bee" }, { "algorithm": "SHA256", - "checksumValue": "a3fe18ff32b21fbcb7c190895c68158404e1b9fb449db6431bc08b261dc03938" + "checksumValue": "94eeaef9ed46d10f80f748924fbf41f950b4859d6f234f61fdb2c8f1a8e77bd1" } ], "fileName": "Modules/expat/xmltok_impl.c" @@ -1002,14 +1044,14 @@ "checksums": [ { "algorithm": "SHA256", - "checksumValue": "a52eb72108be160e190b5cafa5bba8663f1313f2013e26060d1c18e26e31067b" + "checksumValue": "ef7d1994f533c9e7343d6c19f31064fc8ebbcbcaa144be3812b4f43052a05f4c" } ], - "downloadLocation": "https://github.com/libexpat/libexpat/releases/download/R_2_8_1/expat-2.8.1.tar.gz", + "downloadLocation": "https://github.com/libexpat/libexpat/releases/download/R_2_8_2/expat-2.8.2.tar.gz", "externalRefs": [ { "referenceCategory": "SECURITY", - "referenceLocator": "cpe:2.3:a:libexpat_project:libexpat:2.8.1:*:*:*:*:*:*:*", + "referenceLocator": "cpe:2.3:a:libexpat_project:libexpat:2.8.2:*:*:*:*:*:*:*", "referenceType": "cpe23Type" } ], @@ -1017,7 +1059,7 @@ "name": "expat", "originator": "Organization: Expat development team", "primaryPackagePurpose": "SOURCE", - "versionInfo": "2.8.1" + "versionInfo": "2.8.2" }, { "SPDXID": "SPDXRef-PACKAGE-hacl-star", @@ -1090,6 +1132,11 @@ "relationshipType": "CONTAINS", "spdxElementId": "SPDXRef-PACKAGE-expat" }, + { + "relatedSpdxElement": "SPDXRef-FILE-Modules-expat-fallthrough.h", + "relationshipType": "CONTAINS", + "spdxElementId": "SPDXRef-PACKAGE-expat" + }, { "relatedSpdxElement": "SPDXRef-FILE-Modules-expat-iasciitab.h", "relationshipType": "CONTAINS", @@ -1105,6 +1152,11 @@ "relationshipType": "CONTAINS", "spdxElementId": "SPDXRef-PACKAGE-expat" }, + { + "relatedSpdxElement": "SPDXRef-FILE-Modules-expat-memory-sanitizer.h", + "relationshipType": "CONTAINS", + "spdxElementId": "SPDXRef-PACKAGE-expat" + }, { "relatedSpdxElement": "SPDXRef-FILE-Modules-expat-nametab.h", "relationshipType": "CONTAINS", @@ -1125,6 +1177,11 @@ "relationshipType": "CONTAINS", "spdxElementId": "SPDXRef-PACKAGE-expat" }, + { + "relatedSpdxElement": "SPDXRef-FILE-Modules-expat-xcsinc.c", + "relationshipType": "CONTAINS", + "spdxElementId": "SPDXRef-PACKAGE-expat" + }, { "relatedSpdxElement": "SPDXRef-FILE-Modules-expat-xmlparse.c", "relationshipType": "CONTAINS", diff --git a/Modules/expat/expat.h b/Modules/expat/expat.h index ec3f58544cb00d5..c493c70441c4184 100644 --- a/Modules/expat/expat.h +++ b/Modules/expat/expat.h @@ -1094,7 +1094,7 @@ XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled); */ # define XML_MAJOR_VERSION 2 # define XML_MINOR_VERSION 8 -# define XML_MICRO_VERSION 1 +# define XML_MICRO_VERSION 2 # ifdef __cplusplus } diff --git a/Modules/expat/fallthrough.h b/Modules/expat/fallthrough.h new file mode 100644 index 000000000000000..707dbdd44bfe53d --- /dev/null +++ b/Modules/expat/fallthrough.h @@ -0,0 +1,49 @@ +/* + __ __ _ + ___\ \/ /_ __ __ _| |_ + / _ \\ /| '_ \ / _` | __| + | __// \| |_) | (_| | |_ + \___/_/\_\ .__/ \__,_|\__| + |_| XML parser + + Copyright (c) 2026 Nick Begg + Licensed under the MIT license: + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to permit + persons to whom the Software is furnished to do so, subject to the + following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +#ifndef FALLTHROUGH_H +# define FALLTHROUGH_H 1 + +// Explicit fallthrough in switch case to avoid warnings +// with compiler flag -Wimplicit-fallthrough. + +# define EXPAT_FALLTHROUGH \ + do { \ + } while (0) + +# if defined(__has_attribute) +# if __has_attribute(fallthrough) +# undef EXPAT_FALLTHROUGH +# define EXPAT_FALLTHROUGH __attribute__((fallthrough)) +# endif +# endif + +#endif // FALLTHROUGH_H diff --git a/Modules/expat/memory_sanitizer.h b/Modules/expat/memory_sanitizer.h new file mode 100644 index 000000000000000..a8a8006ccded12d --- /dev/null +++ b/Modules/expat/memory_sanitizer.h @@ -0,0 +1,51 @@ +/* + __ __ _ + ___\ \/ /_ __ __ _| |_ + / _ \\ /| '_ \ / _` | __| + | __// \| |_) | (_| | |_ + \___/_/\_\ .__/ \__,_|\__| + |_| XML parser + + Copyright (c) 2026 Matthew Fernandez + Licensed under the MIT license: + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to permit + persons to whom the Software is furnished to do so, subject to the + following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +#if ! defined(MEMORY_SANITIZER_H) +# define MEMORY_SANITIZER_H 1 + +# if defined(__has_feature) +# if __has_feature(memory_sanitizer) +# include + +// inform Memory Sanitizer that [base, base + extent) is now initialized +# define MSAN_UNPOISON(base, extent) __msan_unpoison((base), (extent)) + +# endif +# endif + +# if ! defined(MSAN_UNPOISON) +# define MSAN_UNPOISON(base, extent) \ + do { \ + } while (0) +# endif + +#endif // ! defined(MEMORY_SANITIZER_H) diff --git a/Modules/expat/refresh.sh b/Modules/expat/refresh.sh index fa3692f9379510e..503f562c1862a48 100755 --- a/Modules/expat/refresh.sh +++ b/Modules/expat/refresh.sh @@ -12,9 +12,9 @@ fi # Update this when updating to a new version after verifying that the changes # the update brings in are good. These values are used for verifying the SBOM, too. -expected_libexpat_tag="R_2_8_1" -expected_libexpat_version="2.8.1" -expected_libexpat_sha256="a52eb72108be160e190b5cafa5bba8663f1313f2013e26060d1c18e26e31067b" +expected_libexpat_tag="R_2_8_2" +expected_libexpat_version="2.8.2" +expected_libexpat_sha256="ef7d1994f533c9e7343d6c19f31064fc8ebbcbcaa144be3812b4f43052a05f4c" expat_dir="$(realpath "$(dirname -- "${BASH_SOURCE[0]}")")" cd ${expat_dir} @@ -33,13 +33,16 @@ lib_files=( asciitab.h expat.h expat_external.h + fallthrough.h iasciitab.h internal.h latin1tab.h + memory_sanitizer.h nametab.h siphash.h utf8tab.h winconfig.h + xcsinc.c xmlparse.c xmlrole.c xmlrole.h diff --git a/Modules/expat/siphash.h b/Modules/expat/siphash.h index 04f6f74585b5a2f..be216b4006dc847 100644 --- a/Modules/expat/siphash.h +++ b/Modules/expat/siphash.h @@ -100,6 +100,7 @@ #include /* size_t */ #include /* uint64_t uint32_t uint8_t */ +#include "fallthrough.h" /* * Workaround to not require a C++11 compiler for using ULL suffix @@ -234,25 +235,25 @@ sip24_final(struct siphash *H) { switch (left) { case 7: b |= (uint64_t)H->buf[6] << 48; - /* fall through */ + EXPAT_FALLTHROUGH; case 6: b |= (uint64_t)H->buf[5] << 40; - /* fall through */ + EXPAT_FALLTHROUGH; case 5: b |= (uint64_t)H->buf[4] << 32; - /* fall through */ + EXPAT_FALLTHROUGH; case 4: b |= (uint64_t)H->buf[3] << 24; - /* fall through */ + EXPAT_FALLTHROUGH; case 3: b |= (uint64_t)H->buf[2] << 16; - /* fall through */ + EXPAT_FALLTHROUGH; case 2: b |= (uint64_t)H->buf[1] << 8; - /* fall through */ + EXPAT_FALLTHROUGH; case 1: b |= (uint64_t)H->buf[0] << 0; - /* fall through */ + EXPAT_FALLTHROUGH; case 0: break; } diff --git a/Modules/expat/xcsinc.c b/Modules/expat/xcsinc.c new file mode 100644 index 000000000000000..3597c2480bc9617 --- /dev/null +++ b/Modules/expat/xcsinc.c @@ -0,0 +1,48 @@ +/* This file is included from other .c files! + __ __ _ + ___\ \/ /_ __ __ _| |_ + / _ \\ /| '_ \ / _` | __| + | __// \| |_) | (_| | |_ + \___/_/\_\ .__/ \__,_|\__| + |_| XML parser + + Copyright (c) 2022 Sebastian Pipping + Licensed under the MIT license: + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to permit + persons to whom the Software is furnished to do so, subject to the + following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +static size_t +xcslen(const XML_Char *s) { +#ifdef XML_UNICODE +# ifdef XML_UNICODE_WCHAR_T + return wcslen(s); +# else + // XML_Char is unsigned short + size_t len = 0; + while (s[len]) { + len++; + } + return len; +# endif +#else + return strlen(s); +#endif +} diff --git a/Modules/expat/xmlparse.c b/Modules/expat/xmlparse.c index 95d346758563ab7..d961ebf227e7405 100644 --- a/Modules/expat/xmlparse.c +++ b/Modules/expat/xmlparse.c @@ -1,4 +1,4 @@ -/* 75ef4224f81c052e9e5aeea2ac7de75357d2169ff9908e39edc08b9dc3052513 (2.8.1+) +/* 5de44e6750c6cc78818f06ed552f522a1241df0299395250e1792cb339389daf (2.8.2+) __ __ _ ___\ \/ /_ __ __ _| |_ / _ \\ /| '_ \ / _` | __| @@ -47,6 +47,9 @@ Copyright (c) 2026 Rosen Penev Copyright (c) 2026 Francesco Bertolaccini Copyright (c) 2026 Christian Ng + Copyright (c) 2026 Nick Begg + Copyright (c) 2026 Kartik Kenchi + Copyright (c) 2026 Haris Hussain Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining @@ -90,7 +93,7 @@ #include #include /* memset(), memcpy() */ #include -#include /* INT_MAX, UINT_MAX */ +#include /* INT_MAX, LLONG_MAX, LONG_MAX, UINT_MAX */ #include /* fprintf */ #include /* getenv */ #include /* SIZE_MAX, uintptr_t */ @@ -114,6 +117,7 @@ #include "ascii.h" #include "expat.h" #include "siphash.h" +#include "xcsinc.c" #if defined(HAVE_ARC4RANDOM) # include "random_arc4random.h" @@ -156,7 +160,7 @@ * Windows >=Vista (rand_s): _WIN32. \ \ If you insist on not using any of these, bypass this error by defining \ - XML_POOR_ENTROPY; you have been warned. \ + XML_POOR_ENTROPY and be vulnerable to hash flooding; you have been warned. \ \ If you have reasons to patch this detection code away or need changes \ to the build system, please open a bug. Thank you! @@ -207,6 +211,12 @@ typedef char ICHAR; #endif +#ifdef XML_LARGE_SIZE +# define XML_INDEX_MAX LLONG_MAX +#else +# define XML_INDEX_MAX LONG_MAX +#endif + /* Round up n to be a multiple of sz, where sz is a power of 2. */ #define ROUND_UP(n, sz) (((n) + ((sz) - 1)) & ~((sz) - 1)) @@ -270,8 +280,8 @@ typedef struct binding { struct binding *prevPrefixBinding; const struct attribute_id *attId; XML_Char *uri; - int uriLen; - int uriAlloc; + size_t uriLen; + size_t uriAlloc; } BINDING; typedef struct prefix { @@ -283,9 +293,9 @@ typedef struct { const XML_Char *str; const XML_Char *localPart; const XML_Char *prefix; - int strLen; - int uriLen; - int prefixLen; + size_t strLen; + size_t uriLen; + size_t prefixLen; } TAG_NAME; /* TAG represents an open element. @@ -384,8 +394,8 @@ typedef struct { const XML_Char *name; PREFIX *prefix; const ATTRIBUTE_ID *idAtt; - int nDefaultAtts; - int allocDefaultAtts; + size_t nDefaultAtts; + size_t allocDefaultAtts; DEFAULT_ATTRIBUTE *defaultAtts; HASH_TABLE defaultAttsNames; } ELEMENT_TYPE; @@ -417,6 +427,7 @@ typedef struct { unsigned scaffCount; int scaffLevel; int *scaffIndex; + size_t scaffIndexSize; } DTD; enum EntityType { @@ -588,6 +599,7 @@ static XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc, static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc, const char *ptr, const char *end); static XML_Bool FASTCALL poolGrow(STRING_POOL *pool); +static bool FASTCALL poolGrowUntil(STRING_POOL *pool, size_t needed); static const XML_Char *FASTCALL poolCopyString(STRING_POOL *pool, const XML_Char *s); static const XML_Char *FASTCALL poolCopyStringNoFinish(STRING_POOL *pool, @@ -642,16 +654,40 @@ static XML_Parser getRootParserOf(XML_Parser parser, static unsigned long getDebugLevel(const char *variableName, unsigned long defaultDebugLevel); +static bool poolAppendChar(STRING_POOL *pool, XML_Char c); + +static bool poolAppendChars(STRING_POOL *pool, const XML_Char *s, size_t len); + #define poolStart(pool) ((pool)->start) #define poolLength(pool) ((pool)->ptr - (pool)->start) #define poolChop(pool) ((void)--(pool->ptr)) #define poolLastChar(pool) (((pool)->ptr)[-1]) #define poolDiscard(pool) ((pool)->ptr = (pool)->start) #define poolFinish(pool) ((pool)->start = (pool)->ptr) -#define poolAppendChar(pool, c) \ - (((pool)->ptr == (pool)->end && ! poolGrow(pool)) \ - ? 0 \ - : ((*((pool)->ptr)++ = c), 1)) + +bool +poolAppendChar(STRING_POOL *pool, XML_Char c) { + if (pool->ptr == pool->end && ! poolGrow(pool)) + return false; + + *(pool->ptr)++ = c; + return true; +} + +bool +poolAppendChars(STRING_POOL *pool, const XML_Char *s, size_t len) { + // Detect and prevent integer overflow + if (len > SIZE_MAX / sizeof(XML_Char)) + return false; + + if (! poolGrowUntil(pool, len)) + return false; + + memcpy(pool->ptr, s, len * sizeof(XML_Char)); + pool->ptr += len; + + return true; +} #if ! defined(XML_TESTING) const @@ -730,11 +766,9 @@ struct XML_ParserStruct { const char *m_eventEndPtr; const char *m_positionPtr; OPEN_INTERNAL_ENTITY *m_openInternalEntities; - OPEN_INTERNAL_ENTITY *m_freeInternalEntities; OPEN_INTERNAL_ENTITY *m_openAttributeEntities; - OPEN_INTERNAL_ENTITY *m_freeAttributeEntities; OPEN_INTERNAL_ENTITY *m_openValueEntities; - OPEN_INTERNAL_ENTITY *m_freeValueEntities; + OPEN_INTERNAL_ENTITY *m_freeEntities; XML_Bool m_defaultExpandInternalEntities; int m_tagLevel; ENTITY *m_declEntity; @@ -754,7 +788,7 @@ struct XML_ParserStruct { TAG *m_freeTagList; BINDING *m_inheritedBindings; BINDING *m_freeBindingList; - int m_attsSize; + size_t m_attsSize; int m_nSpecifiedAtts; int m_idAttIndex; ATTRIBUTE *m_atts; @@ -768,7 +802,7 @@ struct XML_ParserStruct { STRING_POOL m_tempPool; STRING_POOL m_temp2Pool; char *m_groupConnector; - unsigned int m_groupSize; + size_t m_groupSize; XML_Char m_namespaceSeparator; XML_Parser m_parentParser; XML_ParsingStatus m_parsingStatus; @@ -785,6 +819,7 @@ struct XML_ParserStruct { ENTITY_STATS m_entity_stats; #endif XML_Bool m_reenter; + unsigned m_handlerCallDepth; }; #if XML_GE == 1 @@ -1128,6 +1163,23 @@ generate_hash_secret_salt(void) { #endif } +static void +beforeHandler(XML_Parser parser) { + assert(parser->m_handlerCallDepth < UINT_MAX); + parser->m_handlerCallDepth++; +} + +static void +afterHandler(XML_Parser parser) { + assert(parser->m_handlerCallDepth > 0); + parser->m_handlerCallDepth--; +} + +static bool +isCalledFromInsideHandler(XML_Parser parser) { + return parser->m_handlerCallDepth > 0; +} + static enum XML_Error callProcessor(XML_Parser parser, const char *start, const char *end, const char **endPtr) { @@ -1364,9 +1416,7 @@ parserCreate(const XML_Char *encodingName, parser->m_freeBindingList = NULL; parser->m_freeTagList = NULL; - parser->m_freeInternalEntities = NULL; - parser->m_freeAttributeEntities = NULL; - parser->m_freeValueEntities = NULL; + parser->m_freeEntities = NULL; parser->m_groupSize = 0; parser->m_groupConnector = NULL; @@ -1482,6 +1532,7 @@ parserInit(XML_Parser parser, const XML_Char *encodingName) { parser->m_parsingStatus.parsing = XML_INITIALIZED; // Reentry can only be triggered inside m_processor calls parser->m_reenter = XML_FALSE; + parser->m_handlerCallDepth = 0; #ifdef XML_DTD parser->m_isParamEntity = XML_FALSE; parser->m_useForeignDTD = XML_FALSE; @@ -1515,12 +1566,22 @@ moveToFreeBindingList(XML_Parser parser, BINDING *bindings) { } } +/* Moves a list of entities onto the start of another list. */ +static void +moveEntityList(OPEN_INTERNAL_ENTITY **dst, OPEN_INTERNAL_ENTITY **src) { + for (OPEN_INTERNAL_ENTITY *head = *src; head != NULL;) { + OPEN_INTERNAL_ENTITY *const openEntity = head; + head = head->next; + openEntity->next = *dst; + *dst = openEntity; + } +} + XML_Bool XMLCALL XML_ParserReset(XML_Parser parser, const XML_Char *encodingName) { TAG *tStk; - OPEN_INTERNAL_ENTITY *openEntityList; - if (parser == NULL) + if ((parser == NULL) || isCalledFromInsideHandler(parser)) return XML_FALSE; if (parser->m_parentParser) @@ -1535,32 +1596,14 @@ XML_ParserReset(XML_Parser parser, const XML_Char *encodingName) { tag->bindings = NULL; parser->m_freeTagList = tag; } - /* move m_openInternalEntities to m_freeInternalEntities */ - openEntityList = parser->m_openInternalEntities; - while (openEntityList) { - OPEN_INTERNAL_ENTITY *openEntity = openEntityList; - openEntityList = openEntity->next; - openEntity->next = parser->m_freeInternalEntities; - parser->m_freeInternalEntities = openEntity; - } - /* move m_openAttributeEntities to m_freeAttributeEntities (i.e. same task but - * for attributes) */ - openEntityList = parser->m_openAttributeEntities; - while (openEntityList) { - OPEN_INTERNAL_ENTITY *openEntity = openEntityList; - openEntityList = openEntity->next; - openEntity->next = parser->m_freeAttributeEntities; - parser->m_freeAttributeEntities = openEntity; - } - /* move m_openValueEntities to m_freeValueEntities (i.e. same task but - * for value entities) */ - openEntityList = parser->m_openValueEntities; - while (openEntityList) { - OPEN_INTERNAL_ENTITY *openEntity = openEntityList; - openEntityList = openEntity->next; - openEntity->next = parser->m_freeValueEntities; - parser->m_freeValueEntities = openEntity; - } + /* move m_openInternalEntities to m_freeEntities */ + moveEntityList(&parser->m_freeEntities, &parser->m_openInternalEntities); + /* move m_openAttributeEntities to m_freeEntities (i.e. same task but for + * attributes) */ + moveEntityList(&parser->m_freeEntities, &parser->m_openAttributeEntities); + /* move m_openValueEntities to m_freeEntities (i.e. same task but for value + * entities) */ + moveEntityList(&parser->m_freeEntities, &parser->m_openValueEntities); moveToFreeBindingList(parser, parser->m_inheritedBindings); FREE(parser, parser->m_unknownEncodingMem); if (parser->m_unknownEncodingRelease) @@ -1712,11 +1755,6 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context, newDtd = oldDtd; #endif /* XML_DTD */ - /* Note that the magical uses of the pre-processor to make field - access look more like C++ require that `parser' be overwritten - here. This makes this function more painful to follow than it - would be otherwise. - */ if (parser->m_ns) { XML_Char tmp[2] = {parser->m_namespaceSeparator, 0}; parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd, oldParser); @@ -1806,8 +1844,7 @@ destroyBindings(BINDING *bindings, XML_Parser parser) { void XMLCALL XML_ParserFree(XML_Parser parser) { TAG *tagList; - OPEN_INTERNAL_ENTITY *entityList; - if (parser == NULL) + if ((parser == NULL) || isCalledFromInsideHandler(parser)) return; /* free m_tagStack and m_freeTagList */ tagList = parser->m_tagStack; @@ -1825,48 +1862,35 @@ XML_ParserFree(XML_Parser parser) { destroyBindings(p->bindings, parser); FREE(parser, p); } - /* free m_openInternalEntities and m_freeInternalEntities */ - entityList = parser->m_openInternalEntities; - for (;;) { - OPEN_INTERNAL_ENTITY *openEntity; - if (entityList == NULL) { - if (parser->m_freeInternalEntities == NULL) - break; - entityList = parser->m_freeInternalEntities; - parser->m_freeInternalEntities = NULL; - } - openEntity = entityList; + /* free m_openInternalEntities */ + for (OPEN_INTERNAL_ENTITY *entityList = parser->m_openInternalEntities; + entityList != NULL;) { + OPEN_INTERNAL_ENTITY *const openEntity = entityList; entityList = entityList->next; FREE(parser, openEntity); } - /* free m_openAttributeEntities and m_freeAttributeEntities */ - entityList = parser->m_openAttributeEntities; - for (;;) { - OPEN_INTERNAL_ENTITY *openEntity; - if (entityList == NULL) { - if (parser->m_freeAttributeEntities == NULL) - break; - entityList = parser->m_freeAttributeEntities; - parser->m_freeAttributeEntities = NULL; - } - openEntity = entityList; + /* free m_openAttributeEntities */ + for (OPEN_INTERNAL_ENTITY *entityList = parser->m_openAttributeEntities; + entityList != NULL;) { + OPEN_INTERNAL_ENTITY *const openEntity = entityList; entityList = entityList->next; FREE(parser, openEntity); } - /* free m_openValueEntities and m_freeValueEntities */ - entityList = parser->m_openValueEntities; - for (;;) { - OPEN_INTERNAL_ENTITY *openEntity; - if (entityList == NULL) { - if (parser->m_freeValueEntities == NULL) - break; - entityList = parser->m_freeValueEntities; - parser->m_freeValueEntities = NULL; - } - openEntity = entityList; + /* free m_openValueEntities */ + for (OPEN_INTERNAL_ENTITY *entityList = parser->m_openValueEntities; + entityList != NULL;) { + OPEN_INTERNAL_ENTITY *const openEntity = entityList; entityList = entityList->next; FREE(parser, openEntity); } + /* free m_freeEntities */ + for (OPEN_INTERNAL_ENTITY *entityList = parser->m_freeEntities; + entityList != NULL;) { + OPEN_INTERNAL_ENTITY *const openEntity = entityList; + entityList = entityList->next; + FREE(parser, openEntity); + } + parser->m_freeEntities = NULL; destroyBindings(parser->m_freeBindingList, parser); destroyBindings(parser->m_inheritedBindings, parser); poolDestroy(&parser->m_tempPool); @@ -2263,6 +2287,8 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) { parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT; return XML_STATUS_ERROR; } + if (isCalledFromInsideHandler(parser)) + return XML_STATUS_ERROR; switch (parser->m_parsingStatus.parsing) { case XML_SUSPENDED: parser->m_errorCode = XML_ERROR_SUSPENDED; @@ -2275,7 +2301,7 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) { parser->m_errorCode = XML_ERROR_NO_MEMORY; return XML_STATUS_ERROR; } - /* fall through */ + EXPAT_FALLTHROUGH; default: parser->m_parsingStatus.parsing = XML_PARSING; } @@ -2286,7 +2312,7 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) { int nLeftOver; enum XML_Status result; /* Detect overflow (a+b > MAX <==> b > MAX-a) */ - if ((XML_Size)len > ((XML_Size)-1) / 2 - parser->m_parseEndByteIndex) { + if (len > XML_INDEX_MAX - parser->m_parseEndByteIndex) { parser->m_errorCode = XML_ERROR_NO_MEMORY; parser->m_eventPtr = parser->m_eventEndPtr = NULL; parser->m_processor = errorProcessor; @@ -2317,7 +2343,7 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) { parser->m_parsingStatus.parsing = XML_FINISHED; return XML_STATUS_OK; } - /* fall through */ + EXPAT_FALLTHROUGH; default: result = XML_STATUS_OK; } @@ -2372,7 +2398,7 @@ XML_ParseBuffer(XML_Parser parser, int len, int isFinal) { const char *start; enum XML_Status result = XML_STATUS_OK; - if (parser == NULL) + if ((parser == NULL) || isCalledFromInsideHandler(parser)) return XML_STATUS_ERROR; if (len < 0) { @@ -2398,11 +2424,19 @@ XML_ParseBuffer(XML_Parser parser, int len, int isFinal) { parser->m_errorCode = XML_ERROR_NO_MEMORY; return XML_STATUS_ERROR; } - /* fall through */ + EXPAT_FALLTHROUGH; default: parser->m_parsingStatus.parsing = XML_PARSING; } + // Detect and avoid integer overflow + if (len > XML_INDEX_MAX - parser->m_parseEndByteIndex) { + parser->m_errorCode = XML_ERROR_NO_MEMORY; + parser->m_eventPtr = parser->m_eventEndPtr = NULL; + parser->m_processor = errorProcessor; + return XML_STATUS_ERROR; + } + start = parser->m_bufferPtr; parser->m_positionPtr = start; parser->m_bufferEnd += len; @@ -2428,6 +2462,7 @@ XML_ParseBuffer(XML_Parser parser, int len, int isFinal) { parser->m_parsingStatus.parsing = XML_FINISHED; return result; } + break; default:; /* should not happen */ } } @@ -2438,9 +2473,32 @@ XML_ParseBuffer(XML_Parser parser, int len, int isFinal) { return result; } +/* Modifies `parser`’s buffer to be backed by `newBuf`. */ +static void +setParserBuffer(XML_Parser parser, char *newBuf, int newBufSize, int keep) { + parser->m_bufferLim = newBuf + newBufSize; + if (parser->m_bufferPtr) { + const int parsing + = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr); + memcpy(newBuf, parser->m_bufferPtr - keep, parsing + keep); + // NOTE: We are avoiding FREE(..) here because parser->m_buffer + // is not being allocated with MALLOC(..) but with plain + // .malloc_fcn(..). + parser->m_mem.free_fcn(parser->m_buffer); + parser->m_buffer = newBuf; + parser->m_bufferEnd = newBuf + parsing + keep; + parser->m_bufferPtr = newBuf + keep; + } else { + /* This must be a brand new buffer with no data in it yet */ + parser->m_buffer = newBuf; + parser->m_bufferEnd = newBuf; + parser->m_bufferPtr = newBuf; + } +} + void *XMLCALL XML_GetBuffer(XML_Parser parser, int len) { - if (parser == NULL) + if ((parser == NULL) || isCalledFromInsideHandler(parser)) return NULL; if (len < 0) { parser->m_errorCode = XML_ERROR_NO_MEMORY; @@ -2461,9 +2519,6 @@ XML_GetBuffer(XML_Parser parser, int len) { parser->m_lastBufferRequestSize = len; if (len > EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd) || parser->m_buffer == NULL) { -#if XML_CONTEXT_BYTES > 0 - int keep; -#endif /* XML_CONTEXT_BYTES > 0 */ /* Do not invoke signed arithmetic overflow: */ int neededSize = (int)((unsigned)len + (unsigned)EXPAT_SAFE_PTR_DIFF( @@ -2473,7 +2528,9 @@ XML_GetBuffer(XML_Parser parser, int len) { return NULL; } #if XML_CONTEXT_BYTES > 0 - keep = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer); + const int parsed + = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer); + int keep = parsed; if (keep > XML_CONTEXT_BYTES) keep = XML_CONTEXT_BYTES; /* Detect and prevent integer overflow */ @@ -2481,16 +2538,16 @@ XML_GetBuffer(XML_Parser parser, int len) { parser->m_errorCode = XML_ERROR_NO_MEMORY; return NULL; } - neededSize += keep; +#else + int keep = 0; #endif /* XML_CONTEXT_BYTES > 0 */ + neededSize += keep; if (parser->m_buffer && parser->m_bufferPtr && neededSize <= EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer)) { #if XML_CONTEXT_BYTES > 0 - if (keep < EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)) { - int offset - = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer) - - keep; + if (keep < parsed) { + int offset = parsed - keep; /* The buffer pointers cannot be NULL here; we have at least some bytes * in the buffer */ memmove(parser->m_buffer, &parser->m_buffer[offset], @@ -2507,7 +2564,6 @@ XML_GetBuffer(XML_Parser parser, int len) { parser->m_bufferPtr = parser->m_buffer; #endif /* XML_CONTEXT_BYTES > 0 */ } else { - char *newBuf; int bufferSize = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer); if (bufferSize == 0) @@ -2522,49 +2578,12 @@ XML_GetBuffer(XML_Parser parser, int len) { } // NOTE: We are avoiding MALLOC(..) here to leave limiting // the input size to the application using Expat. - newBuf = parser->m_mem.malloc_fcn(bufferSize); + char *const newBuf = parser->m_mem.malloc_fcn(bufferSize); if (newBuf == NULL) { parser->m_errorCode = XML_ERROR_NO_MEMORY; return NULL; } - parser->m_bufferLim = newBuf + bufferSize; -#if XML_CONTEXT_BYTES > 0 - if (parser->m_bufferPtr) { - memcpy(newBuf, &parser->m_bufferPtr[-keep], - EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr) - + keep); - // NOTE: We are avoiding FREE(..) here because parser->m_buffer - // is not being allocated with MALLOC(..) but with plain - // .malloc_fcn(..). - parser->m_mem.free_fcn(parser->m_buffer); - parser->m_buffer = newBuf; - parser->m_bufferEnd - = parser->m_buffer - + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr) - + keep; - parser->m_bufferPtr = parser->m_buffer + keep; - } else { - /* This must be a brand new buffer with no data in it yet */ - parser->m_bufferEnd = newBuf; - parser->m_bufferPtr = parser->m_buffer = newBuf; - } -#else - if (parser->m_bufferPtr) { - memcpy(newBuf, parser->m_bufferPtr, - EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)); - // NOTE: We are avoiding FREE(..) here because parser->m_buffer - // is not being allocated with MALLOC(..) but with plain - // .malloc_fcn(..). - parser->m_mem.free_fcn(parser->m_buffer); - parser->m_bufferEnd - = newBuf - + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr); - } else { - /* This must be a brand new buffer with no data in it yet */ - parser->m_bufferEnd = newBuf; - } - parser->m_bufferPtr = parser->m_buffer = newBuf; -#endif /* XML_CONTEXT_BYTES > 0 */ + setParserBuffer(parser, newBuf, bufferSize, keep); } parser->m_eventPtr = parser->m_eventEndPtr = NULL; parser->m_positionPtr = NULL; @@ -2617,7 +2636,7 @@ enum XML_Status XMLCALL XML_ResumeParser(XML_Parser parser) { enum XML_Status result = XML_STATUS_OK; - if (parser == NULL) + if ((parser == NULL) || isCalledFromInsideHandler(parser)) return XML_STATUS_ERROR; if (parser->m_parsingStatus.parsing != XML_SUSPENDED) { parser->m_errorCode = XML_ERROR_NOT_SUSPENDED; @@ -2643,6 +2662,7 @@ XML_ResumeParser(XML_Parser parser) { parser->m_parsingStatus.parsing = XML_FINISHED; return result; } + break; default:; } } @@ -2704,7 +2724,7 @@ XML_GetInputContext(XML_Parser parser, int *offset, int *size) { (void)offset; (void)size; #endif /* XML_CONTEXT_BYTES > 0 */ - return (const char *)0; + return NULL; } XML_Size XMLCALL @@ -3063,7 +3083,7 @@ storeRawNames(XML_Parser parser) { */ rawNameLen = ROUND_UP(tag->rawNameLength, sizeof(XML_Char)); /* Detect and prevent integer overflow. */ - if (rawNameLen > (size_t)INT_MAX - nameLen) + if (rawNameLen > SIZE_MAX - nameLen) return XML_FALSE; bufSize = nameLen + rawNameLen; if (bufSize > (size_t)(tag->bufEnd - tag->buf.raw)) { @@ -3190,7 +3210,7 @@ externalEntityInitProcessor3(XML_Parser parser, const char *start, if (parser->m_reenter) { return XML_ERROR_UNEXPECTED_STATE; // LCOV_EXCL_LINE } - /* Fall through */ + EXPAT_FALLTHROUGH; default: start = next; } @@ -3269,7 +3289,9 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, *eventEndPP = end; if (parser->m_characterDataHandler) { XML_Char c = 0xA; + beforeHandler(parser); parser->m_characterDataHandler(parser->m_handlerArg, &c, 1); + afterHandler(parser); } else if (parser->m_defaultHandler) reportDefault(parser, enc, s, end); /* We are at the end of the final buffer, should we check for @@ -3322,9 +3344,11 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, ((char *)&ch) + sizeof(XML_Char), __LINE__, XML_ACCOUNT_ENTITY_EXPANSION); #endif /* XML_GE == 1 */ - if (parser->m_characterDataHandler) + if (parser->m_characterDataHandler) { + beforeHandler(parser); parser->m_characterDataHandler(parser->m_handlerArg, &ch, 1); - else if (parser->m_defaultHandler) + afterHandler(parser); + } else if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); break; } @@ -3344,9 +3368,11 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, else if (! entity->is_internal) return XML_ERROR_ENTITY_DECLARED_IN_PE; } else if (! entity) { - if (parser->m_skippedEntityHandler) + if (parser->m_skippedEntityHandler) { + beforeHandler(parser); parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0); - else if (parser->m_defaultHandler) + afterHandler(parser); + } else if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); break; } @@ -3357,10 +3383,12 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, if (entity->textPtr) { enum XML_Error result; if (! parser->m_defaultExpandInternalEntities) { - if (parser->m_skippedEntityHandler) + if (parser->m_skippedEntityHandler) { + beforeHandler(parser); parser->m_skippedEntityHandler(parser->m_handlerArg, entity->name, 0); - else if (parser->m_defaultHandler) + afterHandler(parser); + } else if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); break; } @@ -3374,9 +3402,12 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, entity->open = XML_FALSE; if (! context) return XML_ERROR_NO_MEMORY; - if (! parser->m_externalEntityRefHandler( - parser->m_externalEntityRefHandlerArg, context, entity->base, - entity->systemId, entity->publicId)) + beforeHandler(parser); + const int status = parser->m_externalEntityRefHandler( + parser->m_externalEntityRefHandlerArg, context, entity->base, + entity->systemId, entity->publicId); + afterHandler(parser); + if (! status) return XML_ERROR_EXTERNAL_ENTITY_HANDLING; poolDiscard(&parser->m_tempPool); } else if (parser->m_defaultHandler) @@ -3384,7 +3415,6 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, break; } case XML_TOK_START_TAG_NO_ATTS: - /* fall through */ case XML_TOK_START_TAG_WITH_ATTS: { TAG *tag; enum XML_Error result; @@ -3416,11 +3446,10 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, const char *fromPtr = tag->rawName; toPtr = tag->buf.str; for (;;) { - int convLen; const enum XML_Convert_Result convert_res = XmlConvert(enc, &fromPtr, rawNameEnd, (ICHAR **)&toPtr, (ICHAR *)tag->bufEnd - 1); - convLen = (int)(toPtr - tag->buf.str); + const size_t convLen = (size_t)(toPtr - tag->buf.str); if ((fromPtr >= rawNameEnd) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) { tag->name.strLen = convLen; @@ -3445,16 +3474,17 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings), account); if (result) return result; - if (parser->m_startElementHandler) + if (parser->m_startElementHandler) { + beforeHandler(parser); parser->m_startElementHandler(parser->m_handlerArg, tag->name.str, (const XML_Char **)parser->m_atts); - else if (parser->m_defaultHandler) + afterHandler(parser); + } else if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); poolClear(&parser->m_tempPool); break; } case XML_TOK_EMPTY_ELEMENT_NO_ATTS: - /* fall through */ case XML_TOK_EMPTY_ELEMENT_WITH_ATTS: { const char *rawName = s + enc->minBytesPerChar; enum XML_Error result; @@ -3474,14 +3504,18 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, } poolFinish(&parser->m_tempPool); if (parser->m_startElementHandler) { + beforeHandler(parser); parser->m_startElementHandler(parser->m_handlerArg, name.str, (const XML_Char **)parser->m_atts); + afterHandler(parser); noElmHandlers = XML_FALSE; } if (parser->m_endElementHandler) { if (parser->m_startElementHandler) *eventPP = *eventEndPP; + beforeHandler(parser); parser->m_endElementHandler(parser->m_handlerArg, name.str); + afterHandler(parser); noElmHandlers = XML_FALSE; } if (noElmHandlers && parser->m_defaultHandler) @@ -3539,14 +3573,19 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, } *uri = XML_T('\0'); } + beforeHandler(parser); parser->m_endElementHandler(parser->m_handlerArg, tag->name.str); + afterHandler(parser); } else if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); while (tag->bindings) { BINDING *b = tag->bindings; - if (parser->m_endNamespaceDeclHandler) + if (parser->m_endNamespaceDeclHandler) { + beforeHandler(parser); parser->m_endNamespaceDeclHandler(parser->m_handlerArg, b->prefix->name); + afterHandler(parser); + } tag->bindings = tag->bindings->nextTagBinding; b->nextTagBinding = parser->m_freeBindingList; parser->m_freeBindingList = b; @@ -3569,8 +3608,10 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, return XML_ERROR_BAD_CHAR_REF; if (parser->m_characterDataHandler) { XML_Char buf[XML_ENCODE_MAX]; + beforeHandler(parser); parser->m_characterDataHandler(parser->m_handlerArg, buf, XmlEncode(n, (ICHAR *)buf)); + afterHandler(parser); } else if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); } break; @@ -3579,32 +3620,38 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, case XML_TOK_DATA_NEWLINE: if (parser->m_characterDataHandler) { XML_Char c = 0xA; + beforeHandler(parser); parser->m_characterDataHandler(parser->m_handlerArg, &c, 1); + afterHandler(parser); } else if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); break; case XML_TOK_CDATA_SECT_OPEN: { enum XML_Error result; - if (parser->m_startCdataSectionHandler) + if (parser->m_startCdataSectionHandler) { + beforeHandler(parser); parser->m_startCdataSectionHandler(parser->m_handlerArg); - /* BEGIN disabled code */ - /* Suppose you doing a transformation on a document that involves - changing only the character data. You set up a defaultHandler - and a characterDataHandler. The defaultHandler simply copies - characters through. The characterDataHandler does the - transformation and writes the characters out escaping them as - necessary. This case will fail to work if we leave out the - following two lines (because & and < inside CDATA sections will - be incorrectly escaped). - - However, now we have a start/endCdataSectionHandler, so it seems - easier to let the user deal with this. - */ - else if ((0) && parser->m_characterDataHandler) + afterHandler(parser); + /* BEGIN disabled code */ + /* Suppose you doing a transformation on a document that involves + changing only the character data. You set up a defaultHandler + and a characterDataHandler. The defaultHandler simply copies + characters through. The characterDataHandler does the + transformation and writes the characters out escaping them as + necessary. This case will fail to work if we leave out the + following two lines (because & and < inside CDATA sections will + be incorrectly escaped). + + However, now we have a start/endCdataSectionHandler, so it seems + easier to let the user deal with this. + */ + } else if ((0) && parser->m_characterDataHandler) { + beforeHandler(parser); parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf, 0); - /* END disabled code */ - else if (parser->m_defaultHandler) + afterHandler(parser); + /* END disabled code */ + } else if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); result = doCdataSection(parser, enc, &next, end, nextPtr, haveMore, account); @@ -3624,13 +3671,18 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, if (MUST_CONVERT(enc, s)) { ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf; XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd); + beforeHandler(parser); parser->m_characterDataHandler( parser->m_handlerArg, parser->m_dataBuf, (int)(dataPtr - (ICHAR *)parser->m_dataBuf)); - } else + afterHandler(parser); + } else { + beforeHandler(parser); parser->m_characterDataHandler( parser->m_handlerArg, (const XML_Char *)s, (int)((const XML_Char *)end - (const XML_Char *)s)); + afterHandler(parser); + } } else if (parser->m_defaultHandler) reportDefault(parser, enc, s, end); /* We are at the end of the final buffer, should we check for @@ -3655,16 +3707,21 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, const enum XML_Convert_Result convert_res = XmlConvert( enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd); *eventEndPP = s; + beforeHandler(parser); charDataHandler(parser->m_handlerArg, parser->m_dataBuf, (int)(dataPtr - (ICHAR *)parser->m_dataBuf)); + afterHandler(parser); if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) break; *eventPP = s; } - } else + } else { + beforeHandler(parser); charDataHandler(parser->m_handlerArg, (const XML_Char *)s, (int)((const XML_Char *)next - (const XML_Char *)s)); + afterHandler(parser); + } } else if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); } break; @@ -3702,7 +3759,7 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, *nextPtr = next; return XML_ERROR_NONE; } - /* Fall through */ + EXPAT_FALLTHROUGH; default:; *eventPP = s = next; } @@ -3722,8 +3779,11 @@ freeBindings(XML_Parser parser, BINDING *bindings) { /* m_startNamespaceDeclHandler will have been called for this * binding in addBindings(), so call the end handler now. */ - if (parser->m_endNamespaceDeclHandler) + if (parser->m_endNamespaceDeclHandler) { + beforeHandler(parser); parser->m_endNamespaceDeclHandler(parser->m_handlerArg, b->prefix->name); + afterHandler(parser); + } bindings = bindings->nextTagBinding; b->nextTagBinding = parser->m_freeBindingList; @@ -3747,20 +3807,14 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, TAG_NAME *tagNamePtr, BINDING **bindingsPtr, enum XML_Account account) { DTD *const dtd = parser->m_dtd; /* save one level of indirection */ - ELEMENT_TYPE *elementType; - int nDefaultAtts; - const XML_Char **appAtts; /* the attribute list for the application */ int attIndex = 0; - int prefixLen; - int i; - int n; XML_Char *uri; int nPrefixes = 0; BINDING *binding; const XML_Char *localPart; /* lookup the element type name */ - elementType + ELEMENT_TYPE *elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, tagNamePtr->str, 0); if (! elementType) { const XML_Char *name = poolCopyString(&dtd->pool, tagNamePtr->str); @@ -3775,75 +3829,71 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, if (parser->m_ns && ! setElementTypePrefix(parser, elementType)) return XML_ERROR_NO_MEMORY; } - nDefaultAtts = elementType->nDefaultAtts; + const size_t nDefaultAtts = elementType->nDefaultAtts; + + /* Detect and prevent integer overflow. */ + if (parser->m_attsSize > (size_t)INT_MAX) + return XML_ERROR_NO_MEMORY; /* get the attributes from the tokenizer */ - n = XmlGetAttributes(enc, attStr, parser->m_attsSize, parser->m_atts); + size_t n = (size_t)XmlGetAttributes(enc, attStr, (int)parser->m_attsSize, + parser->m_atts); /* Detect and prevent integer overflow */ - if (n > INT_MAX - nDefaultAtts) { + if (n > SIZE_MAX - nDefaultAtts) { return XML_ERROR_NO_MEMORY; } if (n + nDefaultAtts > parser->m_attsSize) { - int oldAttsSize = parser->m_attsSize; - ATTRIBUTE *temp; -#ifdef XML_ATTR_INFO - XML_AttrInfo *temp2; -#endif + size_t oldAttsSize = parser->m_attsSize; /* Detect and prevent integer overflow */ - if ((nDefaultAtts > INT_MAX - INIT_ATTS_SIZE) - || (n > INT_MAX - (nDefaultAtts + INIT_ATTS_SIZE))) { + if ((nDefaultAtts > SIZE_MAX - INIT_ATTS_SIZE) + || (n > SIZE_MAX - (nDefaultAtts + INIT_ATTS_SIZE))) { return XML_ERROR_NO_MEMORY; } parser->m_attsSize = n + nDefaultAtts + INIT_ATTS_SIZE; - /* Detect and prevent integer overflow. - * The preprocessor guard addresses the "always false" warning - * from -Wtype-limits on platforms where - * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ -#if UINT_MAX >= SIZE_MAX - if ((unsigned)parser->m_attsSize > SIZE_MAX / sizeof(ATTRIBUTE)) { + /* Detect and prevent integer overflow. */ + if (parser->m_attsSize > SIZE_MAX / sizeof(ATTRIBUTE)) { parser->m_attsSize = oldAttsSize; return XML_ERROR_NO_MEMORY; } -#endif - temp = REALLOC(parser, parser->m_atts, - parser->m_attsSize * sizeof(ATTRIBUTE)); + ATTRIBUTE *const temp = REALLOC(parser, parser->m_atts, + parser->m_attsSize * sizeof(ATTRIBUTE)); if (temp == NULL) { parser->m_attsSize = oldAttsSize; return XML_ERROR_NO_MEMORY; } parser->m_atts = temp; #ifdef XML_ATTR_INFO - /* Detect and prevent integer overflow. - * The preprocessor guard addresses the "always false" warning - * from -Wtype-limits on platforms where - * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ -# if UINT_MAX >= SIZE_MAX - if ((unsigned)parser->m_attsSize > SIZE_MAX / sizeof(XML_AttrInfo)) { + /* Detect and prevent integer overflow. */ + if (parser->m_attsSize > SIZE_MAX / sizeof(XML_AttrInfo)) { parser->m_attsSize = oldAttsSize; return XML_ERROR_NO_MEMORY; } -# endif - temp2 = REALLOC(parser, parser->m_attInfo, - parser->m_attsSize * sizeof(XML_AttrInfo)); + XML_AttrInfo *const temp2 = REALLOC( + parser, parser->m_attInfo, parser->m_attsSize * sizeof(XML_AttrInfo)); if (temp2 == NULL) { parser->m_attsSize = oldAttsSize; return XML_ERROR_NO_MEMORY; } parser->m_attInfo = temp2; #endif - if (n > oldAttsSize) - XmlGetAttributes(enc, attStr, n, parser->m_atts); + if (n > oldAttsSize) { + /* Detect and prevent integer overflow. */ + if (n > (size_t)INT_MAX) + return XML_ERROR_NO_MEMORY; + XmlGetAttributes(enc, attStr, (int)n, parser->m_atts); + } } - appAtts = (const XML_Char **)parser->m_atts; - for (i = 0; i < n; i++) { + /* the attribute list for the application */ + const XML_Char **const appAtts = (const XML_Char **)parser->m_atts; + for (size_t i = 0; i < n; i++) { ATTRIBUTE *currAtt = &parser->m_atts[i]; #ifdef XML_ATTR_INFO XML_AttrInfo *currAttInfo = &parser->m_attInfo[i]; @@ -3876,13 +3926,11 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, (attId->name)[-1] = 1; appAtts[attIndex++] = attId->name; if (! parser->m_atts[i].normalized) { - enum XML_Error result; XML_Bool isCdata = XML_TRUE; /* figure out whether declared as other than CDATA */ if (attId->maybeTokenized) { - int j; - for (j = 0; j < nDefaultAtts; j++) { + for (size_t j = 0; j < nDefaultAtts; j++) { if (attId == elementType->defaultAtts[j].id) { isCdata = elementType->defaultAtts[j].isCdata; break; @@ -3891,7 +3939,7 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, } /* normalize the attribute value */ - result = storeAttributeValue( + const enum XML_Error result = storeAttributeValue( parser, enc, isCdata, parser->m_atts[i].valuePtr, parser->m_atts[i].valueEnd, &parser->m_tempPool, account); if (result) @@ -3929,7 +3977,7 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, /* set-up for XML_GetSpecifiedAttributeCount and XML_GetIdAttributeIndex */ parser->m_nSpecifiedAtts = attIndex; if (elementType->idAtt && (elementType->idAtt->name)[-1]) { - for (i = 0; i < attIndex; i += 2) + for (int i = 0; i < attIndex; i += 2) if (appAtts[i] == elementType->idAtt->name) { parser->m_idAttIndex = i; break; @@ -3938,7 +3986,7 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, parser->m_idAttIndex = -1; /* do attribute defaulting */ - for (i = 0; i < nDefaultAtts; i++) { + for (size_t i = 0; i < nDefaultAtts; i++) { const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + i; if (! (da->id->name)[-1] && da->value) { if (da->id->prefix) { @@ -3964,7 +4012,7 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, /* expand prefixed attribute names, check for duplicates, and clear flags that say whether attributes were specified */ - i = 0; + int i = 0; if (nPrefixes) { unsigned int j; /* hash table index */ unsigned long version = parser->m_nsAttsVersion; @@ -3979,7 +4027,6 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, /* size of hash table must be at least 2 * (# of prefixed attributes) */ if ((nPrefixes << 1) >> parser->m_nsAttsPower) { /* true for m_nsAttsPower = 0 */ - NS_ATT *temp; /* hash table size must also be a power of 2 and >= 8 */ while (nPrefixes >> parser->m_nsAttsPower++) ; @@ -4007,7 +4054,8 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, } #endif - temp = REALLOC(parser, parser->m_nsAtts, nsAttsSize * sizeof(NS_ATT)); + NS_ATT *const temp + = REALLOC(parser, parser->m_nsAtts, nsAttsSize * sizeof(NS_ATT)); if (! temp) { /* Restore actual size of memory in m_nsAtts */ parser->m_nsAttsPower = oldNsAttsPower; @@ -4028,9 +4076,6 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, for (; i < attIndex; i += 2) { const XML_Char *s = appAtts[i]; if (s[-1] == 2) { /* prefixed */ - ATTRIBUTE_ID *id; - const BINDING *b; - unsigned long uriHash; struct siphash sip_state; struct sipkey sip_key; @@ -4038,7 +4083,8 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, sip24_init(&sip_state, &sip_key); ((XML_Char *)s)[-1] = 0; /* clear flag */ - id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0); + ATTRIBUTE_ID *const id + = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0); if (! id || ! id->prefix) { /* This code is walking through the appAtts array, dealing * with (in this case) a prefixed attribute name. To be in @@ -4046,7 +4092,7 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, * has to have passed through the hash table lookup once * already. That implies that an entry for it already * exists, so the lookup above will return a pointer to - * already allocated memory. There is no opportunaity for + * already allocated memory. There is no opportunity for * the allocator to fail, so the condition above cannot be * fulfilled. * @@ -4056,15 +4102,12 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, */ return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */ } - b = id->prefix->binding; + const BINDING *const b = id->prefix->binding; if (! b) return XML_ERROR_UNBOUND_PREFIX; - for (j = 0; j < (unsigned int)b->uriLen; j++) { - const XML_Char c = b->uri[j]; - if (! poolAppendChar(&parser->m_tempPool, c)) - return XML_ERROR_NO_MEMORY; - } + if (! poolAppendChars(&parser->m_tempPool, b->uri, b->uriLen)) + return XML_ERROR_NO_MEMORY; sip24_update(&sip_state, b->uri, b->uriLen * sizeof(XML_Char)); @@ -4073,12 +4116,13 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, sip24_update(&sip_state, s, keylen(s) * sizeof(XML_Char)); - do { /* copies null terminator */ - if (! poolAppendChar(&parser->m_tempPool, *s)) + { + const size_t len = xcslen(s) + /*null terminator*/ 1; + if (! poolAppendChars(&parser->m_tempPool, s, len)) return XML_ERROR_NO_MEMORY; - } while (*s++); + } - uriHash = (unsigned long)sip24_final(&sip_state); + const unsigned long uriHash = (unsigned long)sip24_final(&sip_state); { /* Check hash table for duplicate of expanded name (uriName). Derived from code in lookup(parser, HASH_TABLE *table, ...). @@ -4106,10 +4150,9 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, if (parser->m_ns_triplets) { /* append namespace separator and prefix */ parser->m_tempPool.ptr[-1] = parser->m_namespaceSeparator; s = b->prefix->name; - do { - if (! poolAppendChar(&parser->m_tempPool, *s)) - return XML_ERROR_NO_MEMORY; - } while (*s++); + const size_t len = xcslen(s) + /*null terminator*/ 1; + if (! poolAppendChars(&parser->m_tempPool, s, len)) + return XML_ERROR_NO_MEMORY; } /* store expanded name in attribute list */ @@ -4152,48 +4195,36 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, localPart = tagNamePtr->str; } else return XML_ERROR_NONE; - prefixLen = 0; - if (parser->m_ns_triplets && binding->prefix->name) { - while (binding->prefix->name[prefixLen++]) - ; /* prefixLen includes null terminator */ - } + size_t prefixLen = 0; + if (parser->m_ns_triplets && binding->prefix->name) + prefixLen = xcslen(binding->prefix->name) + /*null terminator*/ 1; tagNamePtr->localPart = localPart; tagNamePtr->uriLen = binding->uriLen; tagNamePtr->prefix = binding->prefix->name; tagNamePtr->prefixLen = prefixLen; - for (i = 0; localPart[i++];) - ; /* i includes null terminator */ + + const size_t localPartLen = xcslen(localPart) + /*null terminator*/ 1; /* Detect and prevent integer overflow */ - if (binding->uriLen > INT_MAX - prefixLen - || i > INT_MAX - (binding->uriLen + prefixLen)) { + if (binding->uriLen > SIZE_MAX - prefixLen + || localPartLen > SIZE_MAX - (binding->uriLen + prefixLen)) { return XML_ERROR_NO_MEMORY; } - n = i + binding->uriLen + prefixLen; - if (n > binding->uriAlloc) { - TAG *p; - + const size_t totalLen = localPartLen + binding->uriLen + prefixLen; + if (totalLen > binding->uriAlloc) { /* Detect and prevent integer overflow */ - if (n > INT_MAX - EXPAND_SPARE) { - return XML_ERROR_NO_MEMORY; - } - /* Detect and prevent integer overflow. - * The preprocessor guard addresses the "always false" warning - * from -Wtype-limits on platforms where - * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ -#if UINT_MAX >= SIZE_MAX - if ((unsigned)(n + EXPAND_SPARE) > SIZE_MAX / sizeof(XML_Char)) { + if (totalLen > SIZE_MAX - EXPAND_SPARE + || totalLen + EXPAND_SPARE > SIZE_MAX / sizeof(XML_Char)) { return XML_ERROR_NO_MEMORY; } -#endif - uri = MALLOC(parser, (n + EXPAND_SPARE) * sizeof(XML_Char)); + uri = MALLOC(parser, (totalLen + EXPAND_SPARE) * sizeof(XML_Char)); if (! uri) return XML_ERROR_NO_MEMORY; - binding->uriAlloc = n + EXPAND_SPARE; + binding->uriAlloc = totalLen + EXPAND_SPARE; memcpy(uri, binding->uri, binding->uriLen * sizeof(XML_Char)); - for (p = parser->m_tagStack; p; p = p->parent) + for (TAG *p = parser->m_tagStack; p; p = p->parent) if (p->name.str == binding->uri) p->name.str = uri; FREE(parser, binding->uri); @@ -4201,10 +4232,14 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, } /* if m_namespaceSeparator != '\0' then uri includes it already */ uri = binding->uri + binding->uriLen; - memcpy(uri, localPart, i * sizeof(XML_Char)); + /* Detect and prevent integer overflow */ + if (localPartLen > SIZE_MAX / sizeof(XML_Char)) { + return XML_ERROR_NO_MEMORY; + } + memcpy(uri, localPart, localPartLen * sizeof(XML_Char)); /* we always have a namespace separator between localPart and prefix */ if (prefixLen) { - uri += i - 1; + uri += localPartLen - 1; *uri = parser->m_namespaceSeparator; /* replace null terminator */ memcpy(uri + 1, binding->prefix->name, prefixLen * sizeof(XML_Char)); } @@ -4339,7 +4374,7 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, ASCII_8, ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m, ASCII_e, ASCII_s, ASCII_p, ASCII_a, ASCII_c, ASCII_e, '\0'}; - static const int xmlLen = (int)sizeof(xmlNamespace) / sizeof(XML_Char) - 1; + static const size_t xmlLen = sizeof(xmlNamespace) / sizeof(XML_Char) - 1; // "http://www.w3.org/2000/xmlns/" static const XML_Char xmlnsNamespace[] = {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, @@ -4347,15 +4382,14 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH, ASCII_2, ASCII_0, ASCII_0, ASCII_0, ASCII_SLASH, ASCII_x, ASCII_m, ASCII_l, ASCII_n, ASCII_s, ASCII_SLASH, '\0'}; - static const int xmlnsLen - = (int)sizeof(xmlnsNamespace) / sizeof(XML_Char) - 1; + static const size_t xmlnsLen = sizeof(xmlnsNamespace) / sizeof(XML_Char) - 1; XML_Bool mustBeXML = XML_FALSE; XML_Bool isXML = XML_TRUE; XML_Bool isXMLNS = XML_TRUE; BINDING *b; - int len; + size_t len; /* empty URI is only valid for default namespace per XML NS 1.0 (not 1.1) */ if (*uri == XML_T('\0') && prefix->name) @@ -4374,6 +4408,10 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, } for (len = 0; uri[len]; len++) { + /* Detect and prevent integer overflow */ + if (len == SIZE_MAX) { + return XML_ERROR_NO_MEMORY; + } if (isXML && (len > xmlLen || uri[len] != xmlNamespace[len])) isXML = XML_FALSE; @@ -4414,25 +4452,21 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, if (isXMLNS) return XML_ERROR_RESERVED_NAMESPACE_URI; - if (parser->m_namespaceSeparator) + if (parser->m_namespaceSeparator) { + /* Detect and prevent integer overflow */ + if (len == SIZE_MAX) { + return XML_ERROR_NO_MEMORY; + } len++; + } if (parser->m_freeBindingList) { b = parser->m_freeBindingList; if (len > b->uriAlloc) { /* Detect and prevent integer overflow */ - if (len > INT_MAX - EXPAND_SPARE) { - return XML_ERROR_NO_MEMORY; - } - - /* Detect and prevent integer overflow. - * The preprocessor guard addresses the "always false" warning - * from -Wtype-limits on platforms where - * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ -#if UINT_MAX >= SIZE_MAX - if ((unsigned)(len + EXPAND_SPARE) > SIZE_MAX / sizeof(XML_Char)) { + if (len > SIZE_MAX - EXPAND_SPARE + || len + EXPAND_SPARE > SIZE_MAX / sizeof(XML_Char)) { return XML_ERROR_NO_MEMORY; } -#endif XML_Char *temp = REALLOC(parser, b->uri, sizeof(XML_Char) * (len + EXPAND_SPARE)); @@ -4448,18 +4482,10 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, return XML_ERROR_NO_MEMORY; /* Detect and prevent integer overflow */ - if (len > INT_MAX - EXPAND_SPARE) { + if (len > SIZE_MAX - EXPAND_SPARE + || len + EXPAND_SPARE > SIZE_MAX / sizeof(XML_Char)) { return XML_ERROR_NO_MEMORY; } - /* Detect and prevent integer overflow. - * The preprocessor guard addresses the "always false" warning - * from -Wtype-limits on platforms where - * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ -#if UINT_MAX >= SIZE_MAX - if ((unsigned)(len + EXPAND_SPARE) > SIZE_MAX / sizeof(XML_Char)) { - return XML_ERROR_NO_MEMORY; - } -#endif b->uri = MALLOC(parser, sizeof(XML_Char) * (len + EXPAND_SPARE)); if (! b->uri) { @@ -4483,9 +4509,12 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, b->nextTagBinding = *bindingsPtr; *bindingsPtr = b; /* if attId == NULL then we are not starting a namespace scope */ - if (attId && parser->m_startNamespaceDeclHandler) + if (attId && parser->m_startNamespaceDeclHandler) { + beforeHandler(parser); parser->m_startNamespaceDeclHandler(parser->m_handlerArg, prefix->name, prefix->binding ? uri : 0); + afterHandler(parser); + } return XML_ERROR_NONE; } @@ -4547,15 +4576,20 @@ doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr, *eventEndPP = next; switch (tok) { case XML_TOK_CDATA_SECT_CLOSE: - if (parser->m_endCdataSectionHandler) + if (parser->m_endCdataSectionHandler) { + beforeHandler(parser); parser->m_endCdataSectionHandler(parser->m_handlerArg); + afterHandler(parser); + } /* BEGIN disabled code */ /* see comment under XML_TOK_CDATA_SECT_OPEN */ - else if ((0) && parser->m_characterDataHandler) + else if ((0) && parser->m_characterDataHandler) { + beforeHandler(parser); parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf, 0); - /* END disabled code */ - else if (parser->m_defaultHandler) + afterHandler(parser); + /* END disabled code */ + } else if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); *startPtr = next; *nextPtr = next; @@ -4566,7 +4600,9 @@ doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr, case XML_TOK_DATA_NEWLINE: if (parser->m_characterDataHandler) { XML_Char c = 0xA; + beforeHandler(parser); parser->m_characterDataHandler(parser->m_handlerArg, &c, 1); + afterHandler(parser); } else if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); break; @@ -4579,16 +4615,21 @@ doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr, const enum XML_Convert_Result convert_res = XmlConvert( enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd); *eventEndPP = next; + beforeHandler(parser); charDataHandler(parser->m_handlerArg, parser->m_dataBuf, (int)(dataPtr - (ICHAR *)parser->m_dataBuf)); + afterHandler(parser); if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) break; *eventPP = s; } - } else + } else { + beforeHandler(parser); charDataHandler(parser->m_handlerArg, (const XML_Char *)s, (int)((const XML_Char *)next - (const XML_Char *)s)); + afterHandler(parser); + } } else if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); } break; @@ -4633,7 +4674,7 @@ doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr, if (parser->m_reenter) { return XML_ERROR_UNEXPECTED_STATE; // LCOV_EXCL_LINE } - /* Fall through */ + EXPAT_FALLTHROUGH; default:; *eventPP = s = next; } @@ -4827,8 +4868,10 @@ processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *s, if (! storedversion) return XML_ERROR_NO_MEMORY; } + beforeHandler(parser); parser->m_xmlDeclHandler(parser->m_handlerArg, storedversion, storedEncName, standalone); + afterHandler(parser); } else if (parser->m_defaultHandler) reportDefault(parser, parser->m_encoding, s, next); if (parser->m_protocolEncodingName == NULL) { @@ -4878,8 +4921,11 @@ handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName) { info.convert = NULL; info.data = NULL; info.release = NULL; - if (parser->m_unknownEncodingHandler(parser->m_unknownEncodingHandlerData, - encodingName, &info)) { + beforeHandler(parser); + const int status = parser->m_unknownEncodingHandler( + parser->m_unknownEncodingHandlerData, encodingName, &info); + afterHandler(parser); + if (status) { ENCODING *enc; parser->m_unknownEncodingMem = MALLOC(parser, XmlSizeOfUnknownEncoding()); if (! parser->m_unknownEncodingMem) { @@ -5256,9 +5302,11 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, break; case XML_ROLE_DOCTYPE_INTERNAL_SUBSET: if (parser->m_startDoctypeDeclHandler) { + beforeHandler(parser); parser->m_startDoctypeDeclHandler( parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid, parser->m_doctypePubid, 1); + afterHandler(parser); parser->m_doctypeName = NULL; poolClear(&parser->m_tempPool); handleDefault = XML_FALSE; @@ -5297,7 +5345,7 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, handleDefault = XML_FALSE; goto alreadyChecked; } - /* fall through */ + EXPAT_FALLTHROUGH; case XML_ROLE_ENTITY_PUBLIC_ID: if (! XmlIsPublicId(enc, s, next, eventPP)) return XML_ERROR_PUBLICID; @@ -5325,9 +5373,11 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, } if (parser->m_doctypeName) { + beforeHandler(parser); parser->m_startDoctypeDeclHandler( parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid, parser->m_doctypePubid, 0); + afterHandler(parser); poolClear(&parser->m_tempPool); handleDefault = XML_FALSE; } @@ -5354,14 +5404,22 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, if (parser->m_useForeignDTD) entity->base = parser->m_curBase; dtd->paramEntityRead = XML_FALSE; - if (! parser->m_externalEntityRefHandler( - parser->m_externalEntityRefHandlerArg, 0, entity->base, - entity->systemId, entity->publicId)) + beforeHandler(parser); + const int status = parser->m_externalEntityRefHandler( + parser->m_externalEntityRefHandlerArg, 0, entity->base, + entity->systemId, entity->publicId); + afterHandler(parser); + if (! status) return XML_ERROR_EXTERNAL_ENTITY_HANDLING; if (dtd->paramEntityRead) { - if (! dtd->standalone && parser->m_notStandaloneHandler - && ! parser->m_notStandaloneHandler(parser->m_handlerArg)) - return XML_ERROR_NOT_STANDALONE; + if (! dtd->standalone && parser->m_notStandaloneHandler) { + beforeHandler(parser); + const int handlerStatus + = parser->m_notStandaloneHandler(parser->m_handlerArg); + afterHandler(parser); + if (! handlerStatus) + return XML_ERROR_NOT_STANDALONE; + } } /* if we didn't read the foreign DTD then this means that there is no external subset and we must reset dtd->hasParamEntityRefs @@ -5374,7 +5432,9 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, } #endif /* XML_DTD */ if (parser->m_endDoctypeDeclHandler) { + beforeHandler(parser); parser->m_endDoctypeDeclHandler(parser->m_handlerArg); + afterHandler(parser); handleDefault = XML_FALSE; } break; @@ -5394,14 +5454,22 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, return XML_ERROR_NO_MEMORY; entity->base = parser->m_curBase; dtd->paramEntityRead = XML_FALSE; - if (! parser->m_externalEntityRefHandler( - parser->m_externalEntityRefHandlerArg, 0, entity->base, - entity->systemId, entity->publicId)) + beforeHandler(parser); + const int status = parser->m_externalEntityRefHandler( + parser->m_externalEntityRefHandlerArg, 0, entity->base, + entity->systemId, entity->publicId); + afterHandler(parser); + if (! status) return XML_ERROR_EXTERNAL_ENTITY_HANDLING; if (dtd->paramEntityRead) { - if (! dtd->standalone && parser->m_notStandaloneHandler - && ! parser->m_notStandaloneHandler(parser->m_handlerArg)) - return XML_ERROR_NOT_STANDALONE; + if (! dtd->standalone && parser->m_notStandaloneHandler) { + beforeHandler(parser); + const int handlerStatus + = parser->m_notStandaloneHandler(parser->m_handlerArg); + afterHandler(parser); + if (! handlerStatus) + return XML_ERROR_NOT_STANDALONE; + } } /* if we didn't read the foreign DTD then this means that there is no external subset and we must reset dtd->hasParamEntityRefs @@ -5494,10 +5562,12 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, poolFinish(&parser->m_tempPool); } *eventEndPP = s; + beforeHandler(parser); parser->m_attlistDeclHandler( parser->m_handlerArg, parser->m_declElementType->name, parser->m_declAttributeId->name, parser->m_declAttributeType, 0, role == XML_ROLE_REQUIRED_ATTRIBUTE_VALUE); + afterHandler(parser); handleDefault = XML_FALSE; } } @@ -5532,10 +5602,12 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, poolFinish(&parser->m_tempPool); } *eventEndPP = s; + beforeHandler(parser); parser->m_attlistDeclHandler( parser->m_handlerArg, parser->m_declElementType->name, parser->m_declAttributeId->name, parser->m_declAttributeType, attVal, role == XML_ROLE_FIXED_ATTRIBUTE_VALUE); + afterHandler(parser); poolClear(&parser->m_tempPool); handleDefault = XML_FALSE; } @@ -5550,16 +5622,22 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, parser, enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar, XML_ACCOUNT_NONE); if (parser->m_declEntity) { + /* Detect and prevent signed integer overflow */ + if ((size_t)poolLength(&dtd->entityValuePool) > (size_t)INT_MAX) { + return XML_ERROR_NO_MEMORY; + } parser->m_declEntity->textPtr = poolStart(&dtd->entityValuePool); parser->m_declEntity->textLen = (int)(poolLength(&dtd->entityValuePool)); poolFinish(&dtd->entityValuePool); if (parser->m_entityDeclHandler) { *eventEndPP = s; + beforeHandler(parser); parser->m_entityDeclHandler( parser->m_handlerArg, parser->m_declEntity->name, parser->m_declEntity->is_param, parser->m_declEntity->textPtr, parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0); + afterHandler(parser); handleDefault = XML_FALSE; } } else @@ -5577,10 +5655,12 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, if (parser->m_entityDeclHandler) { *eventEndPP = s; + beforeHandler(parser); parser->m_entityDeclHandler( parser->m_handlerArg, parser->m_declEntity->name, parser->m_declEntity->is_param, parser->m_declEntity->textPtr, parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0); + afterHandler(parser); handleDefault = XML_FALSE; } } @@ -5611,9 +5691,13 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, #ifdef XML_DTD && ! parser->m_paramEntityParsing #endif /* XML_DTD */ - && parser->m_notStandaloneHandler - && ! parser->m_notStandaloneHandler(parser->m_handlerArg)) - return XML_ERROR_NOT_STANDALONE; + && parser->m_notStandaloneHandler) { + beforeHandler(parser); + const int status = parser->m_notStandaloneHandler(parser->m_handlerArg); + afterHandler(parser); + if (! status) + return XML_ERROR_NOT_STANDALONE; + } #ifndef XML_DTD break; #else /* XML_DTD */ @@ -5625,7 +5709,7 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, parser->m_declEntity->publicId = NULL; } #endif /* XML_DTD */ - /* fall through */ + EXPAT_FALLTHROUGH; case XML_ROLE_ENTITY_SYSTEM_ID: if (dtd->keepProcessing && parser->m_declEntity) { parser->m_declEntity->systemId @@ -5656,10 +5740,12 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, if (dtd->keepProcessing && parser->m_declEntity && parser->m_entityDeclHandler) { *eventEndPP = s; + beforeHandler(parser); parser->m_entityDeclHandler( parser->m_handlerArg, parser->m_declEntity->name, parser->m_declEntity->is_param, 0, 0, parser->m_declEntity->base, parser->m_declEntity->systemId, parser->m_declEntity->publicId, 0); + afterHandler(parser); handleDefault = XML_FALSE; } break; @@ -5672,17 +5758,21 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, poolFinish(&dtd->pool); if (parser->m_unparsedEntityDeclHandler) { *eventEndPP = s; + beforeHandler(parser); parser->m_unparsedEntityDeclHandler( parser->m_handlerArg, parser->m_declEntity->name, parser->m_declEntity->base, parser->m_declEntity->systemId, parser->m_declEntity->publicId, parser->m_declEntity->notation); + afterHandler(parser); handleDefault = XML_FALSE; } else if (parser->m_entityDeclHandler) { *eventEndPP = s; + beforeHandler(parser); parser->m_entityDeclHandler( parser->m_handlerArg, parser->m_declEntity->name, 0, 0, 0, parser->m_declEntity->base, parser->m_declEntity->systemId, parser->m_declEntity->publicId, parser->m_declEntity->notation); + afterHandler(parser); handleDefault = XML_FALSE; } } @@ -5789,9 +5879,11 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, if (! systemId) return XML_ERROR_NO_MEMORY; *eventEndPP = s; + beforeHandler(parser); parser->m_notationDeclHandler( parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase, systemId, parser->m_declNotationPublicId); + afterHandler(parser); handleDefault = XML_FALSE; } poolClear(&parser->m_tempPool); @@ -5799,9 +5891,11 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, case XML_ROLE_NOTATION_NO_SYSTEM_ID: if (parser->m_declNotationPublicId && parser->m_notationDeclHandler) { *eventEndPP = s; + beforeHandler(parser); parser->m_notationDeclHandler( parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase, 0, parser->m_declNotationPublicId); + afterHandler(parser); handleDefault = XML_FALSE; } poolClear(&parser->m_tempPool); @@ -5835,41 +5929,18 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, case XML_ROLE_GROUP_OPEN: if (parser->m_prologState.level >= parser->m_groupSize) { if (parser->m_groupSize) { - { - /* Detect and prevent integer overflow */ - if (parser->m_groupSize > (unsigned int)(-1) / 2u) { - return XML_ERROR_NO_MEMORY; - } - - char *const new_connector = REALLOC( - parser, parser->m_groupConnector, parser->m_groupSize *= 2); - if (new_connector == NULL) { - parser->m_groupSize /= 2; - return XML_ERROR_NO_MEMORY; - } - parser->m_groupConnector = new_connector; + /* Detect and prevent integer overflow */ + if (parser->m_groupSize > SIZE_MAX / 2) { + return XML_ERROR_NO_MEMORY; } - if (dtd->scaffIndex) { - /* Detect and prevent integer overflow. - * The preprocessor guard addresses the "always false" warning - * from -Wtype-limits on platforms where - * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ -#if UINT_MAX >= SIZE_MAX - if (parser->m_groupSize > SIZE_MAX / sizeof(int)) { - parser->m_groupSize /= 2; - return XML_ERROR_NO_MEMORY; - } -#endif - - int *const new_scaff_index = REALLOC( - parser, dtd->scaffIndex, parser->m_groupSize * sizeof(int)); - if (new_scaff_index == NULL) { - parser->m_groupSize /= 2; - return XML_ERROR_NO_MEMORY; - } - dtd->scaffIndex = new_scaff_index; + char *const new_connector = REALLOC(parser, parser->m_groupConnector, + parser->m_groupSize *= 2); + if (new_connector == NULL) { + parser->m_groupSize /= 2; + return XML_ERROR_NO_MEMORY; } + parser->m_groupConnector = new_connector; } else { parser->m_groupConnector = MALLOC(parser, parser->m_groupSize = 32); if (! parser->m_groupConnector) { @@ -5884,6 +5955,21 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, if (myindex < 0) return XML_ERROR_NO_MEMORY; assert(dtd->scaffIndex != NULL); + if ((size_t)dtd->scaffLevel >= dtd->scaffIndexSize) { + /* Detect and prevent integer overflow */ + if (dtd->scaffIndexSize > SIZE_MAX / 2 / sizeof(int)) { + return XML_ERROR_NO_MEMORY; + } + assert(dtd->scaffIndexSize > 0); + const size_t new_size = dtd->scaffIndexSize * 2; + int *const new_scaff_index + = REALLOC(parser, dtd->scaffIndex, new_size * sizeof(int)); + if (new_scaff_index == NULL) { + return XML_ERROR_NO_MEMORY; + } + dtd->scaffIndex = new_scaff_index; + dtd->scaffIndexSize = new_size; + } dtd->scaffIndex[dtd->scaffLevel] = myindex; dtd->scaffLevel++; dtd->scaffold[myindex].type = XML_CTYPE_SEQ; @@ -5964,7 +6050,9 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, /* cannot report skipped entities in declarations */ if ((role == XML_ROLE_PARAM_ENTITY_REF) && parser->m_skippedEntityHandler) { + beforeHandler(parser); parser->m_skippedEntityHandler(parser->m_handlerArg, name, 1); + afterHandler(parser); handleDefault = XML_FALSE; } break; @@ -5985,9 +6073,12 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, dtd->paramEntityRead = XML_FALSE; entity->open = XML_TRUE; entityTrackingOnOpen(parser, entity, __LINE__); - if (! parser->m_externalEntityRefHandler( - parser->m_externalEntityRefHandlerArg, 0, entity->base, - entity->systemId, entity->publicId)) { + beforeHandler(parser); + const int status = parser->m_externalEntityRefHandler( + parser->m_externalEntityRefHandlerArg, 0, entity->base, + entity->systemId, entity->publicId); + afterHandler(parser); + if (! status) { entityTrackingOnClose(parser, entity, __LINE__); entity->open = XML_FALSE; return XML_ERROR_EXTERNAL_ENTITY_HANDLING; @@ -6005,9 +6096,13 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, } } #endif /* XML_DTD */ - if (! dtd->standalone && parser->m_notStandaloneHandler - && ! parser->m_notStandaloneHandler(parser->m_handlerArg)) - return XML_ERROR_NOT_STANDALONE; + if (! dtd->standalone && parser->m_notStandaloneHandler) { + beforeHandler(parser); + const int status = parser->m_notStandaloneHandler(parser->m_handlerArg); + afterHandler(parser); + if (! status) + return XML_ERROR_NOT_STANDALONE; + } break; /* Element declaration stuff */ @@ -6042,8 +6137,10 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, content->type = ((role == XML_ROLE_CONTENT_ANY) ? XML_CTYPE_ANY : XML_CTYPE_EMPTY); *eventEndPP = s; + beforeHandler(parser); parser->m_elementDeclHandler( parser->m_handlerArg, parser->m_declElementType->name, content); + afterHandler(parser); handleDefault = XML_FALSE; } dtd->in_eldecl = XML_FALSE; @@ -6087,9 +6184,7 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, return XML_ERROR_NO_MEMORY; name = el->name; dtd->scaffold[myindex].name = name; - nameLen = 0; - while (name[nameLen++]) - ; + nameLen = xcslen(name) + /*null terminator*/ 1; /* Detect and prevent integer overflow */ if (nameLen > UINT_MAX - dtd->contentStringLen) { @@ -6125,8 +6220,10 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, if (! model) return XML_ERROR_NO_MEMORY; *eventEndPP = s; + beforeHandler(parser); parser->m_elementDeclHandler( parser->m_handlerArg, parser->m_declElementType->name, model); + afterHandler(parser); } dtd->in_eldecl = XML_FALSE; dtd->contentStringLen = 0; @@ -6188,7 +6285,7 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, *nextPtr = next; return XML_ERROR_NONE; } - /* Fall through */ + EXPAT_FALLTHROUGH; default: s = next; tok = XmlPrologTok(enc, s, end, &next); @@ -6268,7 +6365,7 @@ epilogProcessor(XML_Parser parser, const char *s, const char *end, if (parser->m_reenter) { return XML_ERROR_UNEXPECTED_STATE; // LCOV_EXCL_LINE } - /* Fall through */ + EXPAT_FALLTHROUGH; default:; parser->m_eventPtr = s = next; } @@ -6278,20 +6375,18 @@ epilogProcessor(XML_Parser parser, const char *s, const char *end, static enum XML_Error processEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl, enum EntityType type) { - OPEN_INTERNAL_ENTITY *openEntity, **openEntityList, **freeEntityList; + OPEN_INTERNAL_ENTITY *openEntity, **openEntityList; + OPEN_INTERNAL_ENTITY **const freeEntityList = &parser->m_freeEntities; switch (type) { case ENTITY_INTERNAL: parser->m_processor = internalEntityProcessor; openEntityList = &parser->m_openInternalEntities; - freeEntityList = &parser->m_freeInternalEntities; break; case ENTITY_ATTRIBUTE: openEntityList = &parser->m_openAttributeEntities; - freeEntityList = &parser->m_freeAttributeEntities; break; case ENTITY_VALUE: openEntityList = &parser->m_openValueEntities; - freeEntityList = &parser->m_freeValueEntities; break; /* default case serves merely as a safety net in case of a * wrong entityType. Therefore we exclude the following lines @@ -6408,8 +6503,8 @@ internalEntityProcessor(XML_Parser parser, const char *s, const char *end, parser->m_openInternalEntities = parser->m_openInternalEntities->next; /* put openEntity back in list of free instances */ - openEntity->next = parser->m_freeInternalEntities; - parser->m_freeInternalEntities = openEntity; + openEntity->next = parser->m_freeEntities; + parser->m_freeEntities = openEntity; if (parser->m_openInternalEntities == NULL) { parser->m_processor = entity->is_param ? prologProcessor : contentProcessor; @@ -6485,8 +6580,8 @@ storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, parser->m_openAttributeEntities = parser->m_openAttributeEntities->next; /* put openEntity back in list of free instances */ - openEntity->next = parser->m_freeAttributeEntities; - parser->m_freeAttributeEntities = openEntity; + openEntity->next = parser->m_freeEntities; + parser->m_freeEntities = openEntity; } // Break if an error occurred or there is nothing left to process @@ -6539,7 +6634,6 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, return XML_ERROR_INVALID_TOKEN; case XML_TOK_CHAR_REF: { XML_Char buf[XML_ENCODE_MAX]; - int i; int n = XmlCharRefNumber(enc, ptr); if (n < 0) { if (enc == parser->m_encoding) @@ -6559,10 +6653,9 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, * XmlEncode() is never passed a value it might return an * error for. */ - for (i = 0; i < n; i++) { - if (! poolAppendChar(pool, buf[i])) - return XML_ERROR_NO_MEMORY; - } + + if (! poolAppendChars(pool, buf, n)) + return XML_ERROR_NO_MEMORY; } break; case XML_TOK_DATA_CHARS: if (! poolAppend(pool, enc, ptr, next)) @@ -6570,7 +6663,7 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, break; case XML_TOK_TRAILING_CR: next = ptr + enc->minBytesPerChar; - /* fall through */ + EXPAT_FALLTHROUGH; case XML_TOK_ATTRIBUTE_VALUE_S: case XML_TOK_DATA_NEWLINE: if (! isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20)) @@ -6624,8 +6717,11 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, } else if (! entity) { /* Cannot report skipped entity here - see comments on parser->m_skippedEntityHandler. - if (parser->m_skippedEntityHandler) + if (parser->m_skippedEntityHandler) { + beforeHandler(parser); parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0); + afterHandler(parser); + } */ /* Cannot call the default handler because this would be out of sync with the call to the startElementHandler. @@ -6758,8 +6854,11 @@ storeEntityValue(XML_Parser parser, const ENCODING *enc, /* not a well-formedness error - see XML 1.0: WFC Entity Declared */ /* cannot report skipped entity here - see comments on parser->m_skippedEntityHandler - if (parser->m_skippedEntityHandler) + if (parser->m_skippedEntityHandler) { + beforeHandler(parser); parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0); + afterHandler(parser); + } */ dtd->keepProcessing = dtd->standalone; goto endEntityValue; @@ -6775,9 +6874,12 @@ storeEntityValue(XML_Parser parser, const ENCODING *enc, dtd->paramEntityRead = XML_FALSE; entity->open = XML_TRUE; entityTrackingOnOpen(parser, entity, __LINE__); - if (! parser->m_externalEntityRefHandler( - parser->m_externalEntityRefHandlerArg, 0, entity->base, - entity->systemId, entity->publicId)) { + beforeHandler(parser); + const int status = parser->m_externalEntityRefHandler( + parser->m_externalEntityRefHandlerArg, 0, entity->base, + entity->systemId, entity->publicId); + afterHandler(parser); + if (! status) { entityTrackingOnClose(parser, entity, __LINE__); entity->open = XML_FALSE; result = XML_ERROR_EXTERNAL_ENTITY_HANDLING; @@ -6813,17 +6915,15 @@ storeEntityValue(XML_Parser parser, const ENCODING *enc, break; case XML_TOK_TRAILING_CR: next = entityTextPtr + enc->minBytesPerChar; - /* fall through */ + EXPAT_FALLTHROUGH; case XML_TOK_DATA_NEWLINE: - if (pool->end == pool->ptr && ! poolGrow(pool)) { + if (! poolAppendChar(pool, 0xA)) { result = XML_ERROR_NO_MEMORY; goto endEntityValue; } - *(pool->ptr)++ = 0xA; break; case XML_TOK_CHAR_REF: { XML_Char buf[XML_ENCODE_MAX]; - int i; int n = XmlCharRefNumber(enc, entityTextPtr); if (n < 0) { if (enc == parser->m_encoding) @@ -6841,12 +6941,9 @@ storeEntityValue(XML_Parser parser, const ENCODING *enc, * XmlEncode() is never passed a value it might return an * error for. */ - for (i = 0; i < n; i++) { - if (pool->end == pool->ptr && ! poolGrow(pool)) { - result = XML_ERROR_NO_MEMORY; - goto endEntityValue; - } - *(pool->ptr)++ = buf[i]; + if (! poolAppendChars(pool, buf, n)) { + result = XML_ERROR_NO_MEMORY; + goto endEntityValue; } } break; case XML_TOK_PARTIAL: @@ -6943,8 +7040,8 @@ callStoreEntityValue(XML_Parser parser, const ENCODING *enc, parser->m_openValueEntities = parser->m_openValueEntities->next; /* put openEntity back in list of free instances */ - openEntity->next = parser->m_freeValueEntities; - parser->m_freeValueEntities = openEntity; + openEntity->next = parser->m_freeEntities; + parser->m_freeEntities = openEntity; } // Break if an error occurred or there is nothing left to process @@ -6974,6 +7071,11 @@ storeSelfEntityValue(XML_Parser parser, ENTITY *entity) { return XML_ERROR_NO_MEMORY; } + /* Detect and prevent signed integer overflow */ + if ((size_t)poolLength(pool) > (size_t)INT_MAX) { + poolDiscard(pool); + return XML_ERROR_NO_MEMORY; + } entity->textPtr = poolStart(pool); entity->textLen = (int)(poolLength(pool)); poolFinish(pool); @@ -7026,7 +7128,9 @@ reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, if (! data) return 0; normalizeLines(data); + beforeHandler(parser); parser->m_processingInstructionHandler(parser->m_handlerArg, target, data); + afterHandler(parser); poolClear(&parser->m_tempPool); return 1; } @@ -7046,7 +7150,9 @@ reportComment(XML_Parser parser, const ENCODING *enc, const char *start, if (! data) return 0; normalizeLines(data); + beforeHandler(parser); parser->m_commentHandler(parser->m_handlerArg, data); + afterHandler(parser); poolClear(&parser->m_tempPool); return 1; } @@ -7087,15 +7193,20 @@ reportDefault(XML_Parser parser, const ENCODING *enc, const char *s, convert_res = XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd); *eventEndPP = s; + beforeHandler(parser); parser->m_defaultHandler(parser->m_handlerArg, parser->m_dataBuf, (int)(dataPtr - (ICHAR *)parser->m_dataBuf)); + afterHandler(parser); *eventPP = s; } while ((convert_res != XML_CONVERT_COMPLETED) && (convert_res != XML_CONVERT_INPUT_INCOMPLETE)); - } else + } else { + beforeHandler(parser); parser->m_defaultHandler( parser->m_handlerArg, (const XML_Char *)s, (int)((const XML_Char *)end - (const XML_Char *)s)); + afterHandler(parser); + } } static int @@ -7106,48 +7217,34 @@ defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata, /* The handling of default attributes gets messed up if we have a default which duplicates a non-default. */ NAMED *const nameFound - = (NAMED *)lookup(parser, &(type->defaultAttsNames), attId->name, 0); + = lookup(parser, &(type->defaultAttsNames), attId->name, 0); if (nameFound) return 1; if (isId && ! type->idAtt && ! attId->xmlns) type->idAtt = attId; } if (type->nDefaultAtts == type->allocDefaultAtts) { - if (type->allocDefaultAtts == 0) { - type->allocDefaultAtts = 8; - type->defaultAtts - = MALLOC(parser, type->allocDefaultAtts * sizeof(DEFAULT_ATTRIBUTE)); - if (! type->defaultAtts) { - type->allocDefaultAtts = 0; - return 0; - } - } else { - DEFAULT_ATTRIBUTE *temp; - - /* Detect and prevent integer overflow */ - if (type->allocDefaultAtts > INT_MAX / 2) { - return 0; - } - - int count = type->allocDefaultAtts * 2; + /* Detect and prevent integer overflow */ + if (type->allocDefaultAtts > SIZE_MAX / 2) { + return 0; + } - /* Detect and prevent integer overflow. - * The preprocessor guard addresses the "always false" warning - * from -Wtype-limits on platforms where - * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ -#if UINT_MAX >= SIZE_MAX - if ((unsigned)count > SIZE_MAX / sizeof(DEFAULT_ATTRIBUTE)) { - return 0; - } -#endif + size_t count = type->allocDefaultAtts * 2; + if (count == 0) { + count = 8; + } - temp = REALLOC(parser, type->defaultAtts, - (count * sizeof(DEFAULT_ATTRIBUTE))); - if (temp == NULL) - return 0; - type->allocDefaultAtts = count; - type->defaultAtts = temp; + /* Detect and prevent integer overflow. */ + if (count > SIZE_MAX / sizeof(DEFAULT_ATTRIBUTE)) { + return 0; } + + DEFAULT_ATTRIBUTE *const temp = REALLOC( + parser, type->defaultAtts, (count * sizeof(DEFAULT_ATTRIBUTE))); + if (temp == NULL) + return 0; + type->allocDefaultAtts = count; + type->defaultAtts = temp; } att = type->defaultAtts + type->nDefaultAtts; att->id = attId; @@ -7156,8 +7253,8 @@ defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata, if (! isCdata) attId->maybeTokenized = XML_TRUE; - NAMED *const nameAddedOrFound = (NAMED *)lookup( - parser, &(type->defaultAttsNames), attId->name, sizeof(NAMED)); + NAMED *const nameAddedOrFound + = lookup(parser, &(type->defaultAttsNames), attId->name, sizeof(NAMED)); if (! nameAddedOrFound) return 0; @@ -7230,13 +7327,14 @@ getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, } else { int i; for (i = 0; name[i]; i++) { + /* Detect and prevent signed integer overflow */ + if (i == INT_MAX) { + return NULL; + } /* attributes without prefix are *not* in the default namespace */ if (name[i] == XML_T(ASCII_COLON)) { - int j; - for (j = 0; j < i; j++) { - if (! poolAppendChar(&dtd->pool, name[j])) - return NULL; - } + if (! poolAppendChars(&dtd->pool, name, i)) + return NULL; if (! poolAppendChar(&dtd->pool, XML_T('\0'))) return NULL; id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, @@ -7264,46 +7362,39 @@ getContext(XML_Parser parser) { XML_Bool needSep = XML_FALSE; if (dtd->defaultPrefix.binding) { - int i; - int len; if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS))) return NULL; - len = dtd->defaultPrefix.binding->uriLen; + size_t len = dtd->defaultPrefix.binding->uriLen; if (parser->m_namespaceSeparator) len--; - for (i = 0; i < len; i++) { - if (! poolAppendChar(&parser->m_tempPool, - dtd->defaultPrefix.binding->uri[i])) { - /* Because of memory caching, I don't believe this line can be - * executed. - * - * This is part of a loop copying the default prefix binding - * URI into the parser's temporary string pool. Previously, - * that URI was copied into the same string pool, with a - * terminating NUL character, as part of setContext(). When - * the pool was cleared, that leaves a block definitely big - * enough to hold the URI on the free block list of the pool. - * The URI copy in getContext() therefore cannot run out of - * memory. - * - * If the pool is used between the setContext() and - * getContext() calls, the worst it can do is leave a bigger - * block on the front of the free list. Given that this is - * all somewhat inobvious and program logic can be changed, we - * don't delete the line but we do exclude it from the test - * coverage statistics. - */ - return NULL; /* LCOV_EXCL_LINE */ - } + if (! poolAppendChars(&parser->m_tempPool, dtd->defaultPrefix.binding->uri, + len)) { + /* Because of memory caching, I don't believe this line can be + * executed. + * + * This is part of a loop copying the default prefix binding + * URI into the parser's temporary string pool. Previously, + * that URI was copied into the same string pool, with a + * terminating NUL character, as part of setContext(). When + * the pool was cleared, that leaves a block definitely big + * enough to hold the URI on the free block list of the pool. + * The URI copy in getContext() therefore cannot run out of + * memory. + * + * If the pool is used between the setContext() and + * getContext() calls, the worst it can do is leave a bigger + * block on the front of the free list. Given that this is + * all somewhat inobvious and program logic can be changed, we + * don't delete the line but we do exclude it from the test + * coverage statistics. + */ + return NULL; /* LCOV_EXCL_LINE */ } needSep = XML_TRUE; } hashTableIterInit(&iter, &(dtd->prefixes)); for (;;) { - int i; - int len; - const XML_Char *s; PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter); if (! prefix) break; @@ -7318,23 +7409,21 @@ getContext(XML_Parser parser) { } if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP)) return NULL; - for (s = prefix->name; *s; s++) - if (! poolAppendChar(&parser->m_tempPool, *s)) - return NULL; + if (! poolAppendChars(&parser->m_tempPool, prefix->name, + xcslen(prefix->name))) + return NULL; if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS))) return NULL; - len = prefix->binding->uriLen; + size_t len = prefix->binding->uriLen; if (parser->m_namespaceSeparator) len--; - for (i = 0; i < len; i++) - if (! poolAppendChar(&parser->m_tempPool, prefix->binding->uri[i])) - return NULL; + if (! poolAppendChars(&parser->m_tempPool, prefix->binding->uri, len)) + return NULL; needSep = XML_TRUE; } hashTableIterInit(&iter, &(dtd->generalEntities)); for (;;) { - const XML_Char *s; ENTITY *e = (ENTITY *)hashTableIterNext(&iter); if (! e) break; @@ -7342,9 +7431,8 @@ getContext(XML_Parser parser) { continue; if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP)) return NULL; - for (s = e->name; *s; s++) - if (! poolAppendChar(&parser->m_tempPool, *s)) - return 0; + if (! poolAppendChars(&parser->m_tempPool, e->name, xcslen(e->name))) + return NULL; needSep = XML_TRUE; } @@ -7466,6 +7554,7 @@ dtdCreate(XML_Parser parser) { p->in_eldecl = XML_FALSE; p->scaffIndex = NULL; + p->scaffIndexSize = 0; p->scaffold = NULL; p->scaffLevel = 0; p->scaffSize = 0; @@ -7487,8 +7576,7 @@ dtdReset(DTD *p, XML_Parser parser) { if (! e) break; hashTableDestroy(&(e->defaultAttsNames)); - if (e->allocDefaultAtts != 0) - FREE(parser, e->defaultAtts); + FREE(parser, e->defaultAtts); } hashTableClear(&(p->generalEntities)); #ifdef XML_DTD @@ -7507,6 +7595,7 @@ dtdReset(DTD *p, XML_Parser parser) { FREE(parser, p->scaffIndex); p->scaffIndex = NULL; + p->scaffIndexSize = 0; FREE(parser, p->scaffold); p->scaffold = NULL; @@ -7529,8 +7618,7 @@ dtdDestroy(DTD *p, XML_Bool isDocEntity, XML_Parser parser) { if (! e) break; hashTableDestroy(&(e->defaultAttsNames)); - if (e->allocDefaultAtts != 0) - FREE(parser, e->defaultAtts); + FREE(parser, e->defaultAtts); } hashTableDestroy(&(p->generalEntities)); #ifdef XML_DTD @@ -7609,7 +7697,6 @@ dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, hashTableIterInit(&iter, &(oldDtd->elementTypes)); for (;;) { - int i; ELEMENT_TYPE *newE; const XML_Char *name; const ELEMENT_TYPE *oldE = (ELEMENT_TYPE *)hashTableIterNext(&iter); @@ -7627,15 +7714,10 @@ dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, hashTableInit(&(newE->defaultAttsNames), parser); if (oldE->nDefaultAtts) { - /* Detect and prevent integer overflow. - * The preprocessor guard addresses the "always false" warning - * from -Wtype-limits on platforms where - * sizeof(int) < sizeof(size_t), e.g. on x86_64. */ -#if UINT_MAX >= SIZE_MAX - if ((size_t)oldE->nDefaultAtts > SIZE_MAX / sizeof(DEFAULT_ATTRIBUTE)) { + /* Detect and prevent integer overflow. */ + if (oldE->nDefaultAtts > SIZE_MAX / sizeof(DEFAULT_ATTRIBUTE)) { return 0; } -#endif newE->defaultAtts = MALLOC(parser, oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE)); if (! newE->defaultAtts) { @@ -7649,7 +7731,7 @@ dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, if (oldE->prefix) newE->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes), oldE->prefix->name, 0); - for (i = 0; i < newE->nDefaultAtts; i++) { + for (size_t i = 0; i < newE->nDefaultAtts; i++) { const XML_Char *const attributeName = oldE->defaultAtts[i].id->name; newE->defaultAtts[i].id = (ATTRIBUTE_ID *)lookup( oldParser, &(newDtd->attributeIds), attributeName, 0); @@ -7662,8 +7744,8 @@ dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, } else newE->defaultAtts[i].value = NULL; - NAMED *const nameAddedOrFound = (NAMED *)lookup( - parser, &(newE->defaultAttsNames), attributeName, sizeof(NAMED)); + NAMED *const nameAddedOrFound = lookup(parser, &(newE->defaultAttsNames), + attributeName, sizeof(NAMED)); if (! nameAddedOrFound) { return 0; } @@ -7693,6 +7775,7 @@ dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, newDtd->scaffSize = oldDtd->scaffSize; newDtd->scaffLevel = oldDtd->scaffLevel; newDtd->scaffIndex = oldDtd->scaffIndex; + newDtd->scaffIndexSize = oldDtd->scaffIndexSize; return 1; } /* End dtdCopy */ @@ -7764,18 +7847,23 @@ copyEntityTable(XML_Parser oldParser, HASH_TABLE *newTable, static XML_Bool FASTCALL keyeq(KEY s1, KEY s2) { +#ifdef XML_UNICODE +# ifdef XML_UNICODE_WCHAR_T + return (wcscmp(s1, s2) == 0) ? XML_TRUE : XML_FALSE; +# else for (; *s1 == *s2; s1++, s2++) if (*s1 == 0) return XML_TRUE; return XML_FALSE; +# endif +#else + return (strcmp(s1, s2) == 0) ? XML_TRUE : XML_FALSE; +#endif } static size_t keylen(KEY s) { - size_t len = 0; - for (; *s; s++, len++) - ; - return len; + return xcslen(s); } static void @@ -7993,10 +8081,8 @@ poolAppend(STRING_POOL *pool, const ENCODING *enc, const char *ptr, static const XML_Char *FASTCALL poolCopyString(STRING_POOL *pool, const XML_Char *s) { - do { - if (! poolAppendChar(pool, *s)) - return NULL; - } while (*s++); + if (! poolAppendChars(pool, s, xcslen(s) + /*null terminator*/ 1)) + return NULL; s = pool->start; poolFinish(pool); return s; @@ -8035,10 +8121,8 @@ poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n) { */ return NULL; /* LCOV_EXCL_LINE */ } - for (; n > 0; --n, s++) { - if (! poolAppendChar(pool, *s)) - return NULL; - } + if (n > 0 && ! poolAppendChars(pool, s, n)) + return NULL; s = pool->start; poolFinish(pool); return s; @@ -8046,11 +8130,8 @@ poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n) { static const XML_Char *FASTCALL poolAppendString(STRING_POOL *pool, const XML_Char *s) { - while (*s) { - if (! poolAppendChar(pool, *s)) - return NULL; - s++; - } + if (! poolAppendChars(pool, s, xcslen(s))) + return NULL; return pool->start; } @@ -8059,9 +8140,8 @@ poolStoreString(STRING_POOL *pool, const ENCODING *enc, const char *ptr, const char *end) { if (! poolAppend(pool, enc, ptr, end)) return NULL; - if (pool->ptr == pool->end && ! poolGrow(pool)) + if (! poolAppendChar(pool, 0)) return NULL; - *(pool->ptr)++ = 0; return pool->start; } @@ -8196,6 +8276,19 @@ poolGrow(STRING_POOL *pool) { return XML_TRUE; } +static bool FASTCALL +poolGrowUntil(STRING_POOL *pool, size_t needed) { + for (;;) { + const size_t available = pool->end - pool->ptr; + if (available >= needed) { + return true; + } + if (! poolGrow(pool)) { + return false; + } + } +} + static int FASTCALL nextScaffoldPart(XML_Parser parser) { DTD *const dtd = parser->m_dtd; /* save one level of indirection */ @@ -8203,18 +8296,14 @@ nextScaffoldPart(XML_Parser parser) { int next; if (! dtd->scaffIndex) { - /* Detect and prevent integer overflow. - * The preprocessor guard addresses the "always false" warning - * from -Wtype-limits on platforms where - * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ -#if UINT_MAX >= SIZE_MAX + /* Detect and prevent integer overflow. */ if (parser->m_groupSize > SIZE_MAX / sizeof(int)) { return -1; } -#endif dtd->scaffIndex = MALLOC(parser, parser->m_groupSize * sizeof(int)); if (! dtd->scaffIndex) return -1; + dtd->scaffIndexSize = parser->m_groupSize; dtd->scaffIndex[0] = 0; } @@ -8375,12 +8464,21 @@ build_model(XML_Parser parser) { const XML_Char *src; dest->name = str; src = dtd->scaffold[src_node].name; - for (;;) { - *str++ = *src; - if (! *src) - break; - src++; + + const size_t nameLen = xcslen(src) + /* null terminator*/ 1; + + // Detect and prevent integer overflow + if (nameLen > SIZE_MAX / sizeof(XML_Char)) { + // NOTE: We are avoiding FREE(..) here because the model + // is not being allocated with MALLOC(..) but with plain + // .malloc_fcn(..). + parser->m_mem.free_fcn(ret); + return NULL; } + + memcpy(str, src, nameLen * sizeof(XML_Char)); + str += nameLen; + dest->numchildren = 0; dest->children = NULL; } else { @@ -8427,22 +8525,24 @@ getElementType(XML_Parser parser, const ENCODING *enc, const char *ptr, static XML_Char * copyString(const XML_Char *s, XML_Parser parser) { - size_t charsRequired = 0; - XML_Char *result; - /* First determine how long the string is */ - while (s[charsRequired] != 0) { - charsRequired++; - } - /* Include the terminator */ - charsRequired++; + const size_t charsRequired = xcslen(s) + /*null terminator*/ 1; + + /* Detect and prevent integer overflow */ + if (charsRequired > SIZE_MAX / sizeof(XML_Char)) + return NULL; + + const size_t bytesRequired = charsRequired * sizeof(XML_Char); /* Now allocate space for the copy */ - result = MALLOC(parser, charsRequired * sizeof(XML_Char)); + XML_Char *const result = MALLOC(parser, bytesRequired); + if (result == NULL) return NULL; + /* Copy the original into place */ - memcpy(result, s, charsRequired * sizeof(XML_Char)); + memcpy(result, s, bytesRequired); + return result; } @@ -8609,15 +8709,25 @@ entityTrackingReportStats(XML_Parser rootParser, ENTITY *entity, const char *const entityName = entity->name; # endif + const bool limitingWanted = rootParser->m_entity_stats.debugLevel < 2; + const int maxLimitedDepth = 10; // somewhat arbitrary + const int candidateIndentDepth + = (int)rootParser->m_entity_stats.currentDepth - 1; + const bool limitingNeeded + = limitingWanted && (candidateIndentDepth > maxLimitedDepth); + const char *const ellipisOrEmpty = limitingNeeded ? " [..] " : ""; + const int indentDepth + = limitingNeeded ? (maxLimitedDepth - /* make space for ellipis */ 2) + : candidateIndentDepth; + fprintf( stderr, - "expat: Entities(%p): Count %9u, depth %2u/%2u %*s%s%s; %s length %d (xmlparse.c:%d)\n", + "expat: Entities(%p): Count %9u, depth %2u/%2u %*s%s%s%s; %s length %d (xmlparse.c:%d)\n", (void *)rootParser, rootParser->m_entity_stats.countEverOpened, rootParser->m_entity_stats.currentDepth, - rootParser->m_entity_stats.maximumDepthSeen, - ((int)rootParser->m_entity_stats.currentDepth - 1) * 2, "", - entity->is_param ? "%" : "&", entityName, action, entity->textLen, - sourceLine); + rootParser->m_entity_stats.maximumDepthSeen, indentDepth * 2, "", + ellipisOrEmpty, entity->is_param ? "%" : "&", entityName, action, + entity->textLen, sourceLine); } static void diff --git a/Modules/expat/xmltok.c b/Modules/expat/xmltok.c index 32cd5f147e93226..387c6e44a25f80c 100644 --- a/Modules/expat/xmltok.c +++ b/Modules/expat/xmltok.c @@ -25,6 +25,7 @@ Copyright (c) 2022 Sean McBride Copyright (c) 2023 Hanno Böck Copyright (c) 2025 Alfonso Gregory + Copyright (c) 2026 Nick Begg Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining @@ -58,6 +59,7 @@ #endif #include "internal.h" +#include "fallthrough.h" #include "xmltok.h" #include "nametab.h" @@ -641,7 +643,7 @@ unicode_byte_type(char hi, char lo) { *(*toP)++ = lo; \ break; \ } \ - /* fall through */ \ + EXPAT_FALLTHROUGH; \ case 0x1: \ case 0x2: \ case 0x3: \ @@ -1557,7 +1559,7 @@ initScan(const ENCODING *const *encodingTable, const INIT_ENCODING *enc, case 0xEF: /* possibly first byte of UTF-8 BOM */ if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC && state == XML_CONTENT_STATE) break; - /* fall through */ + EXPAT_FALLTHROUGH; case 0x00: case 0x3C: return XML_TOK_PARTIAL; diff --git a/Modules/expat/xmltok_impl.c b/Modules/expat/xmltok_impl.c index 239a2d06c4512ce..eae11bdd968aee4 100644 --- a/Modules/expat/xmltok_impl.c +++ b/Modules/expat/xmltok_impl.c @@ -17,6 +17,7 @@ Copyright (c) 2019 David Loffredo Copyright (c) 2020 Boris Kolpackov Copyright (c) 2022 Martin Ettl + Copyright (c) 2026 Nick Begg Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining @@ -83,7 +84,7 @@ *nextTokPtr = ptr; \ return XML_TOK_INVALID; \ } \ - /* fall through */ \ + EXPAT_FALLTHROUGH; \ case BT_NMSTRT: \ case BT_HEX: \ case BT_DIGIT: \ @@ -112,7 +113,7 @@ *nextTokPtr = ptr; \ return XML_TOK_INVALID; \ } \ - /* fall through */ \ + EXPAT_FALLTHROUGH; \ case BT_NMSTRT: \ case BT_HEX: \ ptr += MINBPC(enc); \ @@ -209,7 +210,7 @@ PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, const char *end, *nextTokPtr = ptr; return XML_TOK_INVALID; } - /* fall through */ + EXPAT_FALLTHROUGH; case BT_S: case BT_CR: case BT_LF: @@ -323,7 +324,7 @@ PREFIX(scanPi)(const ENCODING *enc, const char *ptr, const char *end, *nextTokPtr = ptr + MINBPC(enc); return tok; } - /* fall through */ + EXPAT_FALLTHROUGH; default: *nextTokPtr = ptr; return XML_TOK_INVALID; @@ -615,7 +616,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, return XML_TOK_INVALID; } } - /* fall through */ + EXPAT_FALLTHROUGH; case BT_EQUALS: { int open; # ifdef XML_NS @@ -898,7 +899,7 @@ PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, return XML_TOK_INVALID; } } - /* fall through */ + EXPAT_FALLTHROUGH; case BT_AMP: case BT_LT: case BT_NONXML: @@ -1059,7 +1060,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, /* indicate that this might be part of a CR/LF pair */ return -XML_TOK_PROLOG_S; } - /* fall through */ + EXPAT_FALLTHROUGH; case BT_S: case BT_LF: for (;;) { @@ -1074,7 +1075,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, /* don't split CR/LF pair */ if (ptr + MINBPC(enc) != end) break; - /* fall through */ + EXPAT_FALLTHROUGH; default: *nextTokPtr = ptr; return XML_TOK_PROLOG_S; @@ -1189,7 +1190,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, tok = XML_TOK_NMTOKEN; break; } - /* fall through */ + EXPAT_FALLTHROUGH; default: *nextTokPtr = ptr; return XML_TOK_INVALID; @@ -1484,7 +1485,7 @@ PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end, case BT_NMSTRT: if (! (BYTE_TO_ASCII(enc, ptr) & ~0x7f)) break; - /* fall through */ + EXPAT_FALLTHROUGH; default: switch (BYTE_TO_ASCII(enc, ptr)) { case 0x24: /* $ */ diff --git a/PCbuild/_elementtree.vcxproj b/PCbuild/_elementtree.vcxproj index 3eb9c89bcb67405..46e34a5ea82d5d0 100644 --- a/PCbuild/_elementtree.vcxproj +++ b/PCbuild/_elementtree.vcxproj @@ -103,9 +103,11 @@ + + diff --git a/PCbuild/_elementtree.vcxproj.filters b/PCbuild/_elementtree.vcxproj.filters index a5368024ccebfd4..f6b55d8ff82a68c 100644 --- a/PCbuild/_elementtree.vcxproj.filters +++ b/PCbuild/_elementtree.vcxproj.filters @@ -33,6 +33,9 @@ Header Files\expat + + Header Files\expat + Header Files\expat @@ -42,6 +45,9 @@ Header Files\expat + + Header Files\expat + Header Files\expat diff --git a/PCbuild/pyexpat.vcxproj b/PCbuild/pyexpat.vcxproj index 8e0f5f6311220a1..8a60ee15eb41bf8 100644 --- a/PCbuild/pyexpat.vcxproj +++ b/PCbuild/pyexpat.vcxproj @@ -95,6 +95,8 @@ + + diff --git a/PCbuild/pyexpat.vcxproj.filters b/PCbuild/pyexpat.vcxproj.filters index fd22fc8c477df02..230531033fccd87 100644 --- a/PCbuild/pyexpat.vcxproj.filters +++ b/PCbuild/pyexpat.vcxproj.filters @@ -12,6 +12,12 @@ + + Header Files + + + Header Files + Header Files diff --git a/Tools/build/.warningignore_macos b/Tools/build/.warningignore_macos index d7b62bc6a43ba65..67f50119db73100 100644 --- a/Tools/build/.warningignore_macos +++ b/Tools/build/.warningignore_macos @@ -3,7 +3,3 @@ # Keep lines sorted lexicographically to help avoid merge conflicts. # Format example: # /path/to/file (number of warnings in file) -Modules/expat/siphash.h 7 -Modules/expat/xmlparse.c 13 -Modules/expat/xmltok.c 3 -Modules/expat/xmltok_impl.c 26