Skip to content

Commit 4ca0ee6

Browse files
Fix hang on UTF-16 LE BOM file (#636)
Co-authored-by: chrchr-github <noreply@github.com>
1 parent cc67864 commit 4ca0ee6

2 files changed

Lines changed: 20 additions & 5 deletions

File tree

integration_test.py

Lines changed: 14 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -502,4 +502,17 @@ def test_define(record_property, tmpdir): # #589
502502

503503
assert exitcode == 0
504504
assert stderr == "test.cpp:1: syntax error: failed to expand 'TEST_P', Invalid ## usage when expanding 'TEST_P': Unexpected token ')'\n"
505-
assert stdout == '\n'
505+
assert stdout == '\n'
506+
507+
def test_utf16_bom(tmpdir):
508+
test_file = os.path.join(tmpdir, "test.cpp")
509+
with open(test_file, 'wb') as f:
510+
f.write(b'\xFF\xFE\x3B\x00')
511+
512+
args = [test_file]
513+
514+
exitcode, stdout, stderr = simplecpp(args, cwd=tmpdir)
515+
516+
assert exitcode == 0
517+
assert stderr == ''
518+
assert stdout == ';\n'

simplecpp.cpp

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -275,8 +275,10 @@ class simplecpp::TokenList::Stream {
275275
return ch;
276276
}
277277

278-
unsigned char peekChar() {
279-
auto ch = static_cast<unsigned char>(peek());
278+
int peekChar() {
279+
int ch = peek();
280+
if (ch == EOF)
281+
return ch;
280282

281283
// For UTF-16 encoded files the BOM is 0xfeff/0xfffe. If the
282284
// character is non-ASCII character then replace it with 0xff
@@ -285,7 +287,7 @@ class simplecpp::TokenList::Stream {
285287
const auto ch2 = static_cast<unsigned char>(peek());
286288
unget();
287289
const int ch16 = makeUtf16Char(ch, ch2);
288-
ch = static_cast<unsigned char>(((ch16 >= 0x80) ? 0xff : ch16));
290+
ch = (ch16 >= 0x80) ? 0xff : ch16;
289291
}
290292

291293
// Handling of newlines..
@@ -598,7 +600,7 @@ std::string simplecpp::TokenList::stringify(bool linenrs) const
598600
return ret.str();
599601
}
600602

601-
static bool isNameChar(unsigned char ch)
603+
static bool isNameChar(int ch)
602604
{
603605
return std::isalnum(ch) || ch == '_' || ch == '$';
604606
}

0 commit comments

Comments
 (0)