Skip to content

Commit 87bb302

Browse files
committed
gh-152204: Validate date fields in pure-Python date.fromisoformat
The pure-Python _parse_isoformat_date read each fixed-width field by calling int() on a slice of the input. int() silently accepts a leading sign or surrounding whitespace, and a slice that runs off the end of the string is silently shortened instead of being rejected. Malformed basic-format inputs such as '2020+12' or '2020061' were therefore parsed into a wrong-but-plausible date (2020-01-02 and 2020-06-01 respectively) instead of raising ValueError, while the C accelerator rejects them via parse_digits(). Validate that each field slice is exactly N ASCII digits before converting.
1 parent 11c241e commit 87bb302

3 files changed

Lines changed: 29 additions & 5 deletions

File tree

Lib/_pydatetime.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -360,14 +360,23 @@ def _parse_isoformat_date(dtstr):
360360
# see the comment on Modules/_datetimemodule.c:_find_isoformat_datetime_separator
361361
if len(dtstr) not in (7, 8, 10):
362362
raise ValueError("Invalid isoformat string")
363-
year = int(dtstr[0:4])
363+
def _read(s, n):
364+
# Each date field is a fixed width of exactly n ASCII digits. int()
365+
# would otherwise accept a leading sign or whitespace, or a short slice
366+
# that runs off the end of the string, so validate before converting
367+
# (this is what the C accelerator's parse_digits() enforces).
368+
if len(s) != n or not all(map(_is_ascii_digit, s)):
369+
raise ValueError(f"Invalid isoformat string: {dtstr!r}")
370+
return int(s)
371+
372+
year = _read(dtstr[0:4], 4)
364373
has_sep = dtstr[4] == '-'
365374

366375
pos = 4 + has_sep
367376
if dtstr[pos:pos + 1] == "W":
368377
# YYYY-?Www-?D?
369378
pos += 1
370-
weekno = int(dtstr[pos:pos + 2])
379+
weekno = _read(dtstr[pos:pos + 2], 2)
371380
pos += 2
372381

373382
dayno = 1
@@ -377,17 +386,17 @@ def _parse_isoformat_date(dtstr):
377386

378387
pos += has_sep
379388

380-
dayno = int(dtstr[pos:pos + 1])
389+
dayno = _read(dtstr[pos:pos + 1], 1)
381390

382391
return list(_isoweek_to_gregorian(year, weekno, dayno))
383392
else:
384-
month = int(dtstr[pos:pos + 2])
393+
month = _read(dtstr[pos:pos + 2], 2)
385394
pos += 2
386395
if (dtstr[pos:pos + 1] == "-") != has_sep:
387396
raise ValueError("Inconsistent use of dash separator")
388397

389398
pos += has_sep
390-
day = int(dtstr[pos:pos + 2])
399+
day = _read(dtstr[pos:pos + 2], 2)
391400

392401
return [year, month, day]
393402

Lib/test/datetimetester.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2106,6 +2106,16 @@ def test_fromisoformat_fails(self):
21062106
'10000-W25-1', # Invalid year
21072107
'2020-W25-0', # Invalid day-of-week
21082108
'2020-W25-8', # Invalid day-of-week
2109+
# gh-152204: each fixed-width field must be exactly N ASCII digits
2110+
'2020+12', # '+' accepted in a basic-format field
2111+
'2020 12', # space accepted in a basic-format field
2112+
'+020-06-15', # leading sign in the year
2113+
'202012+9', # '+' in the day field
2114+
'2020-W 5', # space in the week-number field
2115+
'2020061', # 7 chars: day slice reads a 1-character tail
2116+
'2020123', # 7 chars: day slice reads a 1-character tail
2117+
'9999121', # 7 chars: day slice reads a 1-character tail
2118+
'2020-W2', # 1-digit week number
21092119
'٢025-03-09' # Unicode characters
21102120
'2009\ud80002\ud80028', # Separators are surrogate codepoints
21112121
]
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Fix the pure-Python implementation of :meth:`datetime.date.fromisoformat`
2+
silently mis-parsing some malformed ISO 8601 basic-format dates (for example
3+
``'2020+12'`` or ``'2020061'``). Each fixed-width field is now required to be
4+
exactly *N* ASCII digits before conversion, matching the C implementation,
5+
which already rejected these inputs.

0 commit comments

Comments
 (0)