diff --git a/.github/workflows/python.yaml b/.github/workflows/python.yaml index 40ce721a..2ca433a4 100644 --- a/.github/workflows/python.yaml +++ b/.github/workflows/python.yaml @@ -15,9 +15,9 @@ jobs: - macos-latest - windows-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: ${{ matrix.python-version }} - name: Install dependencies diff --git a/lib/markdown2.py b/lib/markdown2.py index dc698970..ffaa0527 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -1421,13 +1421,13 @@ def _unhash_html_spans(self, text: str, spans=True, code=False) -> str: ''' orig = '' while text != orig: + orig = text if spans: for key, sanitized in list(self.html_spans.items()): text = text.replace(key, sanitized) if code: for code, key in list(self._code_table.items()): text = text.replace(key, code) - orig = text return text def _sanitize_html(self, s: str) -> str: @@ -1518,6 +1518,12 @@ def _protect_url(self, url: str) -> str: mime = data_url.group('mime') or '' if mime.startswith('image/') and data_url.group('token') == ';base64': charset='base64' + else: + url = ( + self._unhash_html_spans(url, code=True) + .replace('*', self._escape_table['*']) + .replace('_', self._escape_table['_']) + ) url = _html_escape_url(url, safe_mode=self.safe_mode, charset=charset) key = _hash_text(url) self._escape_table[url] = key @@ -1537,8 +1543,10 @@ def _safe_href(self): safe = r'-\w' # omitted ['"<>] for XSS reasons less_safe = r'#/\.!#$%&\(\)\+,/:;=\?@\[\]^`\{\}\|~' + # html encoded colon in a URL still functions as a normal colon, so need to detect those + protocol_seperators = [':', ':', ':', ':'] # dot seperated hostname, optional port number, not followed by protocol seperator - domain = r'(?:[{}]+(?:\.[{}]+)*)(?:(?<code>" onerror="alert(1)//</code>

A

+ +

x

+ +

x

+ + + +

+ +

![x](<"`"x

diff --git a/test/tm-cases/xss_smuggling_spans_in_image_attrs.text b/test/tm-cases/xss_smuggling_spans_in_image_attrs.text index 4a5c25a8..3693c6c5 100644 --- a/test/tm-cases/xss_smuggling_spans_in_image_attrs.text +++ b/test/tm-cases/xss_smuggling_spans_in_image_attrs.text @@ -2,4 +2,17 @@ ![`" onerror="alert(1)//`]() -![A](B "") \ No newline at end of file +![A](B "") + +[x](javascript:alert(origin)) + +[x](javascript:1/alert(origin)) + +- +- ![](x '`![](`') onerror=alert(origin) ) + +![](``) + +![x](<"`"![x][id] +[id]: x "`