diff --git a/.github/workflows/python.yaml b/.github/workflows/python.yaml
index 40ce721a..2ca433a4 100644
--- a/.github/workflows/python.yaml
+++ b/.github/workflows/python.yaml
@@ -15,9 +15,9 @@ jobs:
- macos-latest
- windows-latest
steps:
- - uses: actions/checkout@v4
+ - uses: actions/checkout@v6
- name: Set up Python ${{ matrix.python-version }}
- uses: actions/setup-python@v5
+ uses: actions/setup-python@v6
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
diff --git a/lib/markdown2.py b/lib/markdown2.py
index dc698970..ffaa0527 100755
--- a/lib/markdown2.py
+++ b/lib/markdown2.py
@@ -1421,13 +1421,13 @@ def _unhash_html_spans(self, text: str, spans=True, code=False) -> str:
'''
orig = ''
while text != orig:
+ orig = text
if spans:
for key, sanitized in list(self.html_spans.items()):
text = text.replace(key, sanitized)
if code:
for code, key in list(self._code_table.items()):
text = text.replace(key, code)
- orig = text
return text
def _sanitize_html(self, s: str) -> str:
@@ -1518,6 +1518,12 @@ def _protect_url(self, url: str) -> str:
mime = data_url.group('mime') or ''
if mime.startswith('image/') and data_url.group('token') == ';base64':
charset='base64'
+ else:
+ url = (
+ self._unhash_html_spans(url, code=True)
+ .replace('*', self._escape_table['*'])
+ .replace('_', self._escape_table['_'])
+ )
url = _html_escape_url(url, safe_mode=self.safe_mode, charset=charset)
key = _hash_text(url)
self._escape_table[url] = key
@@ -1537,8 +1543,10 @@ def _safe_href(self):
safe = r'-\w'
# omitted ['"<>] for XSS reasons
less_safe = r'#/\.!#$%&\(\)\+,/:;=\?@\[\]^`\{\}\|~'
+ # An HTML-encoded colon in a URL still functions as a normal colon, so the encoded forms must be detected too
+ protocol_seperators = [':', ':', ':', ':']
# dot seperated hostname, optional port number, not followed by protocol seperator
- domain = r'(?:[{}]+(?:\.[{}]+)*)(?:(?
![x](<"`"