Skip to content

Commit c5b6259

Browse files
Copilot and quantizor authored
fix: handle URIError when HTML attributes contain % character (#754)
* Initial plan
* fix: handle URIError when HTML attributes contain % character (#753)

Co-authored-by: quantizor <[email protected]>

---------

Co-authored-by: copilot-swe-agent[bot] <[email protected]>
Co-authored-by: quantizor <[email protected]>
1 parent 7038496 commit c5b6259

File tree

3 files changed

+53
-2
lines changed

3 files changed

+53
-2
lines changed
Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,5 @@
1+
---
2+
"markdown-to-jsx": patch
3+
---
4+
5+
Fixed URIError when parsing HTML attributes containing the % character (e.g., `width="100%"`). The parser now gracefully handles invalid URI encodings in attribute values instead of throwing an error.

src/parse.spec.ts

Lines changed: 35 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -124,6 +124,22 @@ describe('parser', () => {
124124
},
125125
])
126126
})
127+
128+
it('should handle HTML with percent character in attributes without throwing URIError', () => {
129+
// Regression test for issue #753: URIError when HTML attributes contain % character
130+
const result = p.parser(
131+
'<iframe src="https://example.com" width="100%"></iframe>'
132+
) as (MarkdownToJSX.ParagraphNode & { endPos: number })[]
133+
expect(result).toHaveLength(1)
134+
expect(result[0].type).toBe(RuleType.paragraph)
135+
const htmlNode = result[0].children[0] as MarkdownToJSX.HTMLNode
136+
expect(htmlNode.type).toBe(RuleType.htmlBlock)
137+
expect(htmlNode.tag).toBe('iframe')
138+
expect(htmlNode.attrs).toEqual({
139+
src: 'https://example.com',
140+
width: '100%',
141+
})
142+
})
127143
})
128144

129145
describe('parseMarkdown', () => {
@@ -1120,6 +1136,25 @@ describe('parseHTMLTag', () => {
11201136
})
11211137
})
11221138

1139+
it('should parse tags with percent character in attributes without throwing URIError', () => {
1140+
// Regression test for issue #753: URIError when HTML attributes contain % character
1141+
const result = p.parseHTMLTag(
1142+
'<iframe src="https://example.com" width="100%"></iframe>',
1143+
0
1144+
)
1145+
expect(result).toEqual({
1146+
tagName: 'iframe',
1147+
tagLower: 'iframe',
1148+
attrs: 'src="https://example.com" width="100%"',
1149+
whitespaceBeforeAttrs: ' ',
1150+
isSelfClosing: false,
1151+
hasSpaceBeforeSlash: false,
1152+
isClosing: false,
1153+
hasNewline: false,
1154+
endPos: 47,
1155+
})
1156+
})
1157+
11231158
it('should parse tags with multiple spaces before attributes', () => {
11241159
const result = p.parseHTMLTag('<div class="test">', 0)
11251160
expect(result).toEqual({

src/parse.ts

Lines changed: 13 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -737,8 +737,19 @@ function parseHTMLAttributes(
737737
} else if (rawAttr !== 'style')
738738
result[isJSXComponent ? rawAttr : rawAttr.toLowerCase()] = true
739739
}
740-
if (util.SANITIZE_R.test(decodeURIComponent(attrs)))
741-
for (const key in result) delete result[key]
740+
// Check for URI-encoded malicious content in the raw attributes string
741+
// Only decode if % is present (performance optimization)
742+
if (attrs.indexOf('%') !== -1) {
743+
try {
744+
if (util.SANITIZE_R.test(decodeURIComponent(attrs)))
745+
for (var key in result) delete result[key]
746+
} catch (e) {
747+
// Invalid URI encoding (e.g., "100%") - skip the check
748+
// Individual attributes were already sanitized above
749+
}
750+
} else if (util.SANITIZE_R.test(attrs)) {
751+
for (var key in result) delete result[key]
752+
}
742753
return result
743754
}
744755

0 commit comments

Comments
 (0)