fix: handle URIError when HTML attributes contain % character (#754)

Copilot · quantizor · web-flow · commit c5b625926ced · 2025-12-10T22:39:39.000-05:00
* Initial plan

* fix: handle URIError when HTML attributes contain % character (#753)

Co-authored-by: quantizor <570070+quantizor@users.noreply.github.com>

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: quantizor <570070+quantizor@users.noreply.github.com>
diff --git a/.changeset/fix-urierror-html-attributes.md b/.changeset/fix-urierror-html-attributes.md
@@ -0,0 +1,5 @@
+---
+"markdown-to-jsx": patch
+---
+
+Fixed URIError when parsing HTML attributes containing the % character (e.g., `width="100%"`). The parser now gracefully handles invalid URI encodings in attribute values instead of throwing an error.
diff --git a/src/parse.spec.ts b/src/parse.spec.ts
@@ -124,6 +124,22 @@ describe('parser', () => {
       },
     ])
   })
+
+  it('should handle HTML with percent character in attributes without throwing URIError', () => {
+    // Regression test for issue #753: URIError when HTML attributes contain % character
+    const result = p.parser(
+      '<iframe src="https://example.com" width="100%"></iframe>'
+    ) as (MarkdownToJSX.ParagraphNode & { endPos: number })[]
+    expect(result).toHaveLength(1)
+    expect(result[0].type).toBe(RuleType.paragraph)
+    const htmlNode = result[0].children[0] as MarkdownToJSX.HTMLNode
+    expect(htmlNode.type).toBe(RuleType.htmlBlock)
+    expect(htmlNode.tag).toBe('iframe')
+    expect(htmlNode.attrs).toEqual({
+      src: 'https://example.com',
+      width: '100%',
+    })
+  })
 })
 
 describe('parseMarkdown', () => {
@@ -1120,6 +1136,25 @@ describe('parseHTMLTag', () => {
     })
   })
 
+  it('should parse tags with percent character in attributes without throwing URIError', () => {
+    // Regression test for issue #753: URIError when HTML attributes contain % character
+    const result = p.parseHTMLTag(
+      '<iframe src="https://example.com" width="100%"></iframe>',
+      0
+    )
+    expect(result).toEqual({
+      tagName: 'iframe',
+      tagLower: 'iframe',
+      attrs: 'src="https://example.com" width="100%"',
+      whitespaceBeforeAttrs: ' ',
+      isSelfClosing: false,
+      hasSpaceBeforeSlash: false,
+      isClosing: false,
+      hasNewline: false,
+      endPos: 47,
+    })
+  })
+
   it('should parse tags with multiple spaces before attributes', () => {
     const result = p.parseHTMLTag('<div   class="test">', 0)
     expect(result).toEqual({
diff --git a/src/parse.ts b/src/parse.ts
@@ -737,8 +737,19 @@ function parseHTMLAttributes(
     } else if (rawAttr !== 'style')
       result[isJSXComponent ? rawAttr : rawAttr.toLowerCase()] = true
   }
-  if (util.SANITIZE_R.test(decodeURIComponent(attrs)))
-    for (const key in result) delete result[key]
+  // Check for URI-encoded malicious content in the raw attributes string
+  // Only decode if % is present (performance optimization)
+  if (attrs.indexOf('%') !== -1) {
+    try {
+      if (util.SANITIZE_R.test(decodeURIComponent(attrs)))
+        for (var key in result) delete result[key]
+    } catch (e) {
+      // Invalid URI encoding (e.g., "100%") - skip the check
+      // Individual attributes were already sanitized above
+    }
+  } else if (util.SANITIZE_R.test(attrs)) {
+    for (var key in result) delete result[key]
+  }
   return result
 }
 

-Original file line number
+Diff line change
@@ -0,0 +1,5 @@
 +---
 +"markdown-to-jsx": patch
 +---
 +
 +Fixed URIError when parsing HTML attributes containing the % character (e.g., `width="100%"`). The parser now gracefully handles invalid URI encodings in attribute values instead of throwing an error.