From 4af4265d581eb4be7dc66dca027f1ab5f0dfa481 Mon Sep 17 00:00:00 2001 From: TSUYUSATO Kitsune Date: Tue, 18 Jun 2024 19:58:04 +0900 Subject: [PATCH 1/4] Reject unclosed DOCTYPE on parsing Fix https://github.com/ruby/rexml/issues/152 --- lib/rexml/parsers/treeparser.rb | 32 ++++++++----- test/parse/test_document_type_declaration.rb | 47 ++++++++++++++++++++ 2 files changed, 68 insertions(+), 11 deletions(-) diff --git a/lib/rexml/parsers/treeparser.rb b/lib/rexml/parsers/treeparser.rb index bf9a4254..9ddb54db 100644 --- a/lib/rexml/parsers/treeparser.rb +++ b/lib/rexml/parsers/treeparser.rb @@ -24,12 +24,20 @@ def parse #STDERR.puts "TREEPARSER GOT #{event.inspect}" case event[0] when :end_document + if in_doctype + raise ParseException.new("Malformed DOCTYPE: unclosed", + @parser.source, @parser) + end unless tag_stack.empty? raise ParseException.new("No close tag for #{@build_context.xpath}", @parser.source, @parser) end return when :start_element + if in_doctype + raise ParseException.new("Malformed DOCTYPE: unclosed", + @parser.source, @parser) + end tag_stack.push(event[1]) el = @build_context = @build_context.add_element( event[1] ) event[2].each do |key, value| @@ -39,17 +47,19 @@ def parse tag_stack.pop @build_context = @build_context.parent when :text - if not in_doctype - if @build_context[-1].instance_of? Text - @build_context[-1] << event[1] - else - @build_context.add( - Text.new(event[1], @build_context.whitespace, nil, true) - ) unless ( - @build_context.ignore_whitespace_nodes and - event[1].strip.size==0 - ) - end + if in_doctype + raise ParseException.new("Malformed DOCTYPE: unclosed", + @parser.source, @parser) + end + if @build_context[-1].instance_of? Text + @build_context[-1] << event[1] + else + @build_context.add( + Text.new(event[1], @build_context.whitespace, nil, true) + ) unless ( + @build_context.ignore_whitespace_nodes and + event[1].strip.size==0 + ) end when :comment c = Comment.new( event[1] ) diff --git a/test/parse/test_document_type_declaration.rb b/test/parse/test_document_type_declaration.rb index 8faa0b78..ec069b1d 100644 --- a/test/parse/test_document_type_declaration.rb +++ b/test/parse/test_document_type_declaration.rb @@ -53,6 +53,53 @@ def test_no_name end end + class TestUnclosed < self + def test_unclosed_doctype_only + exception = assert_raise(REXML::ParseException) do + REXML::Document.new(<<~DOCTYPE) + + DOCTYPE + end + assert_equal(<<~DETAIL.chomp, exception.to_s) + Malformed DOCTYPE: unclosed + Line: 1 + Position: 20 + Last 80 unconsumed characters: + #{' '} + DETAIL + end + + def test_unclosed_doctype_plus_text + exception = assert_raise(REXML::ParseException) do + REXML::Document.new(<<~DOCTYPE) + Date: Wed, 19 Jun 2024 11:04:23 +0900 Subject: [PATCH 2/4] Update test names Suggested by @kou. Thank you. Co-authored-by: Sutou Kouhei --- test/parse/test_document_type_declaration.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/parse/test_document_type_declaration.rb b/test/parse/test_document_type_declaration.rb index ec069b1d..8a726476 100644 --- a/test/parse/test_document_type_declaration.rb +++ b/test/parse/test_document_type_declaration.rb @@ -54,7 +54,7 @@ def test_no_name end class TestUnclosed < self - def test_unclosed_doctype_only + def test_no_extra_node exception = assert_raise(REXML::ParseException) do REXML::Document.new(<<~DOCTYPE) @@ -84,7 +84,7 @@ def test_unclosed_doctype_plus_start_element DETAIL end - def test_unclosed_doctype_plus_text + def test_text exception = assert_raise(REXML::ParseException) do REXML::Document.new(<<~DOCTYPE) Date: Wed, 19 Jun 2024 11:21:43 +0900 Subject: [PATCH 3/4] Fix to handle unclosed doctype decls in `BaseParser` See https://github.com/ruby/rexml/pull/153#discussion_r1645228365 --- lib/rexml/parsers/baseparser.rb | 10 +++++++++- lib/rexml/parsers/treeparser.rb | 12 ------------ test/parse/test_document_type_declaration.rb | 14 ++++++-------- 3 files changed, 15 insertions(+), 21 deletions(-) diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index 272d8a6b..5791ab1d 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -216,7 +216,12 @@ def pull_event x, @closed = @closed, nil return [ :end_element, x ] end - return [ :end_document ] if empty? + if empty? + if @document_status == :in_doctype + raise ParseException.new("Malformed DOCTYPE: unclosed", @source) + end + return [ :end_document ] + end return @stack.shift if @stack.size > 0 #STDERR.puts @source.encoding #STDERR.puts "BUFFER = #{@source.buffer.inspect}" @@ -373,6 +378,9 @@ def pull_event @document_status = :after_doctype return [ :end_doctype ] end + if @document_status == :in_doctype + raise ParseException.new("Malformed DOCTYPE: invalid declaration", @source) + end end if @document_status == :after_doctype @source.match(/\s*/um, true) diff --git a/lib/rexml/parsers/treeparser.rb b/lib/rexml/parsers/treeparser.rb index 9ddb54db..04dddea9 100644 --- a/lib/rexml/parsers/treeparser.rb +++ b/lib/rexml/parsers/treeparser.rb @@ -24,20 +24,12 @@ def parse #STDERR.puts "TREEPARSER GOT #{event.inspect}" case event[0] when :end_document - if in_doctype - raise ParseException.new("Malformed DOCTYPE: unclosed", - @parser.source, @parser) - end unless tag_stack.empty? raise ParseException.new("No close tag for #{@build_context.xpath}", @parser.source, @parser) end return when :start_element - if in_doctype - raise ParseException.new("Malformed DOCTYPE: unclosed", - @parser.source, @parser) - end tag_stack.push(event[1]) el = @build_context = @build_context.add_element( event[1] ) event[2].each do |key, value| @@ -47,10 +39,6 @@ def parse tag_stack.pop @build_context = @build_context.parent when :text - if in_doctype - raise ParseException.new("Malformed DOCTYPE: unclosed", - @parser.source, @parser) - end if @build_context[-1].instance_of? Text @build_context[-1] << event[1] else diff --git a/test/parse/test_document_type_declaration.rb b/test/parse/test_document_type_declaration.rb index 8a726476..3ca0b536 100644 --- a/test/parse/test_document_type_declaration.rb +++ b/test/parse/test_document_type_declaration.rb @@ -56,14 +56,12 @@ def test_no_name class TestUnclosed < self def test_no_extra_node exception = assert_raise(REXML::ParseException) do - REXML::Document.new(<<~DOCTYPE) - #{' '} DETAIL end @@ -91,11 +89,11 @@ def test_text DOCTYPE end assert_equal(<<~DETAIL.chomp, exception.to_s) - Malformed DOCTYPE: unclosed + Malformed DOCTYPE: invalid declaration Line: 1 Position: 21 Last 80 unconsumed characters: - + text#{' '} DETAIL end end From f9be6b0e323a199cf02feb472e5ae4123ae1ab40 Mon Sep 17 00:00:00 2001 From: TSUYUSATO Kitsune Date: Wed, 19 Jun 2024 11:23:19 +0900 Subject: [PATCH 4/4] `in_doctype` is no longer used --- lib/rexml/parsers/treeparser.rb | 3 --- 1 file changed, 3 deletions(-) diff --git a/lib/rexml/parsers/treeparser.rb b/lib/rexml/parsers/treeparser.rb index 04dddea9..0cb6f7cc 100644 --- a/lib/rexml/parsers/treeparser.rb +++ b/lib/rexml/parsers/treeparser.rb @@ -16,7 +16,6 @@ def add_listener( listener ) def parse tag_stack = [] - in_doctype = false entities = nil begin while true @@ -58,14 +57,12 @@ def parse when :processing_instruction @build_context.add( Instruction.new( event[1], event[2] ) ) when :end_doctype - in_doctype = false entities.each { |k,v| entities[k] = @build_context.entities[k].value } @build_context = @build_context.parent when :start_doctype doctype = DocType.new( event[1..-1], @build_context ) @build_context = doctype entities = {} - in_doctype = true when :attlistdecl n = AttlistDecl.new( event[1..-1] ) @build_context.add( n )