Skip to content

Improve BaseParser#unnormalize #194

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Aug 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 39 additions & 14 deletions lib/rexml/parsers/baseparser.rb
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be nice if the byte size were included in the 'raise' message, so that one can at least adjust @@entity_expansion_text_limit accordingly.

"entity expansion has grown too large: size: XY exceeded @@entity_expansion_text_limit"

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That would be helpful, but let's work on it in a separate PR.

Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,22 @@

module REXML
module Parsers
# Backfill Enumerable#tally through a refinement when the running Ruby does
# not already provide it, so later code can call #tally unconditionally.
unless [].respond_to?(:tally)
  module EnumerableTally
    refine Enumerable do
      # Returns a Hash mapping each yielded element to the number of times
      # it was yielded.
      def tally
        each_with_object({}) do |element, occurrences|
          occurrences[element] = occurrences.fetch(element, 0) + 1
        end
      end
    end
  end
  using EnumerableTally
end

if StringScanner::Version < "3.0.8"
module StringScannerCaptures
refine StringScanner do
Expand Down Expand Up @@ -547,20 +563,29 @@ def unnormalize( string, entities=nil, filter=nil )
[Integer(m)].pack('U*')
}
matches.collect!{|x|x[0]}.compact!
if filter
matches.reject! do |entity_reference|
filter.include?(entity_reference)
end
end
if matches.size > 0
matches.each do |entity_reference|
unless filter and filter.include?(entity_reference)
entity_value = entity( entity_reference, entities )
if entity_value
re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/
rv.gsub!( re, entity_value )
if rv.bytesize > Security.entity_expansion_text_limit
raise "entity expansion has grown too large"
end
else
er = DEFAULT_ENTITIES[entity_reference]
rv.gsub!( er[0], er[2] ) if er
matches.tally.each do |entity_reference, n|
entity_expansion_count_before = @entity_expansion_count
entity_value = entity( entity_reference, entities )
if entity_value
if n > 1
entity_expansion_count_delta =
@entity_expansion_count - entity_expansion_count_before
record_entity_expansion(entity_expansion_count_delta * (n - 1))
end
re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/
rv.gsub!( re, entity_value )
if rv.bytesize > Security.entity_expansion_text_limit
raise "entity expansion has grown too large"
end
else
er = DEFAULT_ENTITIES[entity_reference]
rv.gsub!( er[0], er[2] ) if er
end
end
rv.gsub!( Private::DEFAULT_ENTITIES_PATTERNS['amp'], '&' )
Expand All @@ -570,8 +595,8 @@ def unnormalize( string, entities=nil, filter=nil )

private

def record_entity_expansion
@entity_expansion_count += 1
def record_entity_expansion(delta=1)
@entity_expansion_count += delta
if @entity_expansion_count > Security.entity_expansion_limit
raise "number of entity expansions exceeded, processing aborted."
end
Expand Down
14 changes: 8 additions & 6 deletions test/test_pullparser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -206,21 +206,23 @@ def test_empty_value
</member>
XML

REXML::Security.entity_expansion_limit = 100000
parser = REXML::Parsers::PullParser.new(source)
assert_raise(RuntimeError.new("number of entity expansions exceeded, processing aborted.")) do
while parser.has_next?
parser.pull
end
while parser.has_next?
parser.pull
end
assert_equal(11111, parser.entity_expansion_count)

REXML::Security.entity_expansion_limit = 100
REXML::Security.entity_expansion_limit = @default_entity_expansion_limit
parser = REXML::Parsers::PullParser.new(source)
assert_raise(RuntimeError.new("number of entity expansions exceeded, processing aborted.")) do
while parser.has_next?
parser.pull
end
end
assert_equal(101, parser.entity_expansion_count)
assert do
parser.entity_expansion_count > @default_entity_expansion_limit
end
end

def test_with_default_entity
Expand Down
12 changes: 7 additions & 5 deletions test/test_sax.rb
Original file line number Diff line number Diff line change
Expand Up @@ -147,17 +147,19 @@ def test_empty_value
</member>
XML

REXML::Security.entity_expansion_limit = 100000
sax = REXML::Parsers::SAX2Parser.new(source)
assert_raise(RuntimeError.new("number of entity expansions exceeded, processing aborted.")) do
sax.parse
end
sax.parse
assert_equal(11111, sax.entity_expansion_count)

REXML::Security.entity_expansion_limit = 100
REXML::Security.entity_expansion_limit = @default_entity_expansion_limit
sax = REXML::Parsers::SAX2Parser.new(source)
assert_raise(RuntimeError.new("number of entity expansions exceeded, processing aborted.")) do
sax.parse
end
assert_equal(101, sax.entity_expansion_count)
assert do
sax.entity_expansion_count > @default_entity_expansion_limit
end
end

def test_with_default_entity
Expand Down