Skip to content

Fix to not allow parameter entity references at internal subsets #191

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 5 additions & 47 deletions lib/rexml/entity.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,15 @@ class Entity < Child
EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
NDATADECL = "\\s+NDATA\\s+#{NAME}"
PEREFERENCE = "%#{NAME};"
PEREFERENCE_RE = /#{PEREFERENCE}/um
ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um

attr_reader :name, :external, :ref, :ndata, :pubid
attr_reader :name, :external, :ref, :ndata, :pubid, :value

# Create a new entity. Simple entities can be constructed by passing a
# name, value to the constructor; this creates a generic, plain entity
Expand Down Expand Up @@ -68,14 +69,11 @@ def Entity::matches? string
end

# Evaluates to the unnormalized value of this entity; that is, replacing
# all entities -- both %ent; and &ent; entities. This differs from
# +value()+ in that +value+ only replaces %ent; entities.
# &ent; entities.
def unnormalized
document.record_entity_expansion unless document.nil?
v = value()
return nil if v.nil?
@unnormalized = Text::unnormalize(v, parent)
@unnormalized
return nil if @value.nil?
@unnormalized = Text::unnormalize(@value, parent)
end

#once :unnormalized
Expand Down Expand Up @@ -121,46 +119,6 @@ def to_s
write rv
rv
end

PEREFERENCE_RE = /#{PEREFERENCE}/um
# Returns the value of this entity. At the moment, only internal entities
# are processed. If the value contains internal references (IE,
# %blah;), those are replaced with their values. IE, if the doctype
# contains:
# <!ENTITY % foo "bar">
# <!ENTITY yada "nanoo %foo; nanoo>
# then:
# doctype.entity('yada').value #-> "nanoo bar nanoo"
def value
@resolved_value ||= resolve_value
end

def parent=(other)
@resolved_value = nil
super
end

private
def resolve_value
return nil if @value.nil?
return @value unless @value.match?(PEREFERENCE_RE)

matches = @value.scan(PEREFERENCE_RE)
rv = @value.clone
if @parent
sum = 0
matches.each do |entity_reference|
entity_value = @parent.entity( entity_reference[0] )
if sum + entity_value.bytesize > Security.entity_expansion_text_limit
raise "entity expansion has grown too large"
else
sum += entity_value.bytesize
end
rv.gsub!( /%#{entity_reference.join};/um, entity_value )
end
end
rv
end
end

# This is a set of entity constants -- the ones defined in the XML
Expand Down
3 changes: 3 additions & 0 deletions lib/rexml/parsers/baseparser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ class BaseParser
}

module Private
PEREFERENCE_PATTERN = /#{PEREFERENCE}/um
TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um
CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um
ATTLISTDECL_END = /\s+#{NAME}(?:#{ATTDEF})*\s*>/um
Expand Down Expand Up @@ -350,6 +351,8 @@ def pull_event
match[4] = match[4][1..-2] # HREF
match.delete_at(5) if match.size > 5 # Chop out NDATA decl
# match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ]
elsif Private::PEREFERENCE_PATTERN.match?(match[2])
raise REXML::ParseException.new("Parameter entity references forbidden in internal subset: #{match[2]}", @source)
else
match[2] = match[2][1..-2]
match.pop if match.size == 4
Expand Down
50 changes: 0 additions & 50 deletions test/test_document.rb
Original file line number Diff line number Diff line change
Expand Up @@ -147,56 +147,6 @@ def test_entity_expansion_text_limit
assert_equal(90, doc.root.children.first.value.bytesize)
end
end

class ParameterEntityTest < self
def test_have_value
xml = <<EOF
<!DOCTYPE root [
<!ENTITY % a "BOOM.BOOM.BOOM.BOOM.BOOM.BOOM.BOOM.BOOM.BOOM.">
<!ENTITY % b "%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;">
<!ENTITY % c "%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;">
<!ENTITY % d "%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;">
<!ENTITY % e "%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;">
<!ENTITY % f "%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;">
<!ENTITY % g "%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;">
<!ENTITY test "test %g;">
]>
<cd></cd>
EOF

assert_raise(REXML::ParseException) do
REXML::Document.new(xml)
end
REXML::Security.entity_expansion_limit = 100
assert_equal(100, REXML::Security.entity_expansion_limit)
assert_raise(REXML::ParseException) do
REXML::Document.new(xml)
end
end

def test_empty_value
xml = <<EOF
<!DOCTYPE root [
<!ENTITY % a "">
<!ENTITY % b "%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;">
<!ENTITY % c "%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;">
<!ENTITY % d "%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;">
<!ENTITY % e "%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;">
<!ENTITY % f "%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;">
<!ENTITY % g "%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;">
<!ENTITY test "test %g;">
]>
<cd></cd>
EOF

REXML::Document.new(xml)
REXML::Security.entity_expansion_limit = 90
assert_equal(90, REXML::Security.entity_expansion_limit)
assert_raise(REXML::ParseException) do
REXML::Document.new(xml)
end
end
end
end

def test_tag_in_cdata_with_not_ascii_only_but_ascii8bit_encoding_source
Expand Down
102 changes: 81 additions & 21 deletions test/test_entity.rb
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,7 @@ def test_parse_entity

def test_constructor
one = [ %q{<!ENTITY % YN '"Yes"'>},
%q{<!ENTITY % YN2 "Yes">},
%q{<!ENTITY WhatHeSaid "He said %YN;">},
%q{<!ENTITY WhatHeSaid 'He said "Yes"'>},
'<!ENTITY open-hatch
SYSTEM "http://www.textuality.com/boilerplate/OpenHatch.xml">',
'<!ENTITY open-hatch2
Expand All @@ -71,8 +70,7 @@ def test_constructor
NDATA gif>' ]
source = %q{<!DOCTYPE foo [
<!ENTITY % YN '"Yes"'>
<!ENTITY % YN2 "Yes">
<!ENTITY WhatHeSaid "He said %YN;">
<!ENTITY WhatHeSaid 'He said "Yes"'>
<!ENTITY open-hatch
SYSTEM "http://www.textuality.com/boilerplate/OpenHatch.xml">
<!ENTITY open-hatch2
Expand Down Expand Up @@ -104,6 +102,84 @@ def test_replace_entities
assert_equal source, out
end

def test_readers_with_reference
entity = REXML::Entity.new([:entitydecl, "a", "B", "%"])
assert_equal([
'<!ENTITY % a "B">',
"a",
"B",
"B",
"B",
],
[
entity.to_s,
entity.name,
entity.value,
entity.normalized,
entity.unnormalized,
])
end

def test_readers_without_reference
entity = REXML::Entity.new([:entitydecl, "a", "&b;"])
assert_equal([
'<!ENTITY a "&b;">',
"a",
"&b;",
"&b;",
"&b;",
],
[
entity.to_s,
entity.name,
entity.value,
entity.normalized,
entity.unnormalized,
])
end

def test_readers_with_nested_references
doctype = REXML::DocType.new('root')
doctype.add(REXML::Entity.new([:entitydecl, "a", "&b;"]))
doctype.add(REXML::Entity.new([:entitydecl, "b", "X"]))
assert_equal([
"a",
"&b;",
"&b;",
"X",
"b",
"X",
"X",
"X",
],
[
doctype.entities["a"].name,
doctype.entities["a"].value,
doctype.entities["a"].normalized,
doctype.entities["a"].unnormalized,
doctype.entities["b"].name,
doctype.entities["b"].value,
doctype.entities["b"].normalized,
doctype.entities["b"].unnormalized,
])
end

def test_parameter_entity_reference_forbidden_by_internal_subset_in_parser
source = '<!DOCTYPE root [ <!ENTITY % a "B" > <!ENTITY c "%a;" > ]><root/>'
parser = REXML::Parsers::BaseParser.new(source)
exception = assert_raise(REXML::ParseException) do
while parser.has_next?
parser.pull
end
end
assert_equal(<<-DETAIL, exception.to_s)
Parameter entity references forbidden in internal subset: "%a;"
Line: 1
Position: 54
Last 80 unconsumed characters:
DETAIL
end

def test_entity_string_limit
template = '<!DOCTYPE bomb [ <!ENTITY a "^" > ]> <bomb>$</bomb>'
len = 5120 # 5k per entity
Expand All @@ -122,22 +198,6 @@ def test_entity_string_limit
end
end

def test_entity_string_limit_for_parameter_entity
template = '<!DOCTYPE bomb [ <!ENTITY % a "^" > <!ENTITY bomb "$" > ]><root/>'
len = 5120 # 5k per entity
template.sub!(/\^/, "B" * len)

# 10k is OK
entities = '%a;' * 2 # 5k entity * 2 = 10k
REXML::Document.new(template.sub(/\$/, entities))

# above 10k explodes
entities = '%a;' * 3 # 5k entity * 2 = 15k
assert_raise(REXML::ParseException) do
REXML::Document.new(template.sub(/\$/, entities))
end
end

def test_raw
source = '<!DOCTYPE foo [
<!ENTITY ent "replace">
Expand All @@ -161,7 +221,7 @@ def test_lazy_evaluation
def test_entity_replacement
source = %q{<!DOCTYPE foo [
<!ENTITY % YN '"Yes"'>
<!ENTITY WhatHeSaid "He said %YN;">]>
<!ENTITY WhatHeSaid 'He said "Yes"'>]>
<a>&WhatHeSaid;</a>}

d = REXML::Document.new( source )
Expand Down