Skip to content

Commit 5d75caf

Browse files
committed
Fix to not allow Parameter Entity References at internal subsets.
## Why? In the internal subset of DTD, references to parameter entities are not allowed within markup declarations. See: https://www.w3.org/TR/xml/#wfc-PEinInternalSubset > Well-formedness constraint: PEs in Internal Subset > In the internal DTD subset, parameter-entity references MUST NOT occur within markup declarations; they may occur where markup declarations can occur. (This does not apply to references that occur in external parameter entities or to the external subset.)
1 parent 9fcad51 commit 5d75caf

File tree

4 files changed

+37
-118
lines changed

4 files changed

+37
-118
lines changed

lib/rexml/entity.rb

Lines changed: 9 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,15 @@ class Entity < Child
1212
EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
1313
NDATADECL = "\\s+NDATA\\s+#{NAME}"
1414
PEREFERENCE = "%#{NAME};"
15+
PEREFERENCE_RE = /#{PEREFERENCE}/um
1516
ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
1617
PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
1718
ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
1819
PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
1920
GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
2021
ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
2122

22-
attr_reader :name, :external, :ref, :ndata, :pubid
23+
attr_reader :name, :external, :ref, :ndata, :pubid, :value
2324

2425
# Create a new entity. Simple entities can be constructed by passing a
2526
# name, value to the constructor; this creates a generic, plain entity
@@ -59,6 +60,10 @@ def initialize stream, value=nil, parent=nil, reference=false
5960
@name = stream
6061
@value = value
6162
end
63+
64+
if PEREFERENCE_RE.match?(@value)
65+
raise "Parameter Entity References forbidden in internal subset: #{@value}"
66+
end
6267
end
6368

6469
# Evaluates whether the given string matches an entity definition,
@@ -68,14 +73,11 @@ def Entity::matches? string
6873
end
6974

7075
# Evaluates to the unnormalized value of this entity; that is, replacing
71-
# all entities -- both %ent; and &ent; entities. This differs from
72-
# +value()+ in that +value+ only replaces %ent; entities.
76+
# &ent; entities.
7377
def unnormalized
7478
document.record_entity_expansion unless document.nil?
75-
v = value()
76-
return nil if v.nil?
77-
@unnormalized = Text::unnormalize(v, parent)
78-
@unnormalized
79+
return nil if @value.nil?
80+
@unnormalized = Text::unnormalize(@value, parent)
7981
end
8082

8183
#once :unnormalized
@@ -121,46 +123,6 @@ def to_s
121123
write rv
122124
rv
123125
end
124-
125-
PEREFERENCE_RE = /#{PEREFERENCE}/um
126-
# Returns the value of this entity. At the moment, only internal entities
127-
# are processed. If the value contains internal references (IE,
128-
# %blah;), those are replaced with their values. IE, if the doctype
129-
# contains:
130-
# <!ENTITY % foo "bar">
131-
# <!ENTITY yada "nanoo %foo; nanoo>
132-
# then:
133-
# doctype.entity('yada').value #-> "nanoo bar nanoo"
134-
def value
135-
@resolved_value ||= resolve_value
136-
end
137-
138-
def parent=(other)
139-
@resolved_value = nil
140-
super
141-
end
142-
143-
private
144-
def resolve_value
145-
return nil if @value.nil?
146-
return @value unless @value.match?(PEREFERENCE_RE)
147-
148-
matches = @value.scan(PEREFERENCE_RE)
149-
rv = @value.clone
150-
if @parent
151-
sum = 0
152-
matches.each do |entity_reference|
153-
entity_value = @parent.entity( entity_reference[0] )
154-
if sum + entity_value.bytesize > Security.entity_expansion_text_limit
155-
raise "entity expansion has grown too large"
156-
else
157-
sum += entity_value.bytesize
158-
end
159-
rv.gsub!( /%#{entity_reference.join};/um, entity_value )
160-
end
161-
end
162-
rv
163-
end
164126
end
165127

166128
# This is a set of entity constants -- the ones defined in the XML

lib/rexml/parsers/baseparser.rb

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,7 @@ class BaseParser
125125
}
126126

127127
module Private
128+
PEREFERENCE_RE = /#{PEREFERENCE}/um
128129
TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um
129130
CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um
130131
ATTLISTDECL_END = /\s+#{NAME}(?:#{ATTDEF})*\s*>/um
@@ -334,6 +335,8 @@ def pull_event
334335
match[4] = match[4][1..-2] # HREF
335336
match.delete_at(5) if match.size > 5 # Chop out NDATA decl
336337
# match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ]
338+
elsif Private::PEREFERENCE_RE.match?(match[2])
339+
raise REXML::ParseException.new("Parameter Entity References forbidden in internal subset: #{match[2]}", @source)
337340
else
338341
match[2] = match[2][1..-2]
339342
match.pop if match.size == 4

test/test_document.rb

Lines changed: 0 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -147,56 +147,6 @@ def test_entity_expansion_text_limit
147147
assert_equal(90, doc.root.children.first.value.bytesize)
148148
end
149149
end
150-
151-
class ParameterEntityTest < self
152-
def test_have_value
153-
xml = <<EOF
154-
<!DOCTYPE root [
155-
<!ENTITY % a "BOOM.BOOM.BOOM.BOOM.BOOM.BOOM.BOOM.BOOM.BOOM.">
156-
<!ENTITY % b "%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;">
157-
<!ENTITY % c "%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;">
158-
<!ENTITY % d "%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;">
159-
<!ENTITY % e "%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;">
160-
<!ENTITY % f "%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;">
161-
<!ENTITY % g "%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;">
162-
<!ENTITY test "test %g;">
163-
]>
164-
<cd></cd>
165-
EOF
166-
167-
assert_raise(REXML::ParseException) do
168-
REXML::Document.new(xml)
169-
end
170-
REXML::Security.entity_expansion_limit = 100
171-
assert_equal(100, REXML::Security.entity_expansion_limit)
172-
assert_raise(REXML::ParseException) do
173-
REXML::Document.new(xml)
174-
end
175-
end
176-
177-
def test_empty_value
178-
xml = <<EOF
179-
<!DOCTYPE root [
180-
<!ENTITY % a "">
181-
<!ENTITY % b "%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;">
182-
<!ENTITY % c "%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;">
183-
<!ENTITY % d "%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;">
184-
<!ENTITY % e "%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;">
185-
<!ENTITY % f "%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;">
186-
<!ENTITY % g "%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;">
187-
<!ENTITY test "test %g;">
188-
]>
189-
<cd></cd>
190-
EOF
191-
192-
REXML::Document.new(xml)
193-
REXML::Security.entity_expansion_limit = 90
194-
assert_equal(90, REXML::Security.entity_expansion_limit)
195-
assert_raise(REXML::ParseException) do
196-
REXML::Document.new(xml)
197-
end
198-
end
199-
end
200150
end
201151

202152
def test_tag_in_cdata_with_not_ascii_only_but_ascii8bit_encoding_source

test/test_entity.rb

Lines changed: 25 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,7 @@ def test_parse_entity
5959

6060
def test_constructor
6161
one = [ %q{<!ENTITY % YN '"Yes"'>},
62-
%q{<!ENTITY % YN2 "Yes">},
63-
%q{<!ENTITY WhatHeSaid "He said %YN;">},
62+
%q{<!ENTITY WhatHeSaid 'He said "Yes"'>},
6463
'<!ENTITY open-hatch
6564
SYSTEM "http://www.textuality.com/boilerplate/OpenHatch.xml">',
6665
'<!ENTITY open-hatch2
@@ -71,8 +70,7 @@ def test_constructor
7170
NDATA gif>' ]
7271
source = %q{<!DOCTYPE foo [
7372
<!ENTITY % YN '"Yes"'>
74-
<!ENTITY % YN2 "Yes">
75-
<!ENTITY WhatHeSaid "He said %YN;">
73+
<!ENTITY WhatHeSaid 'He said "Yes"'>
7674
<!ENTITY open-hatch
7775
SYSTEM "http://www.textuality.com/boilerplate/OpenHatch.xml">
7876
<!ENTITY open-hatch2
@@ -138,6 +136,28 @@ def test_doctype_entity_with_nested_references
138136
assert_equal("X", doctype.entities["b"].unnormalized)
139137
end
140138

139+
def test_parameter_entity_reference_forbidden_by_internal_subset_in_new
140+
assert_raise(RuntimeError.new('Parameter Entity References forbidden in internal subset: %a;')) do
141+
REXML::Entity.new( [:entitydecl, "c", "%a;"])
142+
end
143+
end
144+
145+
def test_parameter_entity_reference_forbidden_by_internal_subset_in_parser
146+
source = '<!DOCTYPE root [ <!ENTITY % a "B" > <!ENTITY c "%a;" > ]><root/>'
147+
parser = REXML::Parsers::BaseParser.new(source)
148+
exception = assert_raise(REXML::ParseException) do
149+
while parser.has_next?
150+
parser.pull
151+
end
152+
end
153+
assert_equal(<<-DETAIL, exception.to_s)
154+
Parameter Entity References forbidden in internal subset: "%a;"
155+
Line: 1
156+
Position: 54
157+
Last 80 unconsumed characters:
158+
DETAIL
159+
end
160+
141161
def test_entity_string_limit
142162
template = '<!DOCTYPE bomb [ <!ENTITY a "^" > ]> <bomb>$</bomb>'
143163
len = 5120 # 5k per entity
@@ -156,22 +176,6 @@ def test_entity_string_limit
156176
end
157177
end
158178

159-
def test_entity_string_limit_for_parameter_entity
160-
template = '<!DOCTYPE bomb [ <!ENTITY % a "^" > <!ENTITY bomb "$" > ]><root/>'
161-
len = 5120 # 5k per entity
162-
template.sub!(/\^/, "B" * len)
163-
164-
# 10k is OK
165-
entities = '%a;' * 2 # 5k entity * 2 = 10k
166-
REXML::Document.new(template.sub(/\$/, entities))
167-
168-
# above 10k explodes
169-
entities = '%a;' * 3 # 5k entity * 2 = 15k
170-
assert_raise(REXML::ParseException) do
171-
REXML::Document.new(template.sub(/\$/, entities))
172-
end
173-
end
174-
175179
def test_raw
176180
source = '<!DOCTYPE foo [
177181
<!ENTITY ent "replace">
@@ -195,7 +199,7 @@ def test_lazy_evaluation
195199
def test_entity_replacement
196200
source = %q{<!DOCTYPE foo [
197201
<!ENTITY % YN '"Yes"'>
198-
<!ENTITY WhatHeSaid "He said %YN;">]>
202+
<!ENTITY WhatHeSaid 'He said "Yes"'>]>
199203
<a>&WhatHeSaid;</a>}
200204

201205
d = REXML::Document.new( source )

0 commit comments

Comments
 (0)