Skip to content

Commit bbac0fa

Browse files
committed
HtmlFilter: <script> content should be filtered out
1 parent bebe5dc commit bbac0fa

File tree

2 files changed

+65
-29
lines changed

2 files changed

+65
-29
lines changed

src/Source/Filter/HtmlFilter.php

Lines changed: 63 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,31 @@
1616
*/
1717
class HtmlFilter implements Filter
1818
{
19+
/**
20+
* Attribute name context.
21+
*/
22+
const CTX_ATTR_NAME = 'attr_name';
23+
24+
/**
25+
* Attribute value context.
26+
*/
27+
const CTX_ATTR_VALUE = 'attr_value';
28+
29+
/**
30+
* Tag attributes context.
31+
*/
32+
const CTX_TAG_ATTRS = 'tag_attrs';
33+
34+
/**
35+
* Tag content context.
36+
*/
37+
const CTX_TAG_CONTENT = 'tag_content';
38+
39+
/**
40+
* Tag name context.
41+
*/
42+
const CTX_TAG_NAME = 'tag_name';
43+
1944
/**
2045
* Ignore content of these tags.
2146
*
@@ -60,24 +85,32 @@ public function filter($string)
6085
// Current/last attribute name
6186
$attrName = null;
6287
// Current context
63-
$context = null;
88+
$context = self::CTX_TAG_CONTENT;
6489
// Expected context
6590
$expecting = null;
6691

92+
// By default, tag content is treated as text.
93+
$ignoreTagContent = false;
94+
// By default, attribute values are NOT treated as text.
95+
$ignoreAttrValue = true;
96+
6797
$length = mb_strlen($string);
6898
for ($i = 0; $i < $length; $i++) {
6999
$char = mb_substr($string, $i, 1);
70100
switch (true) {
71101
case '<' === $char:
72-
$context = 'tag_name';
102+
$context = self::CTX_TAG_NAME;
73103
$tagName = null;
74104
$char = ' ';
75105
break;
76106

77107
case '>' === $char:
78-
$context = 'tag_name' === $context && $this->isIgnoredTag($tagName)
79-
? 'ignored_tag_content'
80-
: null;
108+
if ($this->isIgnoredTag($tagName)) {
109+
$ignoreTagContent = true;
110+
} elseif ('/' === $tagName[0]) {
111+
$ignoreTagContent = false; // Restore to default state.
112+
}
113+
$context = self::CTX_TAG_CONTENT;
81114
$expecting = null;
82115
$char = ' ';
83116
break;
@@ -86,64 +119,67 @@ public function filter($string)
86119
case "\n" === $char:
87120
case "\t" === $char:
88121
switch ($context) {
89-
case 'tag_name':
90-
$context = 'tag_attrs';
122+
case self::CTX_TAG_NAME:
123+
$context = self::CTX_TAG_ATTRS;
91124
break;
92125

93-
case 'attr_name':
94-
$context = 'tag_attrs';
126+
case self::CTX_ATTR_NAME:
127+
$context = self::CTX_TAG_ATTRS;
95128
break;
96129
}
97130
break;
98131

99-
case '=' === $char && ('attr_name' === $context || 'tag_attrs' === $context):
100-
$expecting = 'attr_value';
132+
case '=' === $char
133+
&& (self::CTX_ATTR_NAME === $context || self::CTX_TAG_ATTRS === $context):
134+
$expecting = self::CTX_ATTR_VALUE;
101135
$char = ' ';
102136
break;
103137

104138
case '"' === $char:
105139
case "'" === $char:
106140
switch (true) {
107-
case 'attr_value' === $expecting:
108-
$context = 'attr_value';
109-
if (in_array(strtolower($attrName), self::$textAttrs, true)) {
110-
$context = 'attr_text';
111-
}
141+
case self::CTX_ATTR_VALUE === $expecting:
142+
$context = self::CTX_ATTR_VALUE;
143+
$ignoreAttrValue
144+
= !in_array(strtolower($attrName), self::$textAttrs, true);
112145
$expecting = null;
113146
$char = ' ';
114147
break;
115148

116-
case 'attr_value' === $context:
117-
case 'attr_text' === $context:
118-
$context = 'tag_attrs';
149+
case self::CTX_ATTR_VALUE === $context:
150+
$context = self::CTX_TAG_ATTRS;
119151
$char = ' ';
120152
break;
121153
}
122154
break;
123155

124156
default:
125157
switch ($context) {
126-
case 'tag_name':
158+
case self::CTX_TAG_NAME:
127159
$tagName .= $char;
128160
$char = ' ';
129161
break;
130162

131163
/** @noinspection PhpMissingBreakStatementInspection */
132-
case 'tag_attrs':
133-
$context = 'attr_name';
164+
case self::CTX_TAG_ATTRS:
165+
$context = self::CTX_ATTR_NAME;
134166
$attrName = null;
135167
// no break needed
136-
case 'attr_name':
168+
case self::CTX_ATTR_NAME:
137169
$attrName .= $char;
138170
$char = ' ';
139171
break;
140172

141-
case 'attr_value':
142-
$char = ' ';
173+
case self::CTX_ATTR_VALUE:
174+
if ($ignoreAttrValue) {
175+
$char = ' ';
176+
}
143177
break;
144178

145-
case 'ignored_tag_content':
146-
$char = ' ';
179+
case self::CTX_TAG_CONTENT:
180+
if ($ignoreTagContent) {
181+
$char = ' ';
182+
}
147183
break;
148184
}
149185
}

tests/Source/Filter/HtmlFilterTest.php

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,8 @@ public function testMetaContent()
5353
public function testScript()
5454
{
5555
$filter = new HtmlFilter();
56-
$html = "<p>Foo</p>\n<script>Bar Baz\nBuz</script>";
57-
$text = " Foo \n \n ";
56+
$html = "<p>Foo</p>\n<script type=\"text/javascript\">Bar Baz\nBuz</script>";
57+
$text = " Foo \n \n ";
5858
static::assertEquals($text, $filter->filter($html));
5959
}
6060
}

0 commit comments

Comments
 (0)