@@ -24,26 +24,38 @@ class Lexer
2424 const T_INVALID = -1 ;
2525 const T_SKIP_WHITESPACE = 0 ;
2626 const T_ERROR = 2 ;
27+ /** @internal */
28+ const T_BREAK_LINE = 3 ;
29+ /** @internal */
30+ const T_COMMENT = 30 ;
31+ /** @internal */
32+ const T_OPEN_COMMENT = 31 ;
33+ /** @internal */
34+ const T_CLOSE_COMMENT = 32 ;
2735
2836 /**
29- * @phpstan-var array<int<0,13 >, string>
37+ * @phpstan-var array<int<0,17 >, string>
3038 * @const
3139 */
3240 private $ rules = array (
33- 0 => '/\G\s+/ ' ,
34- 1 => '/\G-?([0-9]|[1-9][0-9]+)(\.[0-9]+)?([eE][+-]?[0-9]+)?\b/ ' ,
35- 2 => '{\G"(?> \\\\["bfnrt/ \\\\]| \\\\u[a-fA-F0-9]{4}|[^\0-\x1f \\\\"]++)*+"} ' ,
36- 3 => '/\G\{/ ' ,
37- 4 => '/\G\}/ ' ,
38- 5 => '/\G\[/ ' ,
39- 6 => '/\G\]/ ' ,
40- 7 => '/\G,/ ' ,
41- 8 => '/\G:/ ' ,
42- 9 => '/\Gtrue\b/ ' ,
43- 10 => '/\Gfalse\b/ ' ,
44- 11 => '/\Gnull\b/ ' ,
45- 12 => '/\G$/ ' ,
46- 13 => '/\G./ ' ,
41+ 0 => '/\G\s*\n\r?/ ' ,
42+ 1 => '/\G\s+/ ' ,
43+ 2 => '/\G-?([0-9]|[1-9][0-9]+)(\.[0-9]+)?([eE][+-]?[0-9]+)?\b/ ' ,
44+ 3 => '{\G"(?> \\\\["bfnrt/ \\\\]| \\\\u[a-fA-F0-9]{4}|[^\0-\x1f \\\\"]++)*+"} ' ,
45+ 4 => '/\G\{/ ' ,
46+ 5 => '/\G\}/ ' ,
47+ 6 => '/\G\[/ ' ,
48+ 7 => '/\G\]/ ' ,
49+ 8 => '/\G,/ ' ,
50+ 9 => '/\G:/ ' ,
51+ 10 => '/\Gtrue\b/ ' ,
52+ 11 => '/\Gfalse\b/ ' ,
53+ 12 => '/\Gnull\b/ ' ,
54+ 13 => '/\G$/ ' ,
55+ 14 => '/\G\/\// ' ,
56+ 15 => '/\G\/\*/ ' ,
57+ 16 => '/\G\*\// ' ,
58+ 17 => '/\G./ ' ,
4759 );
4860
4961 /** @var string */
@@ -54,6 +66,8 @@ class Lexer
5466 private $ done ;
5567 /** @var 0|positive-int */
5668 private $ offset ;
69+ /** @var int */
70+ private $ flags ;
5771
5872 /** @var string */
5973 public $ match ;
@@ -66,16 +80,42 @@ class Lexer
6680 /** @var array{first_line: 0|positive-int, first_column: 0|positive-int, last_line: 0|positive-int, last_column: 0|positive-int} */
6781 public $ yylloc ;
6882
/**
 * Build a lexer carrying the supplied option flags.
 *
 * @param int $flags Bitmask of options, stored unchanged; lex() tests it
 *                   against JsonParser::ALLOW_COMMENTS when it meets a comment
 */
public function __construct($flags = 0)
{
    // No validation or normalisation: the value is kept exactly as passed in.
    $this->flags = $flags;
}
90+
/**
 * Fetch the next token that is meaningful to the caller.
 *
 * Whitespace and line-break tokens are dropped. When a comment opener
 * (T_COMMENT or T_OPEN_COMMENT) is read, parseError() is invoked unless the
 * JsonParser::ALLOW_COMMENTS bit is set in the flags; the input is then
 * skipped up to the matching terminator (T_BREAK_LINE for T_COMMENT,
 * T_CLOSE_COMMENT for T_OPEN_COMMENT). A T_CLOSE_COMMENT met outside a
 * comment is reported through parseError().
 *
 * NOTE(review): parseError() is not visible in this excerpt; this method is
 * written on the assumption that it does not return normally - confirm.
 *
 * @return 1|4|6|8|10|11|14|17|18|21|22|23|24|-1
 */
public function lex()
{
    for (;;) {
        $symbol = $this->next();

        // Layout-only tokens carry no meaning: read the next one.
        // next() is documented as returning ints, so strict checks suffice.
        if ($symbol === self::T_SKIP_WHITESPACE || $symbol === self::T_BREAK_LINE) {
            continue;
        }

        if ($symbol === self::T_COMMENT || $symbol === self::T_OPEN_COMMENT) {
            $commentsAllowed = (bool) ($this->flags & JsonParser::ALLOW_COMMENTS);
            if (!$commentsAllowed) {
                $this->parseError('Lexical error on line ' . ($this->yylineno + 1) . ". Comments are not allowed. \n" . $this->showPosition());
            }

            // Pick the token that ends this kind of comment.
            if ($symbol === self::T_COMMENT) {
                $terminator = self::T_BREAK_LINE;
            } else {
                $terminator = self::T_CLOSE_COMMENT;
            }
            $this->skipUntil($terminator);

            if ($this->done) {
                // The done flag was set while skipping: hand back 14, the
                // symbol produced by the '/\G$/' rule.
                return 14;
            }

            continue;
        }

        if ($symbol === self::T_CLOSE_COMMENT) {
            // A comment terminator with no comment open.
            $this->parseError('Lexical error on line ' . ($this->yylineno + 1) . ". Unexpected token. \n" . $this->showPosition());
        }

        // Everything else (and, as before, whatever follows a returning
        // parseError() call) is passed to the caller untouched.
        return $symbol;
    }
}
80120
81121 /**
@@ -160,7 +200,19 @@ protected function parseError($str)
160200 }
161201
/**
 * Read and discard tokens until the requested one has been consumed or the
 * lexer's done flag is no longer false.
 *
 * At least one token is always read. The terminating token itself is consumed.
 *
 * @param int $token Symbol value that ends the skipping
 * @return void
 */
private function skipUntil($token)
{
    do {
        $current = $this->next();
    } while ($current !== $token && $this->done === false);
}
213+
214+ /**
215+ * @return 0|1|3|4|6|8|10|11|14|17|18|21|22|23|24|30|31|32|-1
164216 */
165217 private function next ()
166218 {
@@ -181,7 +233,7 @@ private function next()
181233 $ this ->match = '' ;
182234 }
183235
184- $ rulesLen = 14 ; // count($this->rules)
236+ $ rulesLen = count ($ this ->rules );
185237
186238 for ($ i =0 ; $ i < $ rulesLen ; $ i ++) {
187239 if (preg_match ($ this ->rules [$ i ], $ this ->input , $ match , 0 , $ this ->offset )) {
@@ -215,40 +267,47 @@ private function next()
215267
216268 /**
217269 * @param int $rule
218- * @return 0|4|6|8|10|11|14|17|18|21|22|23|24|-1
270+ * @return 0|3| 4|6|8|10|11|14|17|18|21|22|23|24|30|31|32 |-1
219271 */
220272 private function performAction ($ rule )
221273 {
222274 switch ($ rule ) {
223- case 0 :/* skip whitespace */
275+ case 0 :/* skip break line */
276+ return self ::T_BREAK_LINE ;
277+ case 1 :/* skip whitespace */
224278 return self ::T_SKIP_WHITESPACE ;
225- case 1 :
226- return 6 ;
227279 case 2 :
280+ return 6 ;
281+ case 3 :
228282 $ this ->yytext = substr ($ this ->yytext , 1 , $ this ->yyleng -2 );
229-
230283 return 4 ;
231- case 3 :
232- return 17 ;
233284 case 4 :
234- return 18 ;
285+ return 17 ;
235286 case 5 :
236- return 23 ;
287+ return 18 ;
237288 case 6 :
238- return 24 ;
289+ return 23 ;
239290 case 7 :
240- return 22 ;
291+ return 24 ;
241292 case 8 :
242- return 21 ;
293+ return 22 ;
243294 case 9 :
244- return 10 ;
295+ return 21 ;
245296 case 10 :
246- return 11 ;
297+ return 10 ;
247298 case 11 :
248- return 8 ;
299+ return 11 ;
249300 case 12 :
250- return 14 ;
301+ return 8 ;
251302 case 13 :
303+ return 14 ;
304+ case 14 :
305+ return self ::T_COMMENT ;
306+ case 15 :
307+ return self ::T_OPEN_COMMENT ;
308+ case 16 :
309+ return self ::T_CLOSE_COMMENT ;
310+ case 17 :
252311 return self ::T_INVALID ;
253312 default :
254313 throw new \LogicException ('Unsupported rule ' .$ rule );
0 commit comments