Skip to content

Commit a9696e2

Browse files
phillipb and UziTech authored
fix: retain line breaks in tokens properly (#2341)
* Fix lexer and tokenizer to retain line breaks properly * Add test for bug * Check for line breaks not just spaces * Fix lint * Fix spacing in test * clean up code Co-authored-by: Tony Brix <[email protected]>
1 parent 6aacd13 commit a9696e2

File tree

4 files changed

+177
-68
lines changed

4 files changed

+177
-68
lines changed

src/Lexer.js

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,11 @@ export class Lexer {
152152
// newline
153153
if (token = this.tokenizer.space(src)) {
154154
src = src.substring(token.raw.length);
155-
if (token.type) {
155+
if (token.raw.length === 1 && tokens.length > 0) {
156+
// if there's a single \n as a spacer, it's terminating the last line,
157+
// so move it there so that we don't get unnecessary paragraph tags
158+
tokens[tokens.length - 1].raw += '\n';
159+
} else {
156160
tokens.push(token);
157161
}
158162
continue;

src/Tokenizer.js

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -72,14 +72,11 @@ export class Tokenizer {
7272

7373
space(src) {
7474
const cap = this.rules.block.newline.exec(src);
75-
if (cap) {
76-
if (cap[0].length > 1) {
77-
return {
78-
type: 'space',
79-
raw: cap[0]
80-
};
81-
}
82-
return { raw: '\n' };
75+
if (cap && cap[0].length > 0) {
76+
return {
77+
type: 'space',
78+
raw: cap[0]
79+
};
8380
}
8481
}
8582

@@ -303,7 +300,24 @@ export class Tokenizer {
303300
for (i = 0; i < l; i++) {
304301
this.lexer.state.top = false;
305302
list.items[i].tokens = this.lexer.blockTokens(list.items[i].text, []);
306-
if (!list.loose && list.items[i].tokens.some(t => t.type === 'space')) {
303+
const spacers = list.items[i].tokens.filter(t => t.type === 'space');
304+
const hasMultipleLineBreaks = spacers.every(t => {
305+
const chars = t.raw.split('');
306+
let lineBreaks = 0;
307+
for (const char of chars) {
308+
if (char === '\n') {
309+
lineBreaks += 1;
310+
}
311+
if (lineBreaks > 1) {
312+
return true;
313+
}
314+
}
315+
316+
return false;
317+
});
318+
319+
if (!list.loose && spacers.length && hasMultipleLineBreaks) {
320+
// Having a single line break doesn't mean a list is loose. A single line break is terminating the last list item
307321
list.loose = true;
308322
list.items[i].loose = true;
309323
}

test/unit/Lexer-spec.js

Lines changed: 148 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,10 @@ lheading 2
9393
----------
9494
`,
9595
tokens: [
96+
{
97+
type: 'space',
98+
raw: '\n'
99+
},
96100
{
97101
type: 'heading',
98102
raw: '# heading 1\n\n',
@@ -175,6 +179,9 @@ lheading 2
175179
| 1 | 2 |
176180
`,
177181
tokens: [{
182+
type: 'space',
183+
raw: '\n'
184+
}, {
178185
type: 'table',
179186
align: [null, null],
180187
raw: '| a | b |\n|---|---|\n| 1 | 2 |\n',
@@ -212,40 +219,42 @@ paragraph 1
212219
|---|---|
213220
| 1 | 2 |
214221
`,
215-
tokens: [
216-
{
217-
type: 'paragraph',
218-
raw: 'paragraph 1',
219-
text: 'paragraph 1',
220-
tokens: [{ type: 'text', raw: 'paragraph 1', text: 'paragraph 1' }]
221-
},
222-
{
223-
type: 'table',
224-
align: [null, null],
225-
raw: '| a | b |\n|---|---|\n| 1 | 2 |\n',
226-
header: [
222+
tokens: [{
223+
type: 'space',
224+
raw: '\n'
225+
}, {
226+
type: 'paragraph',
227+
raw: 'paragraph 1\n',
228+
text: 'paragraph 1',
229+
tokens: [{ type: 'text', raw: 'paragraph 1', text: 'paragraph 1' }]
230+
},
231+
{
232+
type: 'table',
233+
align: [null, null],
234+
raw: '| a | b |\n|---|---|\n| 1 | 2 |\n',
235+
header: [
236+
{
237+
text: 'a',
238+
tokens: [{ type: 'text', raw: 'a', text: 'a' }]
239+
},
240+
{
241+
text: 'b',
242+
tokens: [{ type: 'text', raw: 'b', text: 'b' }]
243+
}
244+
],
245+
rows: [
246+
[
227247
{
228-
text: 'a',
229-
tokens: [{ type: 'text', raw: 'a', text: 'a' }]
248+
text: '1',
249+
tokens: [{ type: 'text', raw: '1', text: '1' }]
230250
},
231251
{
232-
text: 'b',
233-
tokens: [{ type: 'text', raw: 'b', text: 'b' }]
252+
text: '2',
253+
tokens: [{ type: 'text', raw: '2', text: '2' }]
234254
}
235-
],
236-
rows: [
237-
[
238-
{
239-
text: '1',
240-
tokens: [{ type: 'text', raw: '1', text: '1' }]
241-
},
242-
{
243-
text: '2',
244-
tokens: [{ type: 'text', raw: '2', text: '2' }]
245-
}
246-
]
247255
]
248-
}
256+
]
257+
}
249258
]
250259
});
251260
});
@@ -258,6 +267,9 @@ paragraph 1
258267
| 1 | 2 | 3 |
259268
`,
260269
tokens: [{
270+
type: 'space',
271+
raw: '\n'
272+
}, {
261273
type: 'table',
262274
align: ['left', 'center', 'right'],
263275
raw: '| a | b | c |\n|:--|:-:|--:|\n| 1 | 2 | 3 |\n',
@@ -302,33 +314,37 @@ a | b
302314
--|--
303315
1 | 2
304316
`,
305-
tokens: [{
306-
type: 'table',
307-
align: [null, null],
308-
raw: 'a | b\n--|--\n1 | 2\n',
309-
header: [
310-
{
311-
text: 'a',
312-
tokens: [{ type: 'text', raw: 'a', text: 'a' }]
313-
},
314-
{
315-
text: 'b',
316-
tokens: [{ type: 'text', raw: 'b', text: 'b' }]
317-
}
318-
],
319-
rows: [
320-
[
317+
tokens: [
318+
{
319+
type: 'space',
320+
raw: '\n'
321+
}, {
322+
type: 'table',
323+
align: [null, null],
324+
raw: 'a | b\n--|--\n1 | 2\n',
325+
header: [
321326
{
322-
text: '1',
323-
tokens: [{ type: 'text', raw: '1', text: '1' }]
327+
text: 'a',
328+
tokens: [{ type: 'text', raw: 'a', text: 'a' }]
324329
},
325330
{
326-
text: '2',
327-
tokens: [{ type: 'text', raw: '2', text: '2' }]
331+
text: 'b',
332+
tokens: [{ type: 'text', raw: 'b', text: 'b' }]
328333
}
334+
],
335+
rows: [
336+
[
337+
{
338+
text: '1',
339+
tokens: [{ type: 'text', raw: '1', text: '1' }]
340+
},
341+
{
342+
text: '2',
343+
tokens: [{ type: 'text', raw: '2', text: '2' }]
344+
}
345+
]
329346
]
330-
]
331-
}]
347+
}]
332348
});
333349
});
334350
});
@@ -342,6 +358,19 @@ a | b
342358
]
343359
});
344360
});
361+
362+
it('after line break does not consume raw \n', () => {
363+
expectTokens({
364+
md: 'T\nh\n---',
365+
tokens:
366+
jasmine.arrayContaining([
367+
jasmine.objectContaining({
368+
raw: 'T\nh\n'
369+
}),
370+
{ type: 'hr', raw: '---' }
371+
])
372+
});
373+
});
345374
});
346375

347376
describe('blockquote', () => {
@@ -376,8 +405,11 @@ a | b
376405
`,
377406
tokens: [
378407
{
408+
type: 'space',
409+
raw: '\n'
410+
}, {
379411
type: 'list',
380-
raw: '- item 1\n- item 2',
412+
raw: '- item 1\n- item 2\n',
381413
ordered: false,
382414
start: '',
383415
loose: false,
@@ -423,9 +455,13 @@ a | b
423455
2. item 2
424456
`,
425457
tokens: jasmine.arrayContaining([
458+
jasmine.objectContaining({
459+
type: 'space',
460+
raw: '\n'
461+
}),
426462
jasmine.objectContaining({
427463
type: 'list',
428-
raw: '1. item 1\n2. item 2',
464+
raw: '1. item 1\n2. item 2\n',
429465
ordered: true,
430466
start: 1,
431467
items: [
@@ -448,9 +484,13 @@ a | b
448484
2) item 2
449485
`,
450486
tokens: jasmine.arrayContaining([
487+
jasmine.objectContaining({
488+
type: 'space',
489+
raw: '\n'
490+
}),
451491
jasmine.objectContaining({
452492
type: 'list',
453-
raw: '1) item 1\n2) item 2',
493+
raw: '1) item 1\n2) item 2\n',
454494
ordered: true,
455495
start: 1,
456496
items: [
@@ -475,6 +515,10 @@ a | b
475515
paragraph
476516
`,
477517
tokens: [
518+
{
519+
type: 'space',
520+
raw: '\n'
521+
},
478522
{
479523
type: 'list',
480524
raw: '- item 1\n- item 2',
@@ -515,7 +559,7 @@ paragraph
515559
{ type: 'space', raw: '\n\n' },
516560
{
517561
type: 'paragraph',
518-
raw: 'paragraph',
562+
raw: 'paragraph\n',
519563
text: 'paragraph',
520564
tokens: [{
521565
type: 'text',
@@ -534,9 +578,13 @@ paragraph
534578
3. item 2
535579
`,
536580
tokens: jasmine.arrayContaining([
581+
jasmine.objectContaining({
582+
type: 'space',
583+
raw: '\n'
584+
}),
537585
jasmine.objectContaining({
538586
type: 'list',
539-
raw: '2. item 1\n3. item 2',
587+
raw: '2. item 1\n3. item 2\n',
540588
ordered: true,
541589
start: 2,
542590
items: [
@@ -560,9 +608,13 @@ paragraph
560608
- item 2
561609
`,
562610
tokens: jasmine.arrayContaining([
611+
jasmine.objectContaining({
612+
type: 'space',
613+
raw: '\n'
614+
}),
563615
jasmine.objectContaining({
564616
type: 'list',
565-
raw: '- item 1\n\n- item 2',
617+
raw: '- item 1\n\n- item 2\n',
566618
loose: true,
567619
items: [
568620
jasmine.objectContaining({
@@ -577,16 +629,54 @@ paragraph
577629
});
578630
});
579631

632+
it('not loose with spaces', () => {
633+
expectTokens({
634+
md: `
635+
- item 1
636+
- item 2
637+
`,
638+
tokens: jasmine.arrayContaining([
639+
jasmine.objectContaining({
640+
type: 'space',
641+
raw: '\n'
642+
}),
643+
jasmine.objectContaining({
644+
type: 'list',
645+
raw: '- item 1\n - item 2\n',
646+
loose: false,
647+
items: [
648+
jasmine.objectContaining({
649+
raw: '- item 1\n - item 2',
650+
tokens: jasmine.arrayContaining([
651+
jasmine.objectContaining({
652+
raw: 'item 1\n'
653+
}),
654+
jasmine.objectContaining({
655+
type: 'list',
656+
raw: '- item 2'
657+
})
658+
])
659+
})
660+
]
661+
})
662+
])
663+
});
664+
});
665+
580666
it('task', () => {
581667
expectTokens({
582668
md: `
583669
- [ ] item 1
584670
- [x] item 2
585671
`,
586672
tokens: jasmine.arrayContaining([
673+
jasmine.objectContaining({
674+
type: 'space',
675+
raw: '\n'
676+
}),
587677
jasmine.objectContaining({
588678
type: 'list',
589-
raw: '- [ ] item 1\n- [x] item 2',
679+
raw: '- [ ] item 1\n- [x] item 2\n',
590680
items: [
591681
jasmine.objectContaining({
592682
raw: '- [ ] item 1\n',

test/unit/marked-spec.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -994,6 +994,7 @@ br
994994
});
995995

996996
expect(tokensSeen).toEqual([
997+
['space', ''],
997998
['paragraph', 'paragraph'],
998999
['text', 'paragraph'],
9991000
['space', ''],

0 commit comments

Comments (0)