From faa6535dd588025bf7a5ecfb4a95d3013ade56c4 Mon Sep 17 00:00:00 2001 From: Junology Date: Tue, 10 Dec 2024 04:30:07 +0900 Subject: [PATCH 1/3] Refactor SVG path parser --- core/src/processing/core/PShapeSVG.java | 101 ++++++++++++------- core/test/processing/core/PShapeSVGTest.java | 32 +++++- 2 files changed, 95 insertions(+), 38 deletions(-) diff --git a/core/src/processing/core/PShapeSVG.java b/core/src/processing/core/PShapeSVG.java index e85389f02e..19731769f8 100644 --- a/core/src/processing/core/PShapeSVG.java +++ b/core/src/processing/core/PShapeSVG.java @@ -514,53 +514,86 @@ protected void parsePath() { char[] pathDataChars = pathData.toCharArray(); StringBuilder pathBuffer = new StringBuilder(); - boolean lastSeparate = false; - boolean isOnDecimal = false; + + /* + * The state of the lexer: + * -1: just after the command (i.e. a single alphabet) + * 0: neutral state + * 1: on a digit sequence for integer representation + * 2: on a decimal + * 3: on a digit or a sign in exponent in scientific notation, e.g. 3.14e-2) + * 4: on a digit sequence in exponent + */ + int lexState = 0; for (int i = 0; i < pathDataChars.length; i++) { char c = pathDataChars[i]; - boolean separate = false; - - if (c == 'M' || c == 'm' || - c == 'L' || c == 'l' || - c == 'H' || c == 'h' || - c == 'V' || c == 'v' || - c == 'C' || c == 'c' || // beziers - c == 'S' || c == 's' || - c == 'Q' || c == 'q' || // quadratic beziers - c == 'T' || c == 't' || - c == 'A' || c == 'a' || // elliptical arc - c == 'Z' || c == 'z' || // closepath - c == ',') { - separate = true; - if (i != 0) { + + // Put a separator after a command. + if (lexState == -1) { + pathBuffer.append("|"); + lexState = 0; + } + + if (c >= '0' && c <= '9') { + if (lexState == 0 || lexState == 3) { + // If it is a head of a number representation, enter the 'inside' of the digit sequence. 
+ ++lexState; + } + pathBuffer.append(c); + continue; + } + + if (c == '-') { + if (lexState == 0) { + // In neutral state, enter 'digit sequence'. + lexState = 1; + } + else if (lexState == 3) { + // In the begining of an exponent, enter 'exponent digit sequence'. + lexState = 4; + } + else { + // Otherwise, begin a new number representation. pathBuffer.append("|"); + lexState = 1; } + pathBuffer.append("-"); + continue; } - if (c == 'Z' || c == 'z') { - separate = false; + + if (c == '.') { + if (lexState >= 2) { + // Begin a new decimal number unless it is in a neutral state or after a digit sequence + pathBuffer.append("|"); + } + pathBuffer.append("."); + lexState = 2; + continue; } - if (c == '.' && !isOnDecimal) { - isOnDecimal = true; + + if (c == 'e' || c == 'E') { + // Found 'e' or 'E', enter the 'exponent' state immediately. + pathBuffer.append("e"); + lexState = 3; + continue; } - else if (isOnDecimal && (c < '0' || c > '9')) { + + // The following are executed for non-numeral elements + + if (lexState != 0) { pathBuffer.append("|"); - isOnDecimal = c == '.'; - } - if (c == '-' && !lastSeparate) { - // allow for 'e' notation in numbers, e.g. 2.10e-9 - // https://download.processing.org/bugzilla/1408.html - if (i == 0 || pathDataChars[i-1] != 'e') { - pathBuffer.append("|"); - } + lexState = 0; } + if (c != ',') { - pathBuffer.append(c); //"" + pathDataBuffer.charAt(i)); + pathBuffer.append(c); } - if (separate && c != ',' && c != '-') { - pathBuffer.append("|"); + + if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) { + // Every alphabet character except for 'e' and 'E' are considered as a command. 
+ lexState = -1; } - lastSeparate = separate; } // use whitespace constant to get rid of extra spaces and CR or LF diff --git a/core/test/processing/core/PShapeSVGTest.java b/core/test/processing/core/PShapeSVGTest.java index 2b9ddd96cb..a38c3ae1bc 100644 --- a/core/test/processing/core/PShapeSVGTest.java +++ b/core/test/processing/core/PShapeSVGTest.java @@ -10,23 +10,47 @@ public class PShapeSVGTest { - private static final String TEST_CONTENT = ""; + private static final String[] TEST_CONTENT = { + "", + "" + }; + private static final int[] TEST_NVERTEX = {2, 8}; + private static final String TEST_EXPONENT = + ""; @Test public void testDecimals() { try { - XML xml = XML.parse(TEST_CONTENT); + for (int i = 0; i < TEST_CONTENT.length; ++i) { + XML xml = XML.parse(TEST_CONTENT[i]); + PShapeSVG shape = new PShapeSVG(xml); + PShape[] children = shape.getChildren(); + Assert.assertEquals(1, children.length); + PShape[] grandchildren = children[0].getChildren(); + Assert.assertEquals(1, grandchildren.length); + Assert.assertEquals(0, grandchildren[0].getChildCount()); + Assert.assertEquals(TEST_NVERTEX[i], grandchildren[0].getVertexCount()); + } + } + catch (Exception e) { + Assert.fail("Encountered exception " + e); + } + } + + @Test + public void testExponent() { + try { + XML xml = XML.parse(TEST_EXPONENT); PShapeSVG shape = new PShapeSVG(xml); PShape[] children = shape.getChildren(); Assert.assertEquals(1, children.length); PShape[] grandchildren = children[0].getChildren(); Assert.assertEquals(1, grandchildren.length); Assert.assertEquals(0, grandchildren[0].getChildCount()); - Assert.assertEquals(2, grandchildren[0].getVertexCount()); + Assert.assertEquals(8, grandchildren[0].getVertexCount()); } catch (Exception e) { Assert.fail("Encountered exception " + e); } } - } From f50ff24333f102313119c354671c67036cf5afbc Mon Sep 17 00:00:00 2001 From: Junology Date: Thu, 12 Dec 2024 22:30:31 +0900 Subject: [PATCH 2/3] Make lexical scanner state into enum --- 
core/src/processing/core/PShapeSVG.java | 45 +++++++++++++++---------- 1 file changed, 28 insertions(+), 17 deletions(-) diff --git a/core/src/processing/core/PShapeSVG.java b/core/src/processing/core/PShapeSVG.java index 19731769f8..684a7cc253 100644 --- a/core/src/processing/core/PShapeSVG.java +++ b/core/src/processing/core/PShapeSVG.java @@ -515,6 +515,14 @@ protected void parsePath() { StringBuilder pathBuffer = new StringBuilder(); + enum LexState { + AFTER_CMD,// Just after a command (i.e. a single alphabet) + NEUTRAL, // Neutral state, waiting for a number expression or a command + INTEGER, // On a sequence of digits possibly led by the '-' sign + DECIMAL, // On a digit sequence following the decimal point '.' + EXP_HEAD, // On the head of the exponent part of a scientific notation; the '-' sign or a digit + EXP_TAIL, // On the integer expression in the exponent part + } /* * The state of the lexer: * -1: just after the command (i.e. a single alphabet) @@ -524,66 +532,69 @@ protected void parsePath() { * 3: on a digit or a sign in exponent in scientific notation, e.g. 3.14e-2) * 4: on a digit sequence in exponent */ - int lexState = 0; + LexState lexState = LexState.NEUTRAL; for (int i = 0; i < pathDataChars.length; i++) { char c = pathDataChars[i]; // Put a separator after a command. - if (lexState == -1) { + if (lexState == LexState.AFTER_CMD) { pathBuffer.append("|"); - lexState = 0; + lexState = LexState.NEUTRAL; } if (c >= '0' && c <= '9') { - if (lexState == 0 || lexState == 3) { - // If it is a head of a number representation, enter the 'inside' of the digit sequence. - ++lexState; + // If it is a head of a number representation, enter the 'inside' of the digit sequence. 
+ if (lexState == LexState.NEUTRAL) { + lexState = LexState.INTEGER; + } + else if (lexState == LexState.EXP_HEAD) { + lexState = LexState.EXP_TAIL; } pathBuffer.append(c); continue; } if (c == '-') { - if (lexState == 0) { + if (lexState == LexState.NEUTRAL) { // In neutral state, enter 'digit sequence'. - lexState = 1; + lexState = LexState.INTEGER; } - else if (lexState == 3) { + else if (lexState == LexState.EXP_HEAD) { // In the begining of an exponent, enter 'exponent digit sequence'. - lexState = 4; + lexState = LexState.EXP_TAIL; } else { // Otherwise, begin a new number representation. pathBuffer.append("|"); - lexState = 1; + lexState = LexState.INTEGER; } pathBuffer.append("-"); continue; } if (c == '.') { - if (lexState >= 2) { + if (lexState == LexState.DECIMAL || lexState == LexState.EXP_HEAD || lexState == LexState.EXP_TAIL) { // Begin a new decimal number unless it is in a neutral state or after a digit sequence pathBuffer.append("|"); } pathBuffer.append("."); - lexState = 2; + lexState = LexState.DECIMAL; continue; } if (c == 'e' || c == 'E') { // Found 'e' or 'E', enter the 'exponent' state immediately. pathBuffer.append("e"); - lexState = 3; + lexState = LexState.EXP_HEAD; continue; } // The following are executed for non-numeral elements - if (lexState != 0) { + if (lexState != LexState.NEUTRAL) { pathBuffer.append("|"); - lexState = 0; + lexState = LexState.NEUTRAL; } if (c != ',') { @@ -592,7 +603,7 @@ else if (lexState == 3) { if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) { // Every alphabet character except for 'e' and 'E' are considered as a command. 
- lexState = -1; + lexState = LexState.AFTER_CMD; } } From 4d711688e6bd1a3e99df43300c05b58f29f51e4f Mon Sep 17 00:00:00 2001 From: Junology Date: Thu, 12 Dec 2024 22:35:57 +0900 Subject: [PATCH 3/3] Clean up comments around SVG path lexical scanner --- core/src/processing/core/PShapeSVG.java | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/core/src/processing/core/PShapeSVG.java b/core/src/processing/core/PShapeSVG.java index 684a7cc253..59f54f87f3 100644 --- a/core/src/processing/core/PShapeSVG.java +++ b/core/src/processing/core/PShapeSVG.java @@ -515,6 +515,7 @@ protected void parsePath() { StringBuilder pathBuffer = new StringBuilder(); + // The states of the lexical scanner enum LexState { AFTER_CMD,// Just after a command (i.e. a single alphabet) NEUTRAL, // Neutral state, waiting for a number expression or a command INTEGER, // On a sequence of digits possibly led by the '-' sign DECIMAL, // On a digit sequence following the decimal point '.' EXP_HEAD, // On the head of the exponent part of a scientific notation; the '-' sign or a digit EXP_TAIL, // On the integer expression in the exponent part } - /* - * The state of the lexer: - * -1: just after the command (i.e. a single alphabet) - * 0: neutral state - * 1: on a digit sequence for integer representation - * 2: on a decimal - * 3: on a digit or a sign in exponent in scientific notation, e.g. 3.14e-2) - * 4: on a digit sequence in exponent - */ LexState lexState = LexState.NEUTRAL; for (int i = 0; i < pathDataChars.length; i++) {