Skip to content

Commit a66f797

Browse files
committed
patched esoteric bug in parser
which previously used stold() to validate that a user-given string contained a number which can be parsed as a qreal. This enabled a bug where users compiling with FLOAT_PRECISION=1 or 2 could specify a number (e.g. in createInlinePauliStrSum) which is well-formed but exceeds the maximum or minimum storable qreal. Because stold() is the long-double version of "string to float", the validation was too lenient and permitted number literals which fit into a long-double qreal but not into the current, smaller-precision qreal. We now instead use a string-to-float function specific to the precision of qreal (stof(), stod() or stold()) so that the "can store number as qreal" validation always runs correctly. We further refactored number parsing in parser.cpp to "trust" the regex, and to throw an internal error (rather than a user-blaming validation error) when a string-to-float function fails on a string which the regex accepted. This…
1 parent fd49f55 commit a66f797

File tree

2 files changed

+189
-97
lines changed

2 files changed

+189
-97
lines changed

quest/src/core/parser.cpp

Lines changed: 173 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
* @author Tyson Jones
1111
*/
1212

13+
#include "quest/include/precision.h"
1314
#include "quest/include/types.h"
1415
#include "quest/include/paulis.h"
1516

@@ -22,11 +23,9 @@
2223
#include <string>
2324
#include <sstream>
2425
#include <fstream>
25-
#include <cstdlib>
2626
#include <stdexcept>
2727
#include <algorithm>
2828

29-
using std::stold;
3029
using std::regex;
3130
using std::vector;
3231
using std::string;
@@ -83,9 +82,9 @@ namespace patterns {
8382
string num = group(comp) + "|" + group(imag) + "|" + group(real);
8483

8584
// no capturing because 'num' pollutes captured groups, and pauli syntax overlaps real integers
86-
string pauli = "[" + parser_RECOGNISED_PAULI_CHARS + "]";
85+
string pauli = "[" + parser_RECOGNISED_PAULI_CHARS + "]";
8786
string paulis = group(optSpace + pauli + optSpace) + "+";
88-
string line = "^" + group(num) + space + optSpace + paulis + "$";
87+
string weightedPaulis = "^" + group(num) + space + optSpace + paulis + "$";
8988
}
9089

9190

@@ -96,8 +95,8 @@ namespace regexes {
9695
regex imag(patterns::imag);
9796
regex comp(patterns::comp);
9897
regex num(patterns::num);
99-
regex line(patterns::line);
10098
regex paulis(patterns::paulis);
99+
regex weightedPaulis(patterns::weightedPaulis);
101100
}
102101

103102

@@ -173,6 +172,165 @@ int getNumPaulisInLine(string line) {
173172

174173

175174

175+
/*
176+
* REAL NUMBER PARSING
177+
*/
178+
179+
180+
qreal precisionAgnosticStringToFloat(string str) {

    // stof(), stod() and stold() cannot handle whitespace after the sign, so
    // all whitespace is stripped first. Beware this turns e.g. "1 0" (an
    // invalid number) into "10" (valid), so this function cannot be used to
    // duck-type a string as numeric. That was never possible anyway, since
    // the std converters accept trailing junk like "10abc".
    removeWhiteSpace(str);

    // use the converter matching qreal's precision; each throws an exception
    // when (the prefix of) str cannot be parsed as, or cannot fit into, a qreal
    switch (FLOAT_PRECISION) {
        case 1: return static_cast<qreal>(std::stof (str));
        case 2: return static_cast<qreal>(std::stod (str));
        case 4: return static_cast<qreal>(std::stold(str));
    }

    // unreachable
    return -1;
}
196+
197+
198+
bool parser_isAnySizedReal(string str) {

    // This checks only the FORM of str, not whether its value fits in a qreal.
    // Every string matching the regex is assumed parsable by
    // precisionAgnosticStringToFloat() (once whitespace is removed), except
    // those holding a number too large for the qreal type, which callers must
    // check separately. Duck-typing with stold() et al alone is insufficient,
    // because those functions tolerate non-numerical characters after the number.
    return regex_match(str, regexes::real);
}
209+
210+
211+
bool parser_isValidReal(string str) {

    // a string of the wrong form can never be a valid real
    bool hasRealForm = parser_isAnySizedReal(str);
    if (!hasRealForm)
        return false;

    // duck-type: attempt the conversion purely to observe whether it throws
    bool fitsInQreal = true;

    try {
        precisionAgnosticStringToFloat(str);

    // our regex permitted an unparsable string, which is an internal error
    } catch (const invalid_argument&) {
        error_attemptedToParseRealFromInvalidString();

    // the number is well-formed but out of range of qreal
    } catch (const out_of_range&) {
        fitsInQreal = false;
    }

    return fitsInQreal;
}
230+
231+
232+
qreal parser_parseReal(string str) {

    // fallback value, only returned if an error function below returns
    // (presumably they do not; this mirrors the "unreachable" return)
    qreal parsed = -1;

    try {
        parsed = precisionAgnosticStringToFloat(str);

    // str holds a number too large or small to store in a qreal
    } catch (const out_of_range&) {
        error_attemptedToParseOutOfRangeReal();

    // str does not begin with a parsable number
    } catch (const invalid_argument&) {
        error_attemptedToParseRealFromInvalidString();
    }

    return parsed;
}
245+
246+
247+
248+
/*
249+
* COMPLEX NUMBER PARSING
250+
*/
251+
252+
253+
bool parser_isAnySizedComplex(string str) {

    // This checks only the FORM of str, not whether its components fit in a
    // qcomp. Every string matching one of the regexes is assumed parsable to
    // a qcomp (once whitespace is removed), except those holding a number too
    // large for the qcomp type, which callers must check separately.
    // Duck-typing each component with stold() et al is insufficient, because
    // those functions tolerate non-numerical chars after the number.

    // str must wholly match the real, imaginary or complex number regex
    return regex_match(str, regexes::real)
        || regex_match(str, regexes::imag)
        || regex_match(str, regexes::comp);
}
270+
271+
272+
bool parser_isValidComplex(string str) {

    // a string of the wrong form can never be a valid complex
    if (!parser_isAnySizedComplex(str))
        return false;

    // str is now guaranteed to have a valid form, but each real-number
    // component within it must still be checked to be in range of a qreal
    sregex_iterator component(str.begin(), str.end(), regexes::real);
    sregex_iterator noMoreComponents;

    // valid coeffs contain 1 or 2 reals, never 0; the regex should have
    // rejected anything else, so finding none is an internal error
    if (component == noMoreComponents)
        error_attemptedToParseComplexFromInvalidString();

    // visit each of the 1 or 2 components
    while (component != noMoreComponents) {

        // duck-type: attempt the conversion to observe whether it throws
        try {
            precisionAgnosticStringToFloat(component->str(0));

        // our regex permitted an unparsable component, which is an internal error
        } catch (const invalid_argument&) {
            error_attemptedToParseComplexFromInvalidString();

        // the component is well-formed but out of range of qreal
        } catch (const out_of_range&) {
            return false;
        }

        ++component;
    }

    // every detected component of str can be stored, so str forms a valid qcomp
    return true;
}
305+
306+
307+
qcomp parser_parseComplex(string str) {

    // callers are expected to pass only validated strings
    if (!parser_isValidComplex(str))
        error_attemptedToParseComplexFromInvalidString();

    // after the validation above, str is guaranteed to fully match exactly
    // one of the real, imag or comp regexes. Their capture groups hold each
    // component with its sign, excluding the imaginary symbol.
    smatch parts;

    // purely real
    if (regex_match(str, parts, regexes::real)) {
        qreal re = parser_parseReal(parts.str(1));
        return qcomp(re, 0);
    }

    // purely imaginary
    if (regex_match(str, parts, regexes::imag)) {
        qreal im = parser_parseReal(parts.str(1));
        return qcomp(0, im);
    }

    // both components present
    if (regex_match(str, parts, regexes::comp)) {
        qreal re = parser_parseReal(parts.str(1));
        qreal im = parser_parseReal(parts.str(2));
        return qcomp(re, im);
    }

    // should be unreachable
    error_attemptedToParseComplexFromInvalidString();
    return qcomp(0,0);
}
331+
332+
333+
176334
/*
177335
* VALIDATION
178336
*
@@ -189,15 +347,14 @@ bool isInterpretablePauliStrSumLine(string line) {
189347
// notation) followed by 1 or more space characters, then one or
190348
// more pauli codes/chars. It does NOT determine whether the coeff
191349
// can actually be instantiated as a qcomp
192-
return regex_match(line, regexes::line);
350+
return regex_match(line, regexes::weightedPaulis);
193351
}
194352

195353

196354
bool isCoeffValidInPauliStrSumLine(string line) {
197355

198356
// it is guaranteed that line is interpretable and contains a regex-matching
199-
// coefficient, but we must additionally verify it is within range of stold,
200-
// and isn't unexpectedly incompatible with stold in a way uncaptured by regex.
357+
// coefficient, but we must additionally verify it is within range of qreal.
201358
// So we duck type each of the 1 or 2 matches with the real regex (i.e. one or
202359
// both of the real and imaginary components of a complex coeff).
203360

@@ -216,17 +373,17 @@ bool isCoeffValidInPauliStrSumLine(string line) {
216373
// enumerate all matches of 'real' regex in line
217374
for (; it != end; it++) {
218375

219-
// removed whitespace (stold cannot handle space between sign and number)
376+
// remove whitespace (stold cannot handle space between sign and number)
220377
string match = it->str(0);
221378
removeWhiteSpace(match);
222379

223-
// return false if stold cannot parse the real as a long double
380+
// return false if number cannot become a qreal
224381
try {
225-
stold(match);
226-
} catch (const invalid_argument&) {
227-
return false;
382+
precisionAgnosticStringToFloat(match);
228383
} catch (const out_of_range&) {
229384
return false;
385+
} catch (const invalid_argument&) { // should be impossible (indicates bad regex)
386+
return false;
230387
}
231388
}
232389

@@ -300,52 +457,6 @@ int parser_getPauliIntFromChar(char ch) {
300457
*/
301458

302459

303-
qreal parseReal(string real) {
304-
305-
// attempt to parse at max precision (long double) then cast down if necessary
306-
try {
307-
return static_cast<qreal>(stold(real));
308-
309-
// should be impossible if regex and validation works correctly
310-
} catch (const invalid_argument&) {
311-
error_attemptedToParseRealFromInvalidString();
312-
313-
// should be prior caught by validation
314-
} catch (const out_of_range&) {
315-
error_attemptedToParseOutOfRangeReal();
316-
}
317-
318-
// unreachable
319-
return -1;
320-
}
321-
322-
323-
qcomp parseCoeff(string coeff) {
324-
325-
// remove all superfluous spaces in coeff so stold is happy (it cannot tolerate spaces after +-)
326-
removeWhiteSpace(coeff);
327-
328-
// we are gauranteed to fully match real, imag or comp after prior validation
329-
smatch match;
330-
331-
// extract and parse components and their signs (excluding imaginary symbol)
332-
if (regex_match(coeff, match, regexes::real))
333-
return qcomp(parseReal(match.str(1)), 0);
334-
335-
if (regex_match(coeff, match, regexes::imag))
336-
return qcomp(0, parseReal(match.str(1)));
337-
338-
if (regex_match(coeff, match, regexes::comp))
339-
return qcomp(
340-
parseReal(match.str(1)),
341-
parseReal(match.str(2)));
342-
343-
// should be unreachable
344-
error_attemptedToParseComplexFromInvalidString();
345-
return qcomp(0,0);
346-
}
347-
348-
349460
PauliStr parsePaulis(string paulis, bool rightIsLeastSignificant) {
350461

351462
// remove whitespace to make string compatible with getPauliStr()
@@ -364,14 +475,14 @@ PauliStr parsePaulis(string paulis, bool rightIsLeastSignificant) {
364475
}
365476

366477

367-
void parseLine(string line, qcomp &coeff, PauliStr &pauli, bool rightIsLeastSignificant) {
478+
void parseWeightedPaulis(string line, qcomp &coeff, PauliStr &pauli, bool rightIsLeastSignificant) {

    // split the line into its coefficient and Pauli-sequence substrings
    string coeffPart, paulisPart;
    separateStringIntoCoeffAndPaulis(line, coeffPart, paulisPart);

    // parse each substring, overwriting the caller's primitives
    coeff = parser_parseComplex(coeffPart);
    pauli = parsePaulis(paulisPart, rightIsLeastSignificant);
}
377488

@@ -403,7 +514,7 @@ PauliStrSum parser_validateAndParsePauliStrSum(string lines, bool rightIsLeastSi
403514

404515
qcomp coeff;
405516
PauliStr string;
406-
parseLine(line, coeff, string, rightIsLeastSignificant); // validates
517+
parseWeightedPaulis(line, coeff, string, rightIsLeastSignificant); // validates
407518

408519
coeffs.push_back(coeff);
409520
strings.push_back(string);
@@ -444,30 +555,3 @@ string parser_loadFile(string fn) {
444555
buffer << file.rdbuf();
445556
return buffer.str();
446557
}
447-
448-
449-
450-
/*
451-
* ENVIRONMENT VARIABLES
452-
*/
453-
454-
455-
bool parser_isStrEmpty(const char* str) {
456-
457-
// str can be unallocated or empty, but not e.g. whitespace
458-
return (str == nullptr) || (str[0] == '\0');
459-
}
460-
461-
462-
bool parser_validateAndParseOptionalBoolEnvVar(string varName, bool defaultVal, const char* caller) {
463-
464-
const char* varStr = std::getenv(varName.c_str());
465-
466-
// permit specifying no or empty environment variable (triggering default)
467-
if (parser_isStrEmpty(varStr))
468-
return defaultVal;
469-
470-
// otherwise it must be precisely 0 or 1 without whitespace
471-
validate_envVarIsBoolean(varName, varStr, caller);
472-
return (varStr[0] == '0')? 0 : 1;
473-
}

0 commit comments

Comments
 (0)