Skip to content

Commit 303cfd5

Browse files
committed
Improve the algorithm that finds UNA when the file does not start with UNA
1 parent 3724dc1 commit 303cfd5

File tree

1 file changed

+13
-3
lines changed

1 file changed

+13
-3
lines changed

pydifact/parser.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -52,15 +52,25 @@ def parse(
5252
# If there is a UNA, take the following 6 characters
5353
# unconditionally, strip them, and make control Characters()
5454
# for further parsing
55-
idx_una = message.find("UNA")
55+
56+
# If the message starts with UNA
57+
una_pattern = "UNA"
58+
if message.startswith(una_pattern):
59+
idx_una = 0
60+
# Otherwise, search for UNA. To avoid matching an incidental "UNA" such as in "lorem ipsum UNA lorem ipsum", look for the segment separator (') followed by UNA.
61+
else:
62+
una_pattern = "'UNA"
63+
idx_una = message.find(una_pattern)
5664
una_found = idx_una != -1
5765

5866
if una_found:
59-
characters = Characters.from_str("UNA" + message[idx_una + 3: idx_una + 9])
67+
idx_begin = idx_una + len(una_pattern)
68+
idx_end = idx_begin + 6
69+
characters = Characters.from_str(f"UNA{message[idx_begin: idx_end]}")
6070

6171
# remove the UNA segment from the string,
6272
# ignore everything before UNA because it should be the first segment if una_found.
63-
message = message[idx_una + 9 :].lstrip("\r\n")
73+
message = message[idx_end:].lstrip("\r\n")
6474

6575
else:
6676
# if no UNA header present, use default control characters

0 commit comments

Comments
 (0)