Improve the algorithm for finding UNA when the file does not start with UNA

pulse-mind · pulse-mind · commit 303cfd52a8de · 2024-10-18T18:32:55.000+02:00
diff --git a/pydifact/parser.py b/pydifact/parser.py
@@ -52,15 +52,25 @@ def parse(
         # If there is a UNA, take the following 6 characters
         # unconditionally, strip them, and make control Characters()
         # for further parsing
-        idx_una = message.find("UNA")
+
+        # If the message starts with UNA, the UNA segment is at index 0.
+        una_pattern = "UNA"
+        if message.startswith(una_pattern):
+            idx_una = 0
+        # Otherwise, search for UNA immediately preceded by the segment terminator ('), so that a stray "UNA" inside free text such as "lorem ipsum UNA lorem ipsum" is not matched.
+        else:
+            una_pattern = "'UNA"
+            idx_una = message.find(una_pattern)
         una_found = idx_una != -1
 
         if una_found:
-            characters = Characters.from_str("UNA" + message[idx_una + 3: idx_una + 9])
+            idx_begin = idx_una + len(una_pattern)
+            idx_end = idx_begin + 6
+            characters = Characters.from_str(f"UNA{message[idx_begin: idx_end]}")
 
             # remove the UNA segment from the string,
             # ignore everything before UNA because it should be the first segment if una_found.
-            message = message[idx_una + 9 :].lstrip("\r\n")
+            message = message[idx_end:].lstrip("\r\n")
 
         else:
             # if no UNA header present, use default control characters