|
1 | | -module T = Types.Types |
2 | | -(* ^file ^module *) |
| 1 | +open Str (* not reentrant, but simple and always available *) |
| 2 | +open Types |
3 | 3 |
|
4 | | -let find_re re str = |
5 | | - List.map |
6 | | - (function Str.Delim x -> x | Str.Text x -> "impossible!") |
7 | | - (List.filter |
8 | | - (function Str.Delim x -> true | Str.Text x -> false) |
9 | | - (Str.full_split re str)) |
| 4 | +let separator_re = regexp "\\([, \t\n]\\|;[^\n]*\\)+" |
| 5 | +let number_re = regexp "-?[0-9]+" |
| 6 | +let chars = "[^][, \t\n;(){}'`~@^\"]+" |
| 7 | +let keyword_re = regexp (":\\(" ^ chars ^ "\\)") |
| 8 | +let symbol_re = regexp chars |
| 9 | +let string_re = regexp {|"\(\(\\[\\n"]\|[^\\"]\)*\)"|} |
| 10 | +let escape_re = regexp {|\\.|} |
| 11 | +let quote_re = regexp_string "'" |
| 12 | +let quasiquote_re = regexp_string "`" |
| 13 | +let deref_re = regexp_string "@" |
| 14 | +let unquote_re = regexp_string "~" |
| 15 | +let sp_unq_re = regexp_string "~@" |
| 16 | +let with_meta_re = regexp_string "^" |
| 17 | +let list_re = regexp_string "(" |
| 18 | +let map_re = regexp_string "{" |
| 19 | +let vector_re = regexp_string "[" |
| 20 | +let close_re = regexp "[])}]" (* so "[1 2)" is accepted as a vector *) |
10 | 21 |
|
11 | | -let gsub re f str = |
12 | | - String.concat "" |
13 | | - (List.map |
14 | | - (function Str.Delim x -> f x | Str.Text x -> x) |
15 | | - (Str.full_split re str)) |
16 | | - |
17 | | -let token_re = |
18 | | - Str.regexp |
19 | | - "~@\\|[][{}()'`~^@]\\|\"\\(\\\\.\\|[^\"]\\)*\"?\\|;.*\\|[^][ \n\ |
20 | | - {}('\"`,;)]*" |
21 | | - |
22 | | -let string_re = Str.regexp "\"\\(\\\\.\\|[^\\\\\"]\\)*\"" |
23 | | - |
24 | | -type reader = { form : Types.mal_type; tokens : string list } |
25 | | -type list_reader = { list_form : Types.mal_type list; tokens : string list } |
26 | | - |
27 | | -let unescape_string token = |
28 | | - if Str.string_match string_re token 0 then |
29 | | - let without_quotes = String.sub token 1 (String.length token - 2) in |
30 | | - gsub (Str.regexp "\\\\.") |
31 | | - (function "\\n" -> "\n" | x -> String.sub x 1 1) |
32 | | - without_quotes |
33 | | - else raise (Invalid_argument "expected '\"', got EOF") |
34 | | - |
35 | | -let read_atom token = |
36 | | - match token with |
37 | | - | "nil" -> T.Nil |
38 | | - | "true" -> T.Bool true |
39 | | - | "false" -> T.Bool false |
40 | | - | _ -> ( |
41 | | - match token.[0] with |
42 | | - | '0' .. '9' -> T.Int (int_of_string token) |
43 | | - | '-' -> ( |
44 | | - match String.length token with |
45 | | - | 1 -> T.Symbol token |
46 | | - | _ -> ( |
47 | | - match token.[1] with |
48 | | - | '0' .. '9' -> T.Int (int_of_string token) |
49 | | - | _ -> T.Symbol token)) |
50 | | - | '"' -> T.String (unescape_string token) |
51 | | - | ':' -> T.Keyword (Str.replace_first (Str.regexp "^:") "" token) |
52 | | - | _ -> T.Symbol token) |
53 | | - |
54 | | -let rec read_list eol list_reader = |
55 | | - match list_reader.tokens with |
56 | | - | [] -> |
57 | | - raise (Invalid_argument (Format.asprintf "expected '%s', got EOF" eol)) |
58 | | - | token :: tokens -> |
59 | | - if Str.string_match (Str.regexp eol) token 0 then |
60 | | - { list_form = list_reader.list_form; tokens } |
61 | | - else if token.[0] = ';' then |
62 | | - read_list eol { list_form = list_reader.list_form; tokens } |
63 | | - else |
64 | | - let reader = read_form list_reader.tokens in |
65 | | - read_list eol |
66 | | - { |
67 | | - list_form = list_reader.list_form @ [ reader.form ]; |
68 | | - tokens = reader.tokens; |
69 | | - } |
70 | | - |
71 | | -and read_quote sym tokens = |
72 | | - let reader = read_form tokens in |
73 | | - { form = Types.list [ T.Symbol sym; reader.form ]; tokens = reader.tokens } |
74 | | - |
75 | | -and read_form all_tokens = |
76 | | - match all_tokens with |
77 | | - | [] -> raise (Invalid_argument "no form found in the given string") |
78 | | - | token :: tokens -> ( |
79 | | - match token with |
80 | | - | "'" -> read_quote "quote" tokens |
81 | | - | "`" -> read_quote "quasiquote" tokens |
82 | | - | "~" -> read_quote "unquote" tokens |
83 | | - | "~@" -> read_quote "splice-unquote" tokens |
84 | | - | "@" -> read_quote "deref" tokens |
85 | | - | "^" -> |
86 | | - let meta = read_form tokens in |
87 | | - let value = read_form meta.tokens in |
88 | | - { |
89 | | - form = Types.list [ T.Symbol "with-meta"; value.form; meta.form ]; |
90 | | - tokens = value.tokens; |
91 | | - } |
92 | | - | "(" -> |
93 | | - let list_reader = read_list ")" { list_form = []; tokens } in |
94 | | - { |
95 | | - form = Types.list list_reader.list_form; |
96 | | - tokens = list_reader.tokens; |
97 | | - } |
98 | | - | "{" -> |
99 | | - let list_reader = read_list "}" { list_form = []; tokens } in |
100 | | - { |
101 | | - form = Types.list_into_map Types.MalMap.empty list_reader.list_form; |
102 | | - tokens = list_reader.tokens; |
103 | | - } |
104 | | - | "[" -> |
105 | | - let list_reader = read_list "]" { list_form = []; tokens } in |
106 | | - { |
107 | | - form = Types.vector list_reader.list_form; |
108 | | - tokens = list_reader.tokens; |
109 | | - } |
110 | | - | _ -> |
111 | | - if token.[0] = ';' then read_form tokens |
112 | | - else { form = read_atom token; tokens }) |
| 22 | +let unescape str = |
| 23 | + let e = match_end () - 1 in |
| 24 | + if str.[e] == 'n' then "\n" else String.sub str e 1 |
113 | 25 |
|
114 | 26 | let read_str str = |
115 | | - (read_form (List.filter (( <> ) "") (find_re token_re str))).form |
| 27 | + (* !p is the currently parsed position inside str *) |
| 28 | + let rec read pattern p = |
| 29 | + let result = string_match pattern str !p in |
| 30 | + if result then p := match_end (); |
| 31 | + result |
| 32 | + and read_list p = |
| 33 | + ignore (read separator_re p); |
| 34 | + if read close_re p then [] |
| 35 | + else |
| 36 | + (* Parse the first form before the rest of the list *) |
| 37 | + let first = read_form p in |
| 38 | + first :: read_list p |
| 39 | + and read_form p = |
| 40 | + ignore (read separator_re p); |
| 41 | + if read number_re p then Types.Int (int_of_string (matched_string str)) |
| 42 | + else if read keyword_re p then Keyword (matched_group 1 str) |
| 43 | + else if read symbol_re p then |
| 44 | + match matched_string str with |
| 45 | + | "nil" -> Nil |
| 46 | + | "true" -> Bool true |
| 47 | + | "false" -> Bool false |
| 48 | + | t -> Symbol t |
| 49 | + else if read string_re p then |
| 50 | + String (global_substitute escape_re unescape (matched_group 1 str)) |
| 51 | + else if read quote_re p then list [ Symbol "quote"; read_form p ] |
| 52 | + else if read quasiquote_re p then list [ Symbol "quasiquote"; read_form p ] |
| 53 | + else if read deref_re p then list [ Symbol "deref"; read_form p ] |
| 54 | + else if read sp_unq_re p then list [ Symbol "splice-unquote"; read_form p ] |
| 55 | + else if read unquote_re p then list [ Symbol "unquote"; read_form p ] |
| 56 | + else if read with_meta_re p then |
| 57 | + (* Parse the metadata before the value *) |
| 58 | + let meta = read_form p in |
| 59 | + list [ Symbol "with-meta"; read_form p; meta ] |
| 60 | + else if read list_re p then list (read_list p) |
| 61 | + else if read vector_re p then vector (read_list p) |
| 62 | + else if read map_re p then list_into_map MalMap.empty (read_list p) |
| 63 | + else raise (Invalid_argument "unexpected EOF ] } ) or string escape") |
| 64 | + in |
| 65 | + read_form (ref 0) |
0 commit comments