sexp是这样的:type sexp = Atom of string | List of sexp list
,例如,"((a b) ((c d) e) f)"
。
我写了一个解析器来解析一个 sexp 字符串的类型:
let of_string s =
let len = String.length s in
let empty_buf () = Buffer.create 16 in
let rec parse_atom buf i =
if i >= len then failwith "cannot parse"
else
match s.[i] with
| '(' -> failwith "cannot parse"
| ')' -> Atom (Buffer.contents buf), i-1
| ' ' -> Atom (Buffer.contents buf), i
| c when i = len-1 -> (Buffer.add_char buf c; Atom (Buffer.contents buf), i)
| c -> (Buffer.add_char buf c; parse_atom buf (i+1))
and parse_list acc i =
if i >= len || (i = len-1 && s.[i] <> ')') then failwith "cannot parse"
else
match s.[i] with
| ')' -> List (List.rev acc), i
| '(' ->
let list, j = parse_list [] (i+1) in
parse_list (list::acc) (j+1)
| c ->
let atom, j = parse_atom (empty_buf()) i in
parse_list (atom::acc) (j+1)
in
if s.[0] <> '(' then
let atom, j = parse_atom (empty_buf()) 0 in
if j = len-1 then atom
else failwith "cannot parse"
else
let list, j = parse_list [] 1 in
if j = len-1 then list
else failwith "cannot parse"
但我认为它太冗长和丑陋。
有人可以用一种优雅的方式帮助我编写这样的解析器吗?
实际上,我在编写解析器代码时总是遇到问题,我只能写这么丑的一个。
这种解析有什么技巧吗?如何有效地处理暗示递归解析的符号,例如(
, ?)