(这个关于重构 F# 代码的问题让我收到了一张反对票,但也得到了一些有趣且有用的回答。在 SO 的 32,000 多个问题中只有 62 个 F# 问题,实在少得可怜,所以我要冒着招来更多反对的风险再问一个!)
昨天我想在博客上发布一些代码,于是求助于这个网站,我过去觉得它很有用。然而,Blogger 的编辑器吃掉了所有的样式声明,结果这条路走不通。
所以(和任何黑客一样)我想:“这能有多难?”于是用不到 100 行 F# 自己写了一个。
下面是代码的核心部分,它把输入字符串转换为“标记”(token)列表。请注意,不要把这些标记与词法分析/语法分析意义上的标记混淆。我确实粗略看过那些工具,虽然几乎没看懂,但我明白它们只会给我标记,而我想保留原始字符串。
问题是:有没有更优雅的方式来做到这一点?我不喜欢为了从输入字符串中逐个移除标记字符串而对 s 进行 n 次重新定义,但由于注释、字符串和 #region 指令(其中包含非单词字符)的存在,很难事先把字符串拆分成候选标记。
/// The kinds of fragment the tokenizer detects. Every case except EOF carries
/// the exact text that was matched, so the original input can be rebuilt by
/// concatenating the payloads in order.
type Token =
    /// A run of whitespace characters.
    | Whitespace of string
    /// A `//` line comment or a `/* ... */` block comment.
    | Comment of string
    /// A double-quoted string literal, including its quotes.
    | Strng of string
    /// A recognised keyword, or a `#region` / `#endregion` directive.
    | Keyword of string
    /// Any other run of non-whitespace, non-quote characters.
    | Text of string
    /// Marks the end of the input; carries no text.
    | EOF
/// Turns a string into the list of recognised tokens, in input order.
///
/// Every token carries the exact text it was built from, so concatenating the
/// payloads of the result reproduces `s`. The trailing EOF marker is not
/// included in the returned list.
///
/// Relies on `iskeyword` (defined elsewhere in this file) to decide whether a
/// word is a Keyword or plain Text.
let tokenize (s:String) =
    // Partial active pattern: runs `pattern` once against `st` and yields the
    // matched text on success. Every pattern used below is anchored with `^`,
    // so a success is always a prefix of `st`.
    let (|Prefix|_|) (pattern:string) (st:String) =
        let m = Regex.Match(st, pattern)
        if m.Success then Some m.Value else None

    // Classifies the token at the very start of `st`. Case order matters:
    // comments and strings must be tried before the generic "word" case.
    let nexttoken (st:String) =
        match st with
        | Prefix "^\s+" ws -> Whitespace(ws)
        // double slash-style comment; `$` lets a comment on the last line
        // (no trailing newline) be recognised too
        | Prefix "^//.*?\r?(\n|$)" c -> Comment(c)
        // /* */ style comments http://ostermiller.org/findcomment.html
        | Prefix "^/\*(.|[\r?\n])*?\*/" c -> Comment(c)
        // unescaped regexp = ^@?"([^"\\]|\\.|"")*"
        // http://wordaligned.org/articles/string-literals-and-regular-expressions
        | Prefix @"^@?""([^""\\]|\\.|"""")*""" str -> Strng(str)
        | Prefix "^#(end)?region" directive -> Keyword(directive)
        // all text until the next whitespace or quote; `+` (not `*`) so that
        // this case can never produce an empty token
        | Prefix @"^[^""\s]+" word ->
            if iskeyword word then Keyword(word) else Text(word)
        | "" -> EOF
        // Only reachable when `st` starts with a quote that does not open a
        // well-formed string literal (e.g. an unterminated string). Emit the
        // quote on its own so the loop always makes progress and no input is
        // silently dropped.
        | _ -> Text(st.Substring(0, 1))

    // Tail-recursive driver: peel one token off the front of `rest`, push it
    // on the accumulator, and continue with the remainder.
    let rec loop (rest:String) acc =
        let t = nexttoken rest
        match t with
        | EOF -> List.rev acc // accumulator is built backwards
        | Whitespace(x) | Comment(x)
        | Keyword(x) | Text(x) | Strng(x) ->
            loop (rest.Substring(x.Length)) (t::acc)

    loop s []
(如果有人真的感兴趣,我很乐意发布其余的代码)
编辑:采用 kvb 关于活动模式(active pattern)的出色建议后,核心部分变成了下面这样,好多了!
/// Classifies the token at the very start of `st`.
///
/// Uses the `Matches` and `IsKeyword` active patterns, which are defined
/// elsewhere; `Matches pattern s` is presumably a regex match that binds the
/// matched text to `s` (confirm against its definition).
///
/// Returns EOF only for the empty string. Case order matters: comments and
/// string literals must be tried before the generic "word" case.
let nexttoken (st:String) =
    match st with
    | Matches "^\s+" s -> Whitespace(s)
    | Matches "^//.*?\r?(\n|$)" s -> Comment(s) // double slash-style comments, including one on the last line
    | Matches "^/\*(.|[\r?\n])*?\*/" s -> Comment(s) // /* */ style comments http://ostermiller.org/findcomment.html
    | Matches @"^@?""([^""\\]|\\.|"""")*""" s -> Strng(s) // unescaped regexp = ^@?"([^"\\]|\\.|"")*" http://wordaligned.org/articles/string-literals-and-regular-expressions
    | Matches "^#(end)?region" s -> Keyword(s)
    | Matches @"^[^""\s]+" s -> // all text until next whitespace or quote (this may be wrong)
        match s with
        | IsKeyword _ -> Keyword(s)
        | _ -> Text(s)
    | "" -> EOF
    // Non-empty input that matched nothing above: it starts with a quote that
    // does not open a well-formed string literal (e.g. an unterminated
    // string). Emit that single character as Text instead of reporting EOF,
    // which would make the caller drop the rest of the input.
    | _ -> Text(st.Substring(0, 1))