所以我正在开发一个可以修补损坏的单词的应用程序。
以下面这句话为例:
mny people say there is a error in this sentence
在 Swift 中,我们可以使用 UITextChecker
得到一个不错的结果,推测单词 mny
实际上可能是什么……然而,它实际上会给出好几个候选词,其中一个是 many,
另一个是 money。
很明显,money 并不适合这句话。有没有办法检查句子本身是否合乎逻辑?
所以我正在开发一个可以修补损坏的单词的应用程序。
以下面这句话为例:
mny people say there is a error in this sentence
在 Swift 中,我们可以使用 UITextChecker
得到一个不错的结果,推测单词 mny
实际上可能是什么……然而,它实际上会给出好几个候选词,其中一个是 many,
另一个是 money。
很明显,money 并不适合这句话。有没有办法检查句子本身是否合乎逻辑?
请注意,这个方案仍有改进的空间。我把这个 Swift 3 的解决方案更新到了 Swift 5。值得一提的是,它最初是受这个 Python 教程的启发。
创建一个新的 iOS 项目,在其中添加一个名为 bigtext.txt 的文本文件,
该文件将包含此文本。这将是我们的“学习”词典。然后在 ViewController
中写入:
import UIKit
import NaturalLanguage
/// Demo screen that trains a `SpellChecker` from the bundled `bigtext.txt`
/// and prints a word-by-word correction of a sample sentence to the console.
class ViewController: UIViewController {
    override func viewDidLoad() {
        super.viewDidLoad()
        let inputString = "mny people say there is a error in this sentence"

        // Read a text file and "study" the model
        guard let path = Bundle.main.path(forResource: "bigtext", ofType: "txt") else {
            print("Path not available")
            return
        }
        let checker = SpellChecker(contentsOfFile: path)

        // The corrected sentence is rebuilt from slices of the immutable input.
        // Token ranges are indices into `inputString`; applying them to a string
        // that has already been edited is invalid as soon as a replacement has a
        // different length than the word it replaces.
        var newString = ""
        var cursor = inputString.startIndex

        // NLTokenizer is used to iterate over the words of the sentence.
        let tokenizer = NLTokenizer(unit: .word)
        tokenizer.string = inputString
        tokenizer.enumerateTokens(in: inputString.startIndex..<inputString.endIndex) { tokenRange, _ in
            // Copy whatever sits between the previous token and this one verbatim.
            newString.append(contentsOf: inputString[cursor..<tokenRange.lowerBound])
            cursor = tokenRange.upperBound

            let word = String(inputString[tokenRange])
            let checked = checker?.correct(word: word)
            if word == checked {
                newString.append(word)
                print("\(word) unchanged")
            } else {
                // Keep the original word when no checker (and so no correction) exists.
                newString.append(checked ?? word)
                let candidates = checker?.candidates(word: word)
                print("Correct:\t\(word) -> \(String(describing: checked))")
                print("Candidates:\t\(word) -> \(String(describing: candidates))")
            }
            return true
        }
        // Copy the text that follows the last token.
        newString.append(contentsOf: inputString[cursor...])
        print("Result: \(newString)")
    }
}
/// Returns every string that is one simple edit away from `word`.
///
/// The edits are: deleting one character, swapping two adjacent characters,
/// replacing one character with a letter `a`–`z`, and inserting a letter
/// `a`–`z` at any position, including after the last character.
///
/// - Parameter word: The word to edit.
/// - Returns: The set of edited strings; empty when `word` is empty.
func edits(word: String) -> Set<String> {
    guard !word.isEmpty else { return [] }

    let alphabet = "abcdefghijklmnopqrstuvwxyz"
    // Every split position, including `endIndex`, so a letter can also be
    // inserted after the last character.
    let boundaries = Array(word.indices) + [word.endIndex]

    var result = Set<String>()
    for boundary in boundaries {
        let left = word[word.startIndex..<boundary]
        let right = word[boundary..<word.endIndex]
        let tail = right.dropFirst()

        if let first = right.first {
            // Delete the character at the split point.
            result.insert("\(left)\(tail)")
            // Swap the two characters that follow the split point.
            if let second = tail.first {
                result.insert("\(left)\(second)\(first)\(tail.dropFirst())")
            }
        }
        for letter in alphabet {
            // Replace the character at the split point (needs one to replace).
            if !right.isEmpty {
                result.insert("\(left)\(letter)\(tail)")
            }
            // Insert a letter at the split point.
            result.insert("\(left)\(letter)\(right)")
        }
    }
    return result
}
/// A frequency-based spelling corrector.
///
/// The checker counts how often each word occurs in a training text and
/// suggests, for a given word, the most frequent known word within one or two
/// simple edits (see `edits(word:)`).
struct SpellChecker {
    /// Number of occurrences of each word seen during training.
    var knownWords: [String: Int] = [:]

    /// Records one more occurrence of `word`.
    mutating func train(word: String) {
        knownWords[word, default: 0] += 1
    }

    /// Trains a checker from in-memory text.
    ///
    /// The text is lowercased and split on every scalar outside `a`...`z`;
    /// each resulting run of letters is counted as one word.
    init(text: String) {
        let lowered = text.lowercased()
        let words = lowered.unicodeScalars.split(whereSeparator: { !("a"..."z").contains($0) }).map { String($0) }
        for word in words { train(word: word) }
    }

    /// Trains a checker from the UTF-8 text file at path `file`.
    ///
    /// Returns `nil` when the file cannot be read.
    init?(contentsOfFile file: String) {
        guard let text = try? String(contentsOfFile: file, encoding: .utf8) else {
            return nil
        }
        self.init(text: text)
    }

    /// Returns the known words that are two edits away from `word`,
    /// or `nil` when there are none.
    func knownEdits2(word: String) -> Set<String>? {
        var knownEdits: Set<String> = []
        for edit in edits(word: word) {
            if let found = known(words: edits(word: edit)) {
                knownEdits.formUnion(found)
            }
        }
        return knownEdits.isEmpty ? nil : knownEdits
    }

    /// Returns the subset of `words` present in `knownWords`,
    /// or `nil` when none of them is known.
    func known<S: Sequence>(words: S) -> Set<String>? where S.Element == String {
        let found = Set(words.filter { knownWords[$0] != nil })
        return found.isEmpty ? nil : found
    }

    /// Returns the correction candidates for `word`.
    ///
    /// The first non-empty set wins, in this order: the word itself if known,
    /// known words one edit away, known words two edits away. The result is
    /// empty when nothing matches.
    func candidates(word: String) -> Set<String> {
        return known(words: [word])
            ?? known(words: edits(word: word))
            ?? knownEdits2(word: word)
            ?? []
    }

    /// Returns the most frequent candidate for `word`, or `word` itself when
    /// there are no candidates.
    func correct(word: String) -> String {
        let options = candidates(word: word)
        // Unknown words count as 0 so that any known candidate — even one seen
        // only once — beats the misspelling. Ties are broken alphabetically so
        // the result does not depend on Set iteration order.
        let best = options.max { lhs, rhs in
            let lhsCount = knownWords[lhs] ?? 0
            let rhsCount = knownWords[rhs] ?? 0
            return lhsCount != rhsCount ? lhsCount < rhsCount : lhs > rhs
        }
        return best ?? word
    }
}
运行后会输出类似下面的内容(具体结果取决于 bigtext.txt 的内容):
Correct: mny -> Optional("may")
Candidates: mny -> Optional(Set(["any", "ny", "may", "many"]))
people unchanged
say unchanged
there unchanged
is unchanged
a unchanged
error unchanged
in unchanged
this unchanged
sentence unchanged
Result: may people say there is a error in this sentence
请注意,这里我们只是简单地采用了第一个更正候选词。要得到更合理的结果,还需要先理清词序,并理解句子的上下文。