我想连接结构通过特里树的路径记录-仅在使用不匹配路径时记录,这是我的特里树搜索功能:
void Trie::suggestCorrectionsHelper(nodeT *w, int index, string record, string final, double edits, Trie::MatchesT &matchSet, string gap_record, int level)
{
if (record.empty() || w == NULL ) {//tried everyChar
matchSet.candidate = final+record;
if ( matchSet.candidate.length() == record_delimiterFormat.size() ) { //check length and record exists
updateSet(w, matchSet, edits, gap_record );
}
} else {//look for end of node paths
if(index == w->alpha.size() ) { return; //no more paths here
} else {
//CASE A: self
if (record[0] == w->alpha[index].token_char ) {//replace char with self so no edit distance
suggestCorrectionsHelper(w->alpha[index].next, 0, record.substr(1), final+w->alpha[index].token_char, edits, matchSet, gap_record, level+1); //follow char, no edit
//CASE B: follow new path
} else {
//CASE B1: wilds
if (record[0] == '*' && (final.length() + record.length() ) != record_delimiterFormat.size() ) {
suggestCorrectionsHelper(w->alpha[index].next, 0, '*'+record.substr(1), final+w->alpha[index].token_char, edits+1, matchSet, gap_record+=(IntegerToString(level)+","), level+1); //wilds, keep adding chars
//CASE B2: no wilds
} else {
suggestCorrectionsHelper(w->alpha[index].next, 0, record.substr(1), final+w->alpha[index].token_char, edits+1, matchSet, gap_record+=(IntegerToString(level)+","), level+1);//follow path
}
}
//CASE C: try next path - place outside else to allow finding alternative char even when self path is valid
suggestCorrectionsHelper(w, index+1, record, final, edits, matchSet, gap_record, level);//increment and check next path
}
}
}
路径完成后,将添加到:
//write to struct and then adds to struct vector
void Trie::updateSet(nodeT *w, Trie::MatchesT & matchSet, double &edits, string gap_record)
{
CorrectionT cs; // struct with attributes of match n
cs.suggestedRecord = matchSet.candidate; // add attributes to struct
cs.editDistance = edits;
cs.gap = gap_record;
matchSet.candidateStack.push_back(cs);
}
我期待一个输出看起来像0,1,3,5,12
但我明白了0,0,0,0,0,0,2,2,2,3,3,4,12
它不能超过一个级别,所以通过递归我正在记录路径,我想单独记录每条路径;如何?
trie 类如下所示:
void Trie::AddFirstNode(){ // run-once, initial condition of first node
nodeT *tempNode = new nodeT;
root = tempNode;
}
void Trie::InsertNode(nodeT *w, ParseT & packet, string codeSoFar) // add new char
{
for (unsigned int i = 0; i <= w->alpha.size(); i++) { // loop and insert tokens in sorted vector
if (i == w->alpha.size() || codeSoFar[0] < w->alpha[i].token_char) { //position
//create new TokenT
tokenT *tempChar = new tokenT;
tempChar->next = NULL;
tempChar->token_char = codeSoFar[0];
nodeT *tempLeaf = new nodeT; //create new nodeT
tempChar->next = tempLeaf; //link TokenT with its nodeT
AddRecord(tempLeaf, packet, codeSoFar.substr(1)); //add next char in record, if last char AddRecord will terminate
w->alpha.insert(w->alpha.begin()+i, *tempChar);
return;
}
}
}
//function to add record to trie
void Trie::AddRecord(nodeT *w, ParseT & packet, string codeSoFar)
{
if (codeSoFar.empty() ) { //condition 0: record's last char
return;
} else { //keep parsing down record path
for (unsigned int i = 0; i < w->alpha.size(); i++) {
if (codeSoFar[0] == w->alpha[i].token_char) {
return AddRecord(w->alpha[i].next, packet, codeSoFar.substr(1)); // condition 2: char exists
}
}
InsertNode(w, packet, codeSoFar); //condition 3: no existing char
}
}
结构
struct MatchesT {
string candidate;
double testDistance; //try edit distance
list<CorrectionT> candidateStack; //
};
输出
void PrintCorrections(Trie & tri, Trie::MatchesT & matchSet, ofstream& myfile)
{
while (!matchSet.candidateStack.empty() ) {
count++;
Trie::CorrectionT corr = matchSet.candidateStack.back();
matchSet.candidateStack.pop_back();
myfile << corr.suggestedRecord << "," << fixed << setprecision(2) << corr.editDistance << "," << corr.gap << "\n";
}