NOT x NOT y NOT z
我也尝试以这些方式回答一些简单的布尔查询,x AND y AND z
并且像这样x OR y OR z
x,y,z
的一些单词,它们中的任何一个属于不同的file.txt
或者它们都属于相同的file.txt
,无论如何。
我写了一个class TermDocMatrix
:
它必须能够回答布尔查询,我为此准备了一些方法,class TermDocMatrix{ }
但它不起作用。我什至一步一步调试代码,我意识到循环没有转。我不知道为什么,代码似乎很好。
那么你可以在这里看到代码:
class TermDocMatrix
{
//stores distinct terms
public HashSet<string> distinctTerm = new HashSet<string>();
//stores document id and its contents without splitting
public Dictionary<int, string> documentContentList = new Dictionary<int, string>();
//stores document and its terms collection
public Dictionary<string, List<string>> documentCollection = new Dictionary<string, List<string>>();
public Dictionary<string, List<int>> termDocumentIncidenceMatrix = new Dictionary<string, List<int>>();
//stop words collection
public List<string> stopWords = new List<string> { "on", "of", "The", "an", "a", "in" };
//boolean operators list
public string[] booleanOperator = new string[] { "AND", "OR", "NOT" };
private string _FileName = "words";
public string _Path = "";
int _lastDocNum = 0;
public TermDocMatrix(string IndexPath,string FileName)
{
if (_Path.EndsWith("\\") == false) _Path += "\\";
if (!Directory.Exists(IndexPath)) Directory.CreateDirectory(IndexPath);
LogManager.Configure(_Path + _FileName + ".txt", false);
// read all files
LoadFiles();
}
private void LoadFiles()
{
int count = 0;
if (File.Exists(_Path + _FileName + ".txt") == false)
return;
// load words
string b = File.ReadAllText(_Path + _FileName + ".txt");
String[] TermsCollection = RemoveStopsWords(b.ToUpper().Split(' '));
foreach (string term in TermsCollection)
{
//prepeare distinct terms collection
//remove stop words
if (!stopWords.Contains(term))
{
distinctTerm.Add(term);
}
}
//add document and their terms collection
documentCollection.Add(_FileName, TermsCollection.ToList());
//add document and its content for displaying the search result
documentContentList.Add(count, b);
count++;
}
public string ProcessFiles(string query)
{
termDocumentIncidenceMatrix = GetTermDocumentIncidenceMatrix(distinctTerm, documentCollection);
do
{
List<int> lst = ProcessQuery(query);
int count = 0;
if (lst != null)
{
foreach (int a in lst)
{
if (a == 1)
{
return documentContentList[count];
}
count++;
}
}
else
{
return "No search result found";
}
} while (1 == 1);
}
public int WordCount()
{
return documentCollection.Count;
}
public int DocumentCount
{
get
{
return _lastDocNum;
}
}
private void FilterQueryTerm(ref string[] str)
{
List<string> _queryTerm = new List<string>();
foreach (string queryTerm in str)
{
if (queryTerm.ToUpper().Equals("BUT") || termDocumentIncidenceMatrix.ContainsKey(queryTerm.ToUpper()) || booleanOperator.Contains(queryTerm))
{
_queryTerm.Add(queryTerm);
}
}
str = _queryTerm.ToArray();
}
//prepares Term Document Incidence Matrix
public Dictionary<string, List<int>> GetTermDocumentIncidenceMatrix(HashSet<string> distinctTerms, Dictionary<string, List<string>> documentCollection)
{
Dictionary<string, List<int>> termDocumentIncidenceMatrix = new Dictionary<string, List<int>>();
List<int> incidenceVector = new List<int>();
foreach (string term in distinctTerms)
{
//incidence vector for each terms
incidenceVector = new List<int>();
foreach (KeyValuePair<string, List<string>> p in documentCollection)
{
if (p.Value.Contains(term))
{
//document contains the term
incidenceVector.Add(1);
}
else
{
//document do not contains the term
incidenceVector.Add(0);
}
}
termDocumentIncidenceMatrix.Add(term, incidenceVector);
}
return termDocumentIncidenceMatrix;
}
//removes all stop words
public string[] RemoveStopsWords(string[] str)
{
List<string> terms = new List<string>();
foreach (string term in str)
{
if (!stopWords.Contains(term))
{
terms.Add(term);
}
}
return terms.ToArray();
}
//process the boolean query
public List<int> ProcessQuery(string query)
{
//query boolean operator
string bitWiseOp = string.Empty;
string[] queryTerm = RemoveStopsWords(query.ToUpper().Split(' '));
//remove query term that doesnot appears on document collection
FilterQueryTerm(ref queryTerm);
List<int> previousTermIncidenceV = null;
List<int> nextTermsIncidenceV = null;
//holds the bitwise operation result
List<int> resultSet = null;
//suppose on query X AND Y, X is previousTerm term and Y is nextTerm
Boolean hasPreviousTerm = false;
Boolean hasNotOperation = false;
foreach (string term in queryTerm)
{
//is a term
if (!booleanOperator.Contains(term) && !term.Equals("BUT"))
{
//query case: structure AND NOT analysis
if (hasNotOperation)
{
if (hasPreviousTerm)
{
nextTermsIncidenceV = ProcessBooleanOperator("NOT", GetTermIncidenceVector(term), nextTermsIncidenceV);
}
//query case: eg.NOT analysis
else
{
previousTermIncidenceV = ProcessBooleanOperator("NOT", GetTermIncidenceVector(term), nextTermsIncidenceV);
resultSet = previousTermIncidenceV;
}
hasNotOperation = false;
}
else if (!hasPreviousTerm)
{
previousTermIncidenceV = GetTermIncidenceVector(term);
resultSet = previousTermIncidenceV;
hasPreviousTerm = true;
}
else
{
nextTermsIncidenceV = GetTermIncidenceVector(term);
}
}
else if (term.Equals("NOT"))
{
//indicates that the term in the next iteration should be complemented.
hasNotOperation = true;
}
else
{
//'BUT' also should be evaluated as AND eg. structure BUT NOT semantic should be evaluated as structure AND NOT semantic
if (term.Equals("BUT"))
{
bitWiseOp = "AND";
}
else
bitWiseOp = term;
}
if (nextTermsIncidenceV != null && !hasNotOperation)
{
resultSet = ProcessBooleanOperator(bitWiseOp, previousTermIncidenceV, nextTermsIncidenceV);
previousTermIncidenceV = resultSet;
hasPreviousTerm = true;
nextTermsIncidenceV = null;
}
}
return resultSet;
}
//Process Boolean operators
public List<int> ProcessBooleanOperator(string op, List<int> previousTermV, List<int> nextTermV)
{
List<int> resultSet = new List<int>();
if (op.Equals("NOT"))
{
foreach (int a in previousTermV)
{
if (a == 1)
{
resultSet.Add(0);
}
else
{
resultSet.Add(1);
}
}
}
else if (op.ToUpper().Equals("AND")) //bitwise AND operation
{
for (int a = 0; a < previousTermV.Count; a++)
{
if (previousTermV[a] == 1 && nextTermV[a] == 1)
{
resultSet.Add(1);
}
else
{
resultSet.Add(0);
}
}
}
else if (op.ToUpper().Equals("OR")) //bitwise OR operation
{
for (int a = 0; a < previousTermV.Count; a++)
{
if (previousTermV[a] == 0 && nextTermV[a] == 0)
{
resultSet.Add(0);
}
else
{
resultSet.Add(1);
}
}
}
return resultSet;
}
//returns term incidence vector
public List<int> GetTermIncidenceVector(string term)
{
return termDocumentIncidenceMatrix[term.ToUpper()];
}
}
您现在需要了解另一个名为Class LogManager
I used it on my Class TermDocMatrix
. 这里 :
namespace WindowsFormsApplication1
{
internal class FileLogger
{
public static readonly FileLogger Instance = new FileLogger();
private string _filename;
private bool _showMethodName = false;
private string _FilePath = "";
public bool ShowMethodNames
{
get { return _showMethodName; }
}
public void Init(string filename, bool showmethodnames)
{
_showMethodName = showmethodnames;
_filename = filename;
// handle folder names as well -> create dir etc.
_FilePath = Path.GetDirectoryName(filename);
if (_FilePath != "")
{
_FilePath = Directory.CreateDirectory(_FilePath).FullName;
if (_FilePath.EndsWith("\\") == false)
_FilePath += "\\";
}
}
}
internal static class LogManager
{
public static void Configure(string filename, bool showmethodnames)
{
FileLogger.Instance.Init(filename, showmethodnames);
}
}
}
它必须工作,但它没有。请告诉我为什么它不起作用。当我要求回答时,我只会看到这个“未找到搜索结果”,无论我输入了什么样的布尔查询。