我有一个包含 25000 个文本文件的文件夹，我想读取这些文件并将其中的单词存入数据库表中。这些文本文件按 1.txt、2.txt……依此类推直到 25000.txt 的格式命名。每个文本文件都包含如下形式的单词。
sample contents of my file
apple
cat
rat
shoe
这些单词也可能在其他文本文件中重复出现。我想要一段 C# 代码，能够读取这些文本文件，识别重复和不重复的单词，然后按以下形式将它们插入 SQL Server 数据库中。
keyword document name
cat 1.txt,2.txt,3.txt
rat 4.txt,1.txt
fish 5.txt
`
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using System.IO;
using System.Data.SqlClient;
namespace RAMESH
{
/// <summary>
/// Form that rebuilds the <c>Items</c> keyword table from the <c>*.txt</c> files
/// found in the folder typed into <c>textBox1</c>.
/// </summary>
public partial class Form1 : Form
{
    // NOTE(review): server name and sa credentials are hard-coded in source;
    // they should be moved to configuration and a least-privileged login.
    private const string ConnectionString =
        "data source=dell-pc\\sql1; initial catalog=db; user id=sa; password=a;";

    // Appends the document to an existing keyword row; when no row matched
    // (@@rowcount = 0) a new row is inserted instead. This replaces the former
    // "insert, and on any exception update" flow, so genuine database errors are
    // no longer mistaken for duplicate keys. The insert stays positional, exactly
    // as the original statement was.
    private const string UpsertSql =
        "update Items set docname = docname + ',' + @doc where itemname = @item; " +
        "if @@rowcount = 0 insert into Items values(@item, @doc)";

    public Form1()
    {
        InitializeComponent();
    }

    // Intentionally empty; kept because the designer file may still subscribe to it.
    private void textBox1_TextChanged(object sender, EventArgs e)
    {
    }

    /// <summary>
    /// Clears the <c>Items</c> table and refills it: for every <c>*.txt</c> file in the
    /// selected folder, each distinct non-blank line is stored as a keyword together
    /// with the file name (extension removed). A keyword found in several files ends
    /// up with a comma-separated list of those file names.
    /// </summary>
    /// <remarks>
    /// The delete and all writes run in one transaction, so a failure part-way through
    /// leaves the previous table contents in place. I/O and SQL exceptions are not
    /// caught here and propagate to the caller.
    /// </remarks>
    private void button2_Click(object sender, EventArgs e)
    {
        string folder = textBox1.Text;

        // Directory.Exists is false for empty, malformed and missing paths alike,
        // which would otherwise make Directory.GetFiles throw.
        if (!Directory.Exists(folder))
        {
            MessageBox.Show("Folder not found: " + folder);
            return;
        }

        string[] files = Directory.GetFiles(folder, "*.txt");

        using (SqlConnection con = new SqlConnection(ConnectionString))
        {
            con.Open();

            using (SqlTransaction tx = con.BeginTransaction())
            {
                using (SqlCommand clear = new SqlCommand("delete from Items", con, tx))
                {
                    clear.ExecuteNonQuery();
                }

                // One parameterized command reused for every word: file contents
                // are never spliced into the SQL text.
                using (SqlCommand upsert = new SqlCommand(UpsertSql, con, tx))
                {
                    SqlParameter item = upsert.Parameters.Add("@item", SqlDbType.NVarChar);
                    SqlParameter doc = upsert.Parameters.Add("@doc", SqlDbType.NVarChar);

                    foreach (string file in files)
                    {
                        doc.Value = Path.GetFileNameWithoutExtension(file);

                        foreach (string word in ReadDistinctWords(file))
                        {
                            item.Value = word;
                            upsert.ExecuteNonQuery();
                        }
                    }
                }

                tx.Commit();
            }
        }

        // Shown only after the connection is released so the modal dialog
        // does not keep it open.
        MessageBox.Show("keywords added successfully");
    }

    /// <summary>
    /// Reads a text file line by line and returns its trimmed, non-empty lines in
    /// first-seen order, dropping exact (ordinal) repeats so a document name is not
    /// appended twice for the same keyword.
    /// </summary>
    /// <param name="path">Path of the text file to read.</param>
    /// <returns>The distinct words of the file; empty when the file has no non-blank lines.</returns>
    private static List<string> ReadDistinctWords(string path)
    {
        List<string> words = new List<string>();
        HashSet<string> seen = new HashSet<string>(StringComparer.Ordinal);

        using (StreamReader reader = new StreamReader(path))
        {
            string line;
            while ((line = reader.ReadLine()) != null)
            {
                string word = line.Trim();
                if (word.Length > 0 && seen.Add(word))
                {
                    words.Add(word);
                }
            }
        }

        return words;
    }
}
} `