1

我有以下输入文件:

输入文件

a    00002098    0    0.75    unable#1    (usually followed by `to') not having the necessary means or skill or know-how; "unable to get to town without a car"; "unable to obtain funds"
a    00002312    0.23    0.43    dorsal#2 abaxial#1    facing away from the axis of an organ or organism; "the abaxial surface of a leaf is the underside or side facing away from the stem"
a    00023655    0    0.5    outside#10 away#3 able#2    (of a baseball pitch) on the far side of home plate from the batter; "the pitch was away (or wide)"; "an outside pitch"    

我想从这个文件得到以下输出结果:

a,00002098,0,0.75,unable#1
a,00002312,0.23,0.43,dorsal#2 
a,00002312,0.23,0.43,abaxial#1    
a,00023655,0, 0.5,outside#10    
a,00023655,0, 0.5,away#3
a,00023655,0, 0.5,able#2    

我编写以下代码来提取上述结果

 TextWriter tw = new StreamWriter("D:\\output.txt");

        private void button1_Click(object sender, EventArgs e)
        {
            if (textBox1.Text != null)
            {
                StreamReader reader = new StreamReader(@"C:\Users\Zia\Desktop\input.txt");
                string line;
                String lines = "";
                while ((line = reader.ReadLine()) != null)
                {
                    String[] str = line.Split('\t');
                    String[] words = str[3].Split(' ');
                    for (int k = 0; k < words.Length; k++)
                    {
                        for (int i = 0; i < str.Length; i++)
                        {
                            if (i + 1 != str.Length)
                            {
                                lines = lines + str[i] + ",";
                            }
                            else
                            {
                                lines = lines + words[k] + "\r\n";
                            }
                        }
                    }
                }
                tw.Write(lines);
                tw.Close();
                reader.Close();
            }
        }    

当我更改索引时,此代码会给出以下错误,而不是给出期望的结果。
错误
索引超出了数组的范围。
提前致谢。

4

3 回答 3

2

为什么不试试这个算法,循环文本中的每一行:

// Print one comma-separated row per term in the fifth tab-separated column of `line`.
string[] columns = line.Split('\t');
string[] terms = columns[4].Split(' ');
for (int t = 0; t < terms.Length; t++)
{
    // The first four columns are repeated in front of every term.
    Console.WriteLine(string.Join(",", columns[0], columns[1], columns[2], columns[3], terms[t]));
}

这似乎能准确地输出您需要的内容。只需把 Console.WriteLine 改为写入文件即可。

于 2013-05-26T08:11:56.447 回答
1

据我理解,您希望(最后一列中)每个包含 # 的单词都单独生成一个新的结果行,所以代码应该类似于:

        // Collects one comma-separated row per '#'-tagged term found in the fifth
        // tab-separated column of every line of `str`.
        List<string> result = new List<string>();

        var lines = str.Split('\n');
        foreach (var line in lines)
        {
            // Strip the '\r' left behind when the text uses Windows (CRLF) line endings.
            var words = line.TrimEnd('\r').Split('\t');

            // Blank lines (such as the one after a trailing newline) have no fifth column.
            if (words.Length < 5)
            {
                continue;
            }

            string res = string.Join(",", words, 0, 4);

            var xx = words[4].Split(' ').Where(word => word.Contains("#"));
            foreach (var s in xx)
            {
                // Plain concatenation: passing data to String.Format as the format string
                // would throw FormatException if the text contained '{' or '}'.
                result.Add(res + "," + s);
            }
        }
于 2013-05-26T09:37:51.517 回答
0
       // Loads the whole input file, breaks it into lines on '\r' and '\n', and prints
       // every row findItems produces for each line, skipping the " " placeholder rows.
       private void extrcat()
       {
            using (StreamReader input = new StreamReader(@"C:\Users\Zia\Desktop\input.txt"))
            {
                string[] rows = input.ReadToEnd().Split(new char[] { '\r', '\n' });
                foreach (string row in rows)
                {
                    List<string> produced = findItems(row);
                    for (int n = 0; n < produced.Count; n++)
                    {
                        // A single space is the marker findItems uses for "nothing to print".
                        if (produced[n] == " ")
                        {
                            continue;
                        }

                        Console.WriteLine(produced[n]);
                    }
                }
            }
       }
        /// <summary>
        /// Expands one tab-separated input line into one comma-separated row per
        /// '#'-tagged term in the fifth column, e.g. "a,00002312,0.23,0.43,dorsal#2".
        /// </summary>
        /// <param name="item">A single input line without its line terminator.</param>
        /// <returns>
        /// The generated rows, or a list containing only " " when the line is empty,
        /// has fewer than five columns, or has no term containing '#'.
        /// </returns>
        private static List<string> findItems(string item)
        {
            List<string> items = new List<string>();

            // Accept real tab characters as well as the two-character literal "\t",
            // since earlier versions of this method converted both into commas.
            string[] fields = string.IsNullOrEmpty(item)
                ? new string[0]
                : item.Split(new string[] { "\t", "\\t" }, StringSplitOptions.None);

            if (fields.Length >= 5)
            {
                string prefix = string.Join(",", fields, 0, 4);

                // Only the fifth column is scanned, so a '#' inside the gloss is never
                // mistaken for a term and multi-digit sense numbers ("outside#10") stay intact.
                string[] terms = fields[4].Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
                foreach (string term in terms)
                {
                    if (term.Contains("#"))
                    {
                        items.Add(prefix + "," + term);
                    }
                }
            }

            // Callers treat a single space as "nothing to print".
            if (items.Count == 0)
            {
                items.Add(" ");
            }

            return items;
        }

在此处输入图像描述

于 2013-05-26T12:01:15.777 回答