我有一个很大的基因组序列文件,需要将它拆分成多个较小的 .txt 文件。
序列看起来像这样:
>supercont1.1 of Geomyces destructans 20631-21
AGATTTTCTTAATAACTTGTTCAATGTGTGTTCAAATGATATGCCGTGATGTATGTAGCA
TAAACAGATGTAGTAGAAGAGTTTGCAGCAATCGTTGAGTAGTATTGCTTCTGTTGTTGG
>supercont1.2 of Geomyces destructans 20631-21
AGATTTTCTTAATAACTTGTTCAATGTGTGTTCAAATGATATGCCGTGATGTATGTAGCA
TAAACAGATGTAGTAGAAGAGTTTGCAGCAATCGTTGAGTAGTATTGCTTCTGTTGTTGG
TAAACAGATGTAGTAGAAGAGTTTGCAGCAATCGTTGAGTAGTATTGCTTCTGTTGTTGG
>supercont1.3 of Geomyces destructans 20631-21
AGATTTT (...)
它应该被拆分为以如下方式命名的小文件:“1.1-Geomyces-destructans--20631-21”、“1.2-Geomyces...”,每个文件包含对应的基因组数据。
在 @JimMischel 的帮助下,我目前的代码如下所示:
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using System.IO;
namespace genom1
{
    /// <summary>
    /// Main form of the splitter tool. The user picks a multi-record sequence file and a
    /// target folder; every record (its '>' header line plus the lines that follow it) is
    /// written to its own .txt file named after the header.
    /// </summary>
    public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();
        }

        // File-type filter shown in the open-file dialog (user-facing text).
        string filter = "Textové soubory|*.txt|Soubory FASTA|*.fasta|Všechny soubory|*.*";

        /// <summary>
        /// Click handler: asks for the input file and the output folder, sizes the progress
        /// bar to the number of input lines, then splits the file into one file per record.
        /// Does nothing if either dialog is cancelled.
        /// </summary>
        private void doit_Click(object sender, EventArgs e)
        {
            bar.Value = 0;

            using (OpenFileDialog opf = new OpenFileDialog())
            {
                // filter for choosing file types
                opf.Filter = filter;
                if (opf.ShowDialog() != DialogResult.OK)
                {
                    return;
                }

                // One progress step per input line, so the line count is needed up front.
                bar.Maximum = File.ReadLines(opf.FileName).Count();
                bar.Step = 1;

                using (FolderBrowserDialog fbd = new FolderBrowserDialog())
                {
                    // dialog description
                    fbd.Description = "Vyber složku, do které chceš rozdělit načtený soubor: \n\n" + opf.FileName;
                    if (fbd.ShowDialog() != DialogResult.OK)
                    {
                        return;
                    }

                    SplitRecords(opf.FileName, fbd.SelectedPath);
                }
            }
        }

        /// <summary>
        /// Streams <paramref name="sourcePath"/> line by line and copies each record into
        /// its own file inside <paramref name="targetFolder"/>.
        /// </summary>
        /// <param name="sourcePath">Path of the input file to split.</param>
        /// <param name="targetFolder">Folder that receives the per-record files.</param>
        private void SplitRecords(string sourcePath, string targetFolder)
        {
            // Output paths already produced during this run. A repeated name is appended to
            // instead of truncated, so two records with the same header do not erase each other.
            HashSet<string> writtenPaths = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
            StreamWriter writer = null;

            try
            {
                foreach (string line in File.ReadLines(sourcePath))
                {
                    bar.PerformStep();

                    // Blank lines carry no data; skipping them also keeps line[0] safe.
                    if (line.Length == 0)
                    {
                        continue;
                    }

                    if (line[0] == '>')
                    {
                        // A header starts a new record: finish the previous file first, then
                        // open the file named after THIS header so the data that follows
                        // lands in the matching file.
                        if (writer != null)
                        {
                            writer.Dispose();
                            writer = null;
                        }

                        string path = Path.Combine(targetFolder, BuildFileName(line) + ".txt");
                        bool append = !writtenPaths.Add(path);
                        writer = new StreamWriter(path, append);
                    }

                    // Lines that appear before the first header belong to no record and are
                    // skipped; everything else (header included) goes to the current file.
                    if (writer != null)
                    {
                        writer.WriteLine(line);
                    }
                }
            }
            finally
            {
                // Closes the last record's file, also when reading or writing failed midway.
                if (writer != null)
                {
                    writer.Dispose();
                }
            }
        }

        /// <summary>
        /// Derives the output file name (without extension) from a record header line.
        /// </summary>
        /// <param name="header">The header line, starting with '>'.</param>
        /// <returns>
        /// The header with ">supercont" and "of" removed and spaces turned into dashes;
        /// characters that are not allowed in file names are replaced by '_'.
        /// </returns>
        private static string BuildFileName(string header)
        {
            StringBuilder name = new StringBuilder(header);
            name.Replace(">supercont", "");
            // NOTE(review): this removes "of" everywhere, including inside longer words;
            // kept as-is so existing output names do not change.
            name.Replace("of", "");
            name.Replace(" ", "-");

            foreach (char invalid in Path.GetInvalidFileNameChars())
            {
                name.Replace(invalid, '_');
            }

            // Guard against a header that reduces to nothing, which would yield ".txt".
            return name.Length == 0 ? "unnamed" : name.ToString();
        }
    }
}