0

我有如下格式的数据。

CONTIG  POSITION    INFO
CF1_seqReads_contig_5   117537  .   C   T   222 .   DP=31;VDB=0.0507;AF1=1;AC1=2;DP4=0,0,16,11;MQ=38;FQ=-108    GT:PL:GQ    1/1:255,81,0:99
CF1_seqReads_contig_5   117541  .   A   T   222 .   DP=30;VDB=0.0381;AF1=1;AC1=2;DP4=0,0,16,11;MQ=39;FQ=-108    GT:PL:GQ    1/1:255,81,0:99
CF1_seqReads_contig_9   96299   .   G   T   150 .   DP=44;VDB=0.0330;AF1=1;AC1=2;DP4=0,0,5,20;MQ=31;FQ=-102 GT:PL:GQ    1/1:183,75,0:99
CF1_seqReads_contig_9   96305   .   G   C   88  .   DP=43;VDB=0.0279;AF1=1;AC1=2;DP4=0,0,4,5;MQ=29;FQ=-54   GT:PL:GQ    1/1:121,27,0:51
CF1_seqReads_contig_11  73382   .   G   C   37.2    .   DP=19;VDB=0.0063;AF1=0.5263;AC1=1;DP4=0,1,2,3;MQ=47;FQ=-17.1;PV4=1,0.14,1,0.0015    GT:PL:GQ    0/1:67,0,10:13
CF1_seqReads_contig_11  130161  .   G   T   59.3    .   DP=30;VDB=0.0545;AF1=1;AC1=2;DP4=0,0,2,3;MQ=55;FQ=-42   GT:PL:GQ    1/1:92,15,0:27
CF1_seqReads_contig_16  8578    .   G   A   167 .   DP=34;VDB=0.0187;AF1=1;AC1=2;DP4=0,0,3,19;MQ=29;FQ=-93  GT:PL:GQ    1/1:200,66,0:99
CF1_seqReads_contig_19  169 .   C   T   110 .   DP=37;VDB=0.0545;AF1=0.5336;AC1=1;DP4=0,3,1,9;MQ=36;FQ=-18.1;PV4=1,0.33,1,0.014 GT:PL:GQ    0/1:140,0,9:12
CF1_seqReads_contig_19  190 .   G   T   30.1    .   DP=39;VDB=0.0004;AF1=0.5102;AC1=1;DP4=0,3,6,0;MQ=47;FQ=-13.2;PV4=0.012,1,1,1    GT:PL:GQ    0/1:60,0,14:17
CF1_seqReads_contig_20  229322  .   C   T   222 .   DP=55;VDB=0.0521;AF1=1;AC1=2;DP4=0,0,19,27;MQ=57;FQ=-165    GT:PL:GQ    1/1:255,138,0:99
CF1_seqReads_contig_40  184 .   C   A   36.5    .   DP=23;VDB=0.0014;AF1=1;AC1=2;DP4=1,0,11,0;MQ=29;FQ=-34;PV4=1,0.0039,1,0.29  GT:PL:GQ    1/1:68,7,0:12
CF1_seqReads_contig_40  194 .   C   T   59  .   DP=28;VDB=0.0014;AF1=1;AC1=2;DP4=0,1,17,0;MQ=29;FQ=-62;PV4=0.056,0.44,1,0.3 GT:PL:GQ    1/1:92,35,0:67
CF1_seqReads_contig_40  197 .   C   G   59  .   DP=29;VDB=0.0063;AF1=1;AC1=2;DP4=0,0,11,0;MQ=29;FQ=-60  GT:PL:GQ    1/1:92,33,0:63
CF1_seqReads_contig_49  111973  .   G   A   32.3    .   DP=13;VDB=0.0063;AF1=0.543;AC1=1;DP4=0,1,3,1;MQ=29;FQ=-19;PV4=0.4,0.44,1,0.16   GT:PL:GQ    0/1:62,0,8:11
CF1_seqReads_contig_49  124688  .   G   T   42.1    .   DP=48;VDB=0.0532;AF1=0.5064;AC1=1;DP4=2,0,4,3;MQ=45;FQ=-11.3;PV4=0.5,0.095,1,0.099  GT:PL:GQ    0/1:72,0,16:19
CF1_seqReads_contig_51  3725    .   C   T   39.3    .   DP=24;VDB=0.0381;AF1=1;AC1=2;DP4=0,0,1,4;MQ=29;FQ=-42   GT:PL:GQ    1/1:72,15,0:27
CF1_seqReads_contig_79  24357   .   C   T   80.1    .   DP=36;VDB=0.0435;AF1=1;AC1=2;DP4=0,0,4,3;MQ=34;FQ=-48   GT:PL:GQ    1/1:113,21,0:39
CF1_seqReads_contig_79  24367   .   C   T   65  .   DP=40;VDB=0.0187;AF1=1;AC1=2;DP4=0,0,7,1;MQ=38;FQ=-51   GT:PL:GQ    1/1:98,24,0:45
CF1_seqReads_contig_115 14710   .   C   A   80  .   DP=40;VDB=0.0381;AF1=1;AC1=2;DP4=0,0,1,7;MQ=60;FQ=-51   GT:PL:GQ    1/1:113,24,0:45
CF1_seqReads_contig_167 4920    .   G   T   60  .   DP=19;VDB=0.0085;AF1=0.5008;AC1=1;DP4=1,2,4,2;MQ=41;FQ=-4.12;PV4=0.52,1,0.077,0.23  GT:PL:GQ    0/1:90,0,25:28

我想删除那些与同一 CONTIG 中其他行的 POSITION 相差小于 15 的行。例如,前两行将被删除,因为它们属于同一个 contig,并且它们的位置(117537 和 117541)相差小于 15。第三行和第四行也是如此。另一方面,第五行和第六行将被打印,因为尽管它们属于同一个 CONTIG,它们的位置相差超过 15 (130161-73382>15)。我尝试用下面的代码解决这个问题:

/*
 * Walks the input line by line, splitting each line on tabs into a contig
 * name (field 0) and an integer position (field 1). Prints the current line
 * when the line read immediately after it has the same contig and a position
 * less than 15 greater than the current one.
 *
 * NOTE(review): `in` is declared as File, yet hasNextLine()/nextLine() are
 * called on it; if File is java.io.File those methods do not exist, so a
 * java.util.Scanner was presumably intended - confirm.
 * NOTE(review): `file` is not declared anywhere in this method; presumably it
 * is meant to be the same reader as `in` - confirm.
 * NOTE(review): if the input still contains the "CONTIG POSITION INFO" header
 * row, Integer.parseInt would be handed a non-numeric field - confirm the
 * header is stripped before this is called.
 */
public void getFilteredData(File in){
   String contig = "";
   int position = -1;
   // lastContig / lastPosition are assigned below but never read in this method.
   String lastContig = "";
   int lastPosition = -1;
   String nextContig = "";
   int nextPosition = -1;

   while(in.hasNextLine()){
      String curLine = in.nextLine();
      String[] curInfo = curLine.split("\t");

      /* Isolate the contig name and the SNP position of the current line */ 
      contig = curInfo[0];
      position = Integer.parseInt(curInfo[1]);
      if(file.hasNextLine()){
         // Reading the "next" line here consumes it. If `file` and `in` are the
         // same reader, the outer loop never sees that line as curLine, so lines
         // are examined in non-overlapping pairs.
         String nextLine = file.nextLine();
         nextContig = nextLine.split("\t")[0];
         nextPosition = Integer.parseInt(nextLine.split("\t")[1]);
         if(nextContig.equals(contig)){
            // Prints the current line when the following line is CLOSER than 15;
            // the following line itself is never printed.
            if( nextPosition-position<15){
               System.out.println(curLine);
            }
         }
         lastContig = contig;
         lastPosition = position;
      }
   }
}  

这段代码没有给出我想要的结果。你能帮我解决这个问题吗?我认为记录上一行和下一行的信息会有所帮助,但我不知道该如何继续。谢谢。

4

1 回答 1

0

以下是可以运行的代码,还可以进一步优化。

公共静态 void main(String[] args) 抛出异常 {

    BufferedReader reader = new BufferedReader(new FileReader("F:/contig.txt"));
    String currLine;
    String[] array;
    String contig;
    int position;

    String nextLine;
    String[] nextArray;
    String nextContig;
    int nextPosition;

    reader.readLine(); // ignore first line
    while ((currLine = reader.readLine()) != null) {

        array = currLine.split("\\s+");
        contig = array[0];
        position = Integer.parseInt(array[1]);

        nextLine = reader.readLine();
        if (nextLine != null) {
            nextArray = nextLine.split("\\s+");
            nextContig = nextArray[0];
            nextPosition = Integer.parseInt(nextArray[1]);

            if (nextContig.equals(contig)) {
                if (nextPosition - position < 15) {
                    System.out.println(currLine);
                }
            }
        } else {
            break;
        }
    }

    reader.close();
}
于 2013-02-22T08:24:21.647 回答