1

我有以下 XML 文件(out.xml)。我想在 VB.NET 中读取该文件,获取 Hsp 标签下第一个 Hsp_qseq、Hsp_hseq 和 Hsp_midline 的值。

<?xml version="1.0"?>
<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "NCBI_BlastOutput.dtd">
<BlastOutput>
  <BlastOutput_program>blastn</BlastOutput_program>
  <BlastOutput_version>BLASTN 2.2.25+</BlastOutput_version>
  <BlastOutput_reference>Zheng Zhang, Scott Schwartz, Lukas Wagner, and Webb Miller (2000), &quot;A greedy algorithm for aligning DNA sequences&quot;, J Comput Biol 2000; 7(1-2):203-14.</BlastOutput_reference>
  <BlastOutput_db>positive_Controls</BlastOutput_db>
  <BlastOutput_query-ID>Query_1</BlastOutput_query-ID>
  <BlastOutput_query-def>rs8192709_C Positive Contol Common Sequence</BlastOutput_query-def>
  <BlastOutput_query-len>249</BlastOutput_query-len>
  <BlastOutput_param>
    <Parameters>
      <Parameters_expect>10</Parameters_expect>
      <Parameters_sc-match>1</Parameters_sc-match>
      <Parameters_sc-mismatch>-2</Parameters_sc-mismatch>
      <Parameters_gap-open>0</Parameters_gap-open>
      <Parameters_gap-extend>0</Parameters_gap-extend>
      <Parameters_filter>L;m;</Parameters_filter>
    </Parameters>
  </BlastOutput_param>
  <BlastOutput_iterations>
    <Iteration>
      <Iteration_iter-num>1</Iteration_iter-num>
      <Iteration_query-ID>Query_1</Iteration_query-ID>
      <Iteration_query-def>rs8192709_C Positive Contol Common Sequence</Iteration_query-def>
      <Iteration_query-len>249</Iteration_query-len>
      <Iteration_hits>
        <Hit>
          <Hit_num>1</Hit_num>
          <Hit_id>gnl|BL_ORD_ID|0</Hit_id>
          <Hit_def>rs8192709_C Positive Contol Common Sequence</Hit_def>
          <Hit_accession>0</Hit_accession>
          <Hit_len>249</Hit_len>
          <Hit_hsps>
            <Hsp>
              <Hsp_num>1</Hsp_num>
              <Hsp_bit-score>460.936057665848</Hsp_bit-score>
              <Hsp_score>249</Hsp_score>
              <Hsp_evalue>9.74431021697707e-133</Hsp_evalue>
              <Hsp_query-from>1</Hsp_query-from>
              <Hsp_query-to>249</Hsp_query-to>
              <Hsp_hit-from>1</Hsp_hit-from>
              <Hsp_hit-to>249</Hsp_hit-to>
              <Hsp_query-frame>1</Hsp_query-frame>
              <Hsp_hit-frame>1</Hsp_hit-frame>
              <Hsp_identity>249</Hsp_identity>
              <Hsp_positive>249</Hsp_positive>
              <Hsp_gaps>0</Hsp_gaps>
              <Hsp_align-len>249</Hsp_align-len>
              <Hsp_qseq>GGTCAGGATAAAAGGCCCAGTTGGAGGCTGCAGCAGGGTGCAGGGCAGTCAGACCAGGACCATGGAACTCAGCGTCCTCCTCTTCCTTGCACTCCTCACAGGACTCTTGCTACTCCTGGTTCAGCGCCACCCTAACACCCATGACCGCCTCCCACCAGGGCCCCGCCCTCTGCCCCTTTTGGGAAACCTTCTGCAGATGGATAGAAGAGGCCTACTCAAATCCTTTCTGAGGGTAAGACACAGACGAAT</Hsp_qseq>
              <Hsp_hseq>GGTCAGGATAAAAGGCCCAGTTGGAGGCTGCAGCAGGGTGCAGGGCAGTCAGACCAGGACCATGGAACTCAGCGTCCTCCTCTTCCTTGCACTCCTCACAGGACTCTTGCTACTCCTGGTTCAGCGCCACCCTAACACCCATGACCGCCTCCCACCAGGGCCCCGCCCTCTGCCCCTTTTGGGAAACCTTCTGCAGATGGATAGAAGAGGCCTACTCAAATCCTTTCTGAGGGTAAGACACAGACGAAT</Hsp_hseq>
              <Hsp_midline>|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||</Hsp_midline>
            </Hsp>
          </Hit_hsps>
        </Hit>
        <Hit>
          <Hit_num>2</Hit_num>
          <Hit_id>gnl|BL_ORD_ID|29</Hit_id>
          <Hit_def>rs8192709_R Positive Control Rare Sequence </Hit_def>
          <Hit_accession>29</Hit_accession>
          <Hit_len>249</Hit_len>
          <Hit_hsps>
            <Hsp>
              <Hsp_num>1</Hsp_num>
              <Hsp_bit-score>455.396108708835</Hsp_bit-score>
              <Hsp_score>246</Hsp_score>
              <Hsp_evalue>4.53358655933358e-131</Hsp_evalue>
              <Hsp_query-from>1</Hsp_query-from>
              <Hsp_query-to>249</Hsp_query-to>
              <Hsp_hit-from>1</Hsp_hit-from>
              <Hsp_hit-to>249</Hsp_hit-to>
              <Hsp_query-frame>1</Hsp_query-frame>
              <Hsp_hit-frame>1</Hsp_hit-frame>
              <Hsp_identity>248</Hsp_identity>
              <Hsp_positive>248</Hsp_positive>
              <Hsp_gaps>0</Hsp_gaps>
              <Hsp_align-len>249</Hsp_align-len>
              <Hsp_qseq>GGTCAGGATAAAAGGCCCAGTTGGAGGCTGCAGCAGGGTGCAGGGCAGTCAGACCAGGACCATGGAACTCAGCGTCCTCCTCTTCCTTGCACTCCTCACAGGACTCTTGCTACTCCTGGTTCAGCGCCACCCTAACACCCATGACCGCCTCCCACCAGGGCCCCGCCCTCTGCCCCTTTTGGGAAACCTTCTGCAGATGGATAGAAGAGGCCTACTCAAATCCTTTCTGAGGGTAAGACACAGACGAAT</Hsp_qseq>
              <Hsp_hseq>GGTCAGGATAAAAGGCCCAGTTGGAGGCTGCAGCAGGGTGCAGGGCAGTCAGACCAGGACCATGGAACTCAGCGTCCTCCTCTTCCTTGCACTCCTCACAGGACTCTTGCTACTCCTGGTTCAGTGCCACCCTAACACCCATGACCGCCTCCCACCAGGGCCCCGCCCTCTGCCCCTTTTGGGAAACCTTCTGCAGATGGATAGAAGAGGCCTACTCAAATCCTTTCTGAGGGTAAGACACAGACGAAT</Hsp_hseq>
              <Hsp_midline>|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||</Hsp_midline>
            </Hsp>
          </Hit_hsps>
        </Hit>
      </Iteration_hits>
      <Iteration_stat>
        <Statistics>
          <Statistics_db-num>58</Statistics_db-num>
          <Statistics_db-len>24590</Statistics_db-len>
          <Statistics_hsp-len>15</Statistics_hsp-len>
          <Statistics_eff-space>5550480</Statistics_eff-space>
          <Statistics_kappa>0.46</Statistics_kappa>
          <Statistics_lambda>1.28</Statistics_lambda>
          <Statistics_entropy>0.85</Statistics_entropy>
        </Statistics>
      </Iteration_stat>
    </Iteration>
  </BlastOutput_iterations>
</BlastOutput>

我正在尝试以下代码,但我不知道需要调用多少次 .Read 函数才能定位到这些节点。

''' <summary>
''' Reads the BLAST XML output file and shows the Hsp_qseq, Hsp_hseq and
''' Hsp_midline values of the first Hsp element in a message box.
''' </summary>
''' <remarks>
''' The reader is moved to the first Hsp element by name instead of by a
''' fixed count of Read() calls, so the method does not depend on how many
''' nodes precede that element. Any exception raised while opening or
''' parsing the file is reported through a message box.
''' </remarks>
Private Sub ReadFromXML()

    Form2.Visible = True

    Dim qseq As String = Nothing
    Dim hseq As String = Nothing
    Dim midline As String = Nothing

    Try
        ' Using disposes the reader (and its file handle) even when parsing throws.
        Using m_xmlr As New XmlTextReader("C:\Program Files\NCBI\blast-2.2.25+\bin\similarity\out.xml")
            m_xmlr.WhitespaceHandling = WhitespaceHandling.None

            ' Advance to the first <Hsp> element, wherever it is in the document.
            If Not m_xmlr.ReadToFollowing("Hsp") Then
                MsgBox("No Hsp element was found in the BLAST output.")
                Return
            End If

            ' Remember the depth of <Hsp> so the loop stops at its end tag
            ' and never runs on into the next hit.
            Dim hspDepth As Integer = m_xmlr.Depth
            m_xmlr.Read() ' step onto the first child of <Hsp>

            While Not m_xmlr.EOF AndAlso m_xmlr.Depth > hspDepth
                If m_xmlr.NodeType <> XmlNodeType.Element Then
                    m_xmlr.Read()
                    Continue While
                End If

                ' ReadElementContentAsString and Skip both leave the reader on the
                ' node after the element's end tag, so no extra Read() is needed here.
                Select Case m_xmlr.Name
                    Case "Hsp_qseq"
                        qseq = m_xmlr.ReadElementContentAsString()
                    Case "Hsp_hseq"
                        hseq = m_xmlr.ReadElementContentAsString()
                    Case "Hsp_midline"
                        midline = m_xmlr.ReadElementContentAsString()
                    Case Else
                        m_xmlr.Skip()
                End Select
            End While
        End Using

        MsgBox("Hsp_qseq: " & qseq & vbCrLf &
               "Hsp_hseq: " & hseq & vbCrLf &
               "Hsp_midline: " & midline)

    Catch ex As Exception
        MsgBox(ex.Message)
    End Try
End Sub

或者,有没有更好的方法来做到这一点?

谢谢

4

1 回答 1

2

我会使用XPath从文件中提取您需要的信息,因为它允许您准确查询所需的节点。

XPath 查询可能看起来很麻烦,但对于简单的操作来说,它相当容易上手。下面是一些示例代码,它们使用 XPath 提取您提到的那些节点的值并将它们的值打印到控制台:

Imports System.Xml.XPath
Imports System.IO

''' <summary>
''' Console sample that prints the Hsp_qseq, Hsp_hseq and Hsp_midline values
''' of every Hsp element found in a BLAST XML output file.
''' </summary>
Module Module1

    ''' <summary>
    ''' Absolute XPath from the document root to the Hsp elements. An absolute
    ''' path is used because BlastOutput is the root element, so a descendant
    ''' search ("//") is not needed to reach it.
    ''' </summary>
    Private Const HspPath As String = "/BlastOutput/BlastOutput_iterations/Iteration/Iteration_hits/Hit/Hit_hsps/Hsp"

    ''' <summary>
    ''' Loads C:\out.xml and writes the values of the three alignment elements
    ''' to the console, one element name at a time, then waits for a key press.
    ''' </summary>
    Sub Main()

        Using stream As New FileStream("C:\out.xml", FileMode.Open, FileAccess.Read)

            Dim Doc As New XPathDocument(stream)
            Dim Nav = Doc.CreateNavigator()

            ' Same order as before: all Hsp_qseq values, then Hsp_hseq, then Hsp_midline.
            For Each elementName As String In New String() {"Hsp_qseq", "Hsp_hseq", "Hsp_midline"}
                PrintValues(Nav, elementName)
            Next

            ' Keep the console window open until the user presses a key.
            Console.Read()

        End Using

    End Sub

    ''' <summary>
    ''' Selects every child element with the given name under the Hsp elements
    ''' and writes each value to the console as "name: value".
    ''' </summary>
    ''' <param name="nav">Navigator positioned on the loaded document.</param>
    ''' <param name="elementName">Name of the Hsp child element to print.</param>
    Private Sub PrintValues(ByVal nav As XPathNavigator, ByVal elementName As String)

        Dim nodes = nav.Select(HspPath & "/" & elementName)

        While nodes.MoveNext()
            Console.WriteLine("{0}: {1}", elementName, nodes.Current.Value)
        End While

    End Sub

End Module

上面代码中唯一值得注意的部分是 Dim Foo = Nav.Select("...") 这一句,它的参数是用来查询所需信息的 XPath 表达式。在本例中,它只是一条从根节点到目标节点的简单路径,但您也可以使用功能更强大的查询表达式。

该调用会返回一个可遍历所有匹配节点的迭代器,因此只需迭代并逐个处理返回的节点即可。

于 2011-12-03T16:48:50.267 回答