1

Haskell 的 hxt 包用起来对我来说仍然有些别扭,尤其是箭头(Arrow)记法以及由此产生的类型,简直像魔法一样。

到目前为止,我还没能做到下面这件事:我想处理一个主要由两部分组成的 XML 文件。第一部分保存对象的定义,第二部分保存对象的用途/目的。我想先编写 hxt 处理逻辑,从第 1 部分得到一个 Haskell 数据结构,再同样处理第 2 部分,最后在程序的实际逻辑中把读取到的这两个数据结构结合起来。

多亏了箭头教程,现在处理文件本身基本没有问题。但我希望能用 do 记法执行三个步骤:(惰性地)读取文档,用第一个处理器处理一次得到的结构,然后用第二个处理器再次处理同一个结构。我不希望像下面的示例那样调用两次“readDocument”。

import Text.XML.HXT.Core
import Data.Char(toUpper)
import Data.Tree.NTree.TypeDefs

-- | Run both extraction arrows on the given XML file and print what each
-- one produced: first the part1 addresses, then the part2 usages.
--
-- Both 'getAllAddresses' and 'getAllAddressesUsages' begin with their own
-- @readDocument@, so the file is read once per 'runX' call here.
play :: FilePath -> IO ()
play filename = do
    addressLists <- runX (getAllAddresses filename)
    usageLists <- runX (getAllAddressesUsages filename)
    print addressLists
    print usageLists



-- | Read the XML file at @filename@ (validation disabled) and, for every
-- @main/part1/address@ element, collect the pairs produced by 'getAddress'
-- into one list.
getAllAddresses :: FilePath -> IOSArrow XmlTree [(String,NTree XNode)]
getAllAddresses filename =
    readDocument [withValidate no] filename
        >>> childElem "main"
        >>> childElem "part1"
        >>> childElem "address"
        >>> listA getAddress    -- one result list per address element
  where
    -- Step down to the child elements carrying the given tag name.
    childElem :: String -> IOSArrow XmlTree XmlTree
    childElem tag = getChildren >>> isElem >>> hasName tag



-- | For one address element, emit a (tag name, child node) pair for each
-- field: the direct child elements other than @location@, and the child
-- elements nested inside @location@.
getAddress :: IOSArrow XmlTree (String,NTree XNode)
getAddress =
    getChildren >>> isElem >>> (directFields <+> locationFields)
  where
    -- Pair an element's name with each of its children (its value).
    nameAndValue :: IOSArrow XmlTree (String,NTree XNode)
    nameAndValue = getName &&& getChildren

    -- Every child element that is not the "location" container.
    directFields :: IOSArrow XmlTree (String,NTree XNode)
    directFields = neg (hasName "location") >>> nameAndValue

    -- The elements inside the "location" sub-container.
    locationFields :: IOSArrow XmlTree (String,NTree XNode)
    locationFields =
        hasName "location" >>> getChildren >>> isElem >>> nameAndValue




-- | Read the XML file at @filename@ (validation disabled) and, for every
-- @use_obj-names_for_purpose_2@ element under @main/part2@, collect the
-- pairs produced by 'getAddressUsagePurpose2' into one list.
getAllAddressesUsages :: FilePath -> IOSArrow XmlTree [(String,NTree XNode)]
getAllAddressesUsages filename =
    readDocument [withValidate no] filename >>>
    getChildren >>>
    isElem >>> hasName "main" >>>
    getChildren >>>
    isElem >>> hasName "part2" >>>
    getChildren >>>
    -- Select the usage elements before listA, as getAllAddresses does for
    -- "address". listA yields one (possibly empty) list per input node, so
    -- without this filter every other child of part2 (whitespace text, other
    -- usage elements) would contribute a spurious empty list.
    isElem >>> hasName "use_obj-names_for_purpose_2" >>>
    listA getAddressUsagePurpose2     -- one result list per usage element

-- | Keep only @use_obj-names_for_purpose_2@ nodes and pair the node's name
-- with each of its children.
getAddressUsagePurpose2 :: IOSArrow XmlTree (String,NTree XNode)
getAddressUsagePurpose2 = onlyPurpose2 >>> nameWithChild
  where
    -- Filter: pass the node through only when it carries the usage-2 tag.
    onlyPurpose2 :: IOSArrow XmlTree XmlTree
    onlyPurpose2 = hasName "use_obj-names_for_purpose_2"

    -- Pair the node's name with each of its child nodes.
    nameWithChild :: IOSArrow XmlTree (String,NTree XNode)
    nameWithChild = getName &&& getChildren

示例数据:

<main>
 <part1>
  <address>
    <obj-name>one</obj-name>
    <name>peter 1</name>
    <street>streetname 1</street>
    <location>
      <country>Germany</country>
      <state>Baden Wuerttemberg</state>
   </location>
   </address>
  <address>
    <obj-name>two</obj-name>
    <name>peter 2</name>
    <street>streetname 2</street>
    <location>
      <country>Germany</country>
      <state>Nordrhein Westfalen</state>
      </location>
   </address>
 </part1>
 <part2>
   <use_obj-names_for_purpose_1>
     <obj-name>two</obj-name>
   </use_obj-names_for_purpose_1>
   <use_obj-names_for_purpose_2>
     <obj-name>two</obj-name>
   </use_obj-names_for_purpose_2>
 </part2>
</main>

所以正式的问题是:

函数 play 的 monadic 写法应该是什么样子,才能得到类似下面的效果:

-- | Arrow that reads the XML document at the given path with validation
-- switched off.
readXmlDocument :: String -> IOSArrow XmlTree (NTree XNode)
readXmlDocument = readDocument [withValidate no]

-- NOTE(review): this is a sketch of the desired shape, not working code.
-- 'readXmlDocument' is declared to return an 'IOSArrow', while this do-block
-- ends in 'print' (an IO action), so the '<-' binds below presumably do not
-- type-check as written. 'processLogics' is not defined in the visible code.
play filename = do 
             -- read the document once ...
             document <- readXmlDocument filename
             -- ... then feed the same document to both extractors
             allAddresses <- getAllAddresses document
             allPurposes <- getAllAddressesUsages document
             -- combine the two extracted structures in the program's own logic
             result <- processLogics allAddresses allPurposes 
             print result

我该如何从 Monad 进入 Arrow,再回到 Monad,然后处理纯数据,最后再回到 Monad?

我为什么要这样做?

4

1 回答 1

1

该问题的一种解决方案如下:

使用 Arrows 语言扩展,并用“proc”表达式在同一个函数中把读取到的文档交给两条处理路径处理。结果组合在一个元组中。这个元组仍然包含两个需要运行的箭头,这是通过两次调用 runX 函数完成的。

一旦两个结果在后续计算中组合起来,我仍然不完全确定这种写法会让文件被加载一次还是两次。

{-# LANGUAGE Arrows #-}

import Text.XML.HXT.Core
import Data.Char(toUpper)
import Data.Tree.NTree.TypeDefs


-- | Build the two IO actions that extract the addresses (first component)
-- and the object names used for purpose 2 (second component) from the file.
--
-- Nothing runs until a component is executed. Each component is a separate
-- 'runX' over an arrow that starts with 'readXmlDocument', so executing both
-- components reads the file twice.
play :: String -> (IO [[(String,NTree XNode)]], IO [String])
play filename = (runX addressArrow, runX usageArrow)
  where
    (addressArrow, usageArrow) = analyseXml reader
    reader = readXmlDocument filename

-- | Build both extraction arrows from one document-reading arrow.
--
-- The @proc@ block here runs in the plain function arrow @(->)@: its input is
-- the reader arrow itself, and its result is the pair of arrows obtained by
-- handing that reader to 'getAllAddresses' and 'getAllAddressesUsages'.
analyseXml :: IOSArrow XmlTree (NTree XNode) -> (IOSArrow XmlTree [(String,NTree XNode)],IOSArrow XmlTree String)
analyseXml = proc reader -> do
    addressArrow <- getAllAddresses -< reader
    usageArrow <- getAllAddressesUsages -< reader
    returnA -< (addressArrow, usageArrow)

-- | Arrow that reads the XML document at the given path; validation is
-- turned off via @withValidate no@.
readXmlDocument :: String -> IOSArrow XmlTree (NTree XNode)
readXmlDocument = readDocument [withValidate no]



-- | Extend the given document arrow so that, for every
-- @main/part1/address@ element, it yields the list of pairs produced by
-- 'getAddress'.
getAllAddresses :: IOSArrow XmlTree (NTree XNode) -> IOSArrow XmlTree [(String,NTree XNode)]
getAllAddresses document =
    document
        >>> descend "main"
        >>> descend "part1"
        >>> descend "address"
        >>> listA getAddress    -- one result list per address element
  where
    -- Move to the child elements that carry the given tag name.
    descend :: String -> IOSArrow XmlTree XmlTree
    descend tag = getChildren >>> isElem >>> hasName tag



-- | Flatten one address element into (tag name, child node) pairs: one pair
-- per child of each direct field, with the fields nested in @location@
-- lifted to the same level.
getAddress :: IOSArrow XmlTree (String,NTree XNode)
getAddress =
    getChildren >>> isElem >>> (topLevelField <+> nestedLocationField)
  where
    -- Name of the current element paired with each of its children.
    tagWithContent :: IOSArrow XmlTree (String,NTree XNode)
    tagWithContent = getName &&& getChildren

    -- Any element except the "location" container itself.
    topLevelField :: IOSArrow XmlTree (String,NTree XNode)
    topLevelField = neg (hasName "location") >>> tagWithContent

    -- Elements found one level down, inside "location".
    nestedLocationField :: IOSArrow XmlTree (String,NTree XNode)
    nestedLocationField =
        hasName "location" >>> getChildren >>> isElem >>> tagWithContent




-- | Extend the given document arrow so that it yields the text of every
-- @obj-name@ element under @main/part2/use_obj-names_for_purpose_2@.
-- Each text is a separate result of the arrow; no list is built here.
getAllAddressesUsages :: IOSArrow XmlTree (NTree XNode) -> IOSArrow XmlTree String
getAllAddressesUsages document =
    document
        >>> descend "main"
        >>> descend "part2"
        >>> descend "use_obj-names_for_purpose_2"
        >>> descend "obj-name"
        >>> getChildren
        >>> getText             -- text content of each obj-name
  where
    -- Move to the child elements that carry the given tag name.
    descend :: String -> IOSArrow XmlTree XmlTree
    descend tag = getChildren >>> isElem >>> hasName tag

可以按如下方式执行:

*Main>  snd ( play  "../tmp/haskell/test.xml")
["two"]

*Main>  fst ( play  "../tmp/haskell/test.xml")
[[("obj-name",NTree (XText "one") []),("name",NTree (XText "peter 1") []),("street",NTree (XText "streetname 1") []),("country",NTree (XText "Germany") []),("state",NTree (XText "Baden Wuerttemberg") [])],[("obj-name",NTree (XText "two") []),("name",NTree (XText "peter 2") []),("street",NTree (XText "streetname 2") []),("country",NTree (XText "Germany") []),("state",NTree (XText "Nordrhein Westfalen") [])]]
*Main>
于 2013-02-24T23:02:36.137 回答