0

(我完全是 R 新手)我下载了一个 XML 文件,想在 R 中用其中的数据绘制分级统计图(choropleth map)。我使用的是美国流感数据。根据我查到的资料,我需要先把该 XML 文件转换成数据框,R 才能读取,于是我这样做了。但当我查看数据框时,看到的仍然全是 XML 格式的内容。我的问题是:如何提取出我需要的信息并用它来绘制地图?目前我甚至在尝试绘制数据时都会报错。我到处搜索过相关资料,但还没有找到答案。

 setwd("C:/Users/Steven/Downloads/Map_Final")
> library (XML)
> library(ggplot2)
> library(maps)
> library(plyr)
> library(mapproj)
> map('state')
> 
> xmlfile=xmlParse("flu.xml")
> 
> class(xmlfile)
[1] "XMLInternalDocument" "XMLAbstractDocument"
> ggplot(xmlfile)
Error: ggplot2 doesn't know how to deal with data of class XMLInternalDocumentXMLAbstractDocument
> xmltop = xmlRoot(xmlfile) #gives content of root
> 
> class(xmltop)#"XMLInternalElementNode" "XMLInternalNode" "XMLAbstractNode"
[1] "XMLInternalElementNode" "XMLInternalNode"        "XMLAbstractNode"       
> 
> xmlName(xmltop) #give name of node, PubmedArticleSet
[1] "timeperiod"
> 
> xmlSize(xmltop) #how many children in node, 19
[1] 54
> 
> xmlName(xmltop[[1]]) #name of root's children
[1] "state"
> 
> xmltop[[1]]
<state>
  <abbrev>ME</abbrev>
  <color>No Activity</color>
  <label>No Activity</label>
</state> 
> 
> xmltop[[2]]
<state>
  <abbrev>NH</abbrev>
  <color>Local Activity</color>
  <label>Local Activity</label>
</state> 
> 
> ggplot(xmltop)
Error: ggplot2 doesn't know how to deal with data of class XMLInternalElementNodeXMLInternalNodeXMLAbstractNode
> xmltop[[2]]
<state>
  <abbrev>NH</abbrev>
  <color>Local Activity</color>
  <label>Local Activity</label>
</state> 
> 
> xmltop[[2]]
<state>
  <abbrev>NH</abbrev>
  <color>Local Activity</color>
  <label>Local Activity</label>
</state> 
> 
> birdflu=ldply(xmlToList("flu.xml"), data.frame)
> ggplot(birdflu)
Error: No layers in plot
> View(birdflu)

XML 文件:

<timeperiod number="40" year="2014" subtitle="Week Ending October 11, 2014- Week 40">
<state>
<abbrev>ME</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>NH</abbrev>
<color>Local Activity</color>
<label>Local Activity</label>
</state>
<state>
<abbrev>VT</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>MA</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>RI</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>CT</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>NY</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>NJ</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>PR</abbrev>
<color>Regional</color>
<label>Regional</label>
</state>
<state>
<abbrev>VI</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>PA</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>DE</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>MD</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>DC</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>VA</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>WV</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>NC</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>SC</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>GA</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>FL</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>KY</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>TN</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>AL</abbrev>
<color>Local Activity</color>
<label>Local Activity</label>
</state>
<state>
<abbrev>MS</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>OH</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>IN</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>IL</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>MI</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>WI</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>MN</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>AR</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>LA</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>OK</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>TX</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>NM</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>IA</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>MO</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>NE</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>KS</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>ND</abbrev>
<color>Local Activity</color>
<label>Local Activity</label>
</state>
<state>
<abbrev>SD</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>MT</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>WY</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>CO</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>UT</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>AZ</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>NV</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>CA</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>HI</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>GU</abbrev>
<color>Widespread</color>
<label>Widespread</label>
</state>
<state>
<abbrev>ID</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>WA</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>OR</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>AK</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
</timeperiod>
4

1 回答 1

1

下面是一个使用 ggplot 内置美国地图数据的带注释的基本示例。如果您还需要绘制各属地(territories,那里的疫情似乎更严重),请参考 SO 上其他展示该做法的示例(这类示例有很多)。

library(xml2)
library(dplyr)
library(ggplot2)

# Choropleth of flu activity by state, built from flu.xml and the state
# outlines that ggplot2::map_data("state") provides.

# read in the XML file

flu <- read_xml("flu.xml")

# get data from it into a data frame: one row per <state> element, holding
# the two-letter abbreviation and the reported activity level.
# tibble() replaces the deprecated data_frame(); trim = TRUE guards against
# stray whitespace around the element text.

flu_dat <- tibble(
  id = flu %>% xml_find_all("//state/abbrev") %>% xml_text(trim = TRUE),
  value = flu %>% xml_find_all("//state/color") %>% xml_text(trim = TRUE)
)

# for built-in (ggplot) map data we need names, not abbreviations.
# The region names in map_data("state") are lower-case (e.g.
# "new hampshire"), so the lookup table must be lower-cased as well;
# otherwise the filter below matches nothing. state.abb/state.name cover
# only the 50 states, so DC is appended by hand.

state_name <- c(tolower(state.name), "district of columbia")
names(state_name) <- c(state.abb, "DC")

us <- map_data("state")

# convert abbrev to name; ensure ordered factor, filter by what the
# built-in map has. Abbreviations missing from the lookup (PR, VI, GU)
# become NA and are dropped by the filter, as are AK and HI, which the
# built-in map does not contain. NOTE that if you need the territories,
# you'll need to use another base map of which there are many examples on SO.
# Levels follow the CDC geographic-spread scale from least to most activity
# ("Sporadic" ranks below "Local Activity").

activity_levels <- c(
  "No Activity", "Sporadic", "Local Activity", "Regional", "Widespread"
)

flu_dat <- flu_dat %>%
  mutate(
    id = unname(state_name[id]),
    Level = factor(value, levels = activity_levels, ordered = TRUE)
  ) %>%
  filter(id %in% unique(us$region))

# Minimal map theme defined locally so the script does not download and
# execute remote code (previously fetched with devtools::source_gist()).
# It approximates a typical "theme_map": no axes, grid, border or panel
# background.
theme_map <- function(base_size = 9, base_family = "") {
  theme_bw(base_size = base_size, base_family = base_family) +
    theme(
      axis.line = element_blank(),
      axis.text = element_blank(),
      axis.ticks = element_blank(),
      axis.title = element_blank(),
      panel.background = element_blank(),
      panel.border = element_blank(),
      panel.grid = element_blank(),
      plot.background = element_blank()
    )
}

gg <- ggplot()
# plot outlines
gg <- gg + geom_map(data = us, map = us,
                    aes(map_id = region),
                    fill = "#ffffff", color = "#7f7f7f", size = 0.25)
# geom_map() does not set the plot extent itself; take it from the map data
gg <- gg + expand_limits(x = us$long, y = us$lat)
# plot fills based on flu data
gg <- gg + geom_map(data = flu_dat, map = us,
                    aes(fill = Level, map_id = id),
                    color = "#7f7f7f", size = 0.25)
# manual fill scale (light -> dark follows the ordered levels);
# drop = FALSE keeps all possible values on the legend
gg <- gg + scale_fill_manual(values = c("#f2f0f7", "#dadaeb", "#bcbddc",
                                        "#9e9ac8", "#756bb1"), drop = FALSE)
# a proper US projection
gg <- gg + coord_map("albers", lat0 = 39, lat1 = 45)
gg <- gg + theme_map()
gg <- gg + theme(legend.position = "right")
gg

在此处输入图像描述

于 2015-05-06T01:30:15.647 回答