这是我想出的使用正则表达式的方法。这种方法针对性很强,绝对不比其他答案中使用 readHTMLTable 的做法更好,更多是为了说明在 R 中也可以进行这类文本挖掘:
# file <- "~/Documents/R/medals.html"
# page <- readChar(file,file.info(file)$size)
library(RCurl) 
# Download the medal-count page, sending a browser-like user agent with the
# request.
theurl <- "http://www.london2012.com/medals/medal-count/"
page <- getURLContent(
  theurl,
  useragent = "Mozilla/5.0 (Windows NT 6.1; rv:15.0) Gecko/20120716 Firefox/15.0a2"
)

# Remove html tags (the pattern also spans tags broken across lines):
page <- gsub("<(.|\n)*?>", "", page)
# Remove newlines. Carriage returns are kept on purpose: the row pattern
# below uses them as the boundary between matches.
page <- gsub("\\n", "", page)

# Match table: keep only the text between "Total" and "Detailed".
page <- regmatches(
  page,
  regexpr("(?<=Total).*(?=Detailed)", page, perl = TRUE)
)
if (length(page) != 1L) {
  # Without this guard the next step fails with "subscript out of bounds".
  stop(
    "Medal table not found: no text between 'Total' and 'Detailed' in ",
    theurl,
    call. = FALSE
  )
}

# Extract country+medals+rank: each match starts and ends with digits and
# contains no carriage return.
codes <- regmatches(page, gregexpr("\\d+[^\\r]*\\d+", page, perl = TRUE))[[1]]
if (length(codes) < 3L) {
  # seq() below would otherwise fail with "wrong sign in 'by' argument".
  stop(
    "Medal table could not be parsed: expected at least 3 matches, found ",
    length(codes),
    call. = FALSE
  )
}
# Keep every other match, ignoring the last two.
codes <- codes[seq(1, length(codes) - 2, by = 2)]

# Extract country and medals:
# The country name is whatever remains once all digits are removed.
Names <- gsub("\\d", "", codes)
# The medal counts are the last three single digits of each match.
# NOTE(review): digits are extracted one at a time, so a count of 10 or more
# would be misread -- confirm against the page layout before relying on this.
digits <- regmatches(codes, gregexpr("\\d", codes))
if (any(lengths(digits) < 3L)) {
  stop(
    "Medal table could not be parsed: a row has fewer than 3 digits",
    call. = FALSE
  )
}
# vapply() guarantees a 3-row character matrix (one column per country).
Medals <- vapply(
  digits,
  function(x) x[(length(x) - 2):length(x)],
  character(3)
)

# Create data frame:
data.frame(
  Country = Names,
  Gold = as.numeric(Medals[1, ]),
  Silver = as.numeric(Medals[2, ]),
  Bronze = as.numeric(Medals[3, ])
)
输出如下:
                                  Country Gold Silver Bronze
1              People's Republic of China    6      4      2
2                United States of America    3      5      3
3                                   Italy    2      3      2
4                       Republic of Korea    2      1      2
5                                  France    2      1      1
6  Democratic People's Republic  of Korea    2      0      1
7                              Kazakhstan    2      0      0
8                               Australia    1      1      1
9                                  Brazil    1      1      1
10                                Hungary    1      1      1
11                            Netherlands    1      1      0
12                     Russian Federation    1      0      3
13                                Georgia    1      0      0
14                           South Africa    1      0      0
15                                  Japan    0      2      3
16                          Great Britain    0      1      1
17                               Colombia    0      1      0
18                                   Cuba    0      1      0
19                                 Poland    0      1      0
20                                Romania    0      1      0
21                Taipei (Chinese Taipei)    0      1      0
22                             Azerbaijan    0      0      1
23                                Belgium    0      0      1
24                                 Canada    0      0      1
25                    Republic of Moldova    0      0      1
26                                 Norway    0      0      1
27                                 Serbia    0      0      1
28                               Slovakia    0      0      1
29                                Ukraine    0      0      1
30                             Uzbekistan    0      0      1