2

我正在尝试将下面链接中的表格导入到 Google 表格中。

我尝试了以下方法:

=IMPORTXML("http://www.immopreise.at/Wien/Wohnung/Miete","//table[@id='preisTabelle']")

下面附上一个示例表格:

https://docs.google.com/spreadsheets/d/1-aXJULo6BELQQ6136Lps_HUzOwkw5SKaPGxIl5gBDfM/edit?usp=sharing

我的问题是公式没有返回任何结果。

请问我哪里做错了?有什么建议吗?

感谢您的回复!

4

1 回答 1

4

第一种方法:您可以这样做(使用 ImportXml 和 RegexExtract):

=IMPORTXML("http://www.immopreise.at/Wien/Wohnung/Miete",
           "//table[@id='preisTabelle']")

上面提到的代码会产生一个空字符串,因为网页在该位置有一个空表,如下所示:

<table id="preisTabelle"></table>

数据实际上位于<script>标签内:

<script>
            var ImmoOptions = {"mapOptions":{"region":"Wien","karteAnzeigen":true},"TrendChartConf":{"uri":{"district":"/Trend/GetDistricts","chart":"/Trend/GetChart","chart1":"/Trend/GetTrendChart","compare":"/Preisvergleich","chart2":"/Trend/TrendChart","refresh":"/Preisentwicklung","uebersicht":"/?region=Wien\u0026pathInfo=Wohnung%2FMiete"},"firstDirstrict":{"Wien":"Wien-1-Innere-Stadt","Niederoesterreich":"Sankt-Poelten-Stadt","Burgenland":"Eisenstadt-Stadt","Oberoesterreich":"Linz-Stadt","Steiermark":"Graz-Alle-Bezirke","Kaernten":"Klagenfurt-Stadt","Salzburg":"Salzburg-Stadt","Tirol":"Innsbruck-Stadt","Vorarlberg":"Bregenz"},"firstDirstrictId":{"9":231,"3":153,"1":133,"4":177,"6":201,"2":142,"5":195,"7":218,"8":228}},"preisInfos":{"tabelle":{"spalten":[{"name":"≤50m²","spaltenArt":"Waehrung","nachkommaStellen":true,"farbmarkierung":null},{"name":"51-80m²","spaltenArt":"Waehrung","nachkommaStellen":true,"farbmarkierung":null},{"name":"81-129m²","spaltenArt":"Waehrung","nachkommaStellen":true,"farbmarkierung":null},{"name":"\u003e130m²","spaltenArt":"Waehrung","nachkommaStellen":true,"farbmarkierung":null},{"name":"\u003cspan class=\u0027Detailed\u0027\u003e\u0026#216;/m²\u003c/span\u003e\u003cspan class=\u0027Compact\u0027\u003e\u0026#216;/m²\u003c/span\u003e","spaltenArt":"Waehrung","nachkommaStellen":true,"farbmarkierung":true},{"name":"\u003cspan class=\u0027Detailed\u0027\u003eTrend\u003c/span\u003e\u003cspan class=\u0027Compact\u0027\u003eTd.\u003c/span\u003e","spaltenArt":"Tendenz","nachkommaStellen":false,"farbmarkierung":null}],"zeilen":[{"name":" 1.,  Innere Stadt","zellen":[21.68,19.02,18.43,19.56,19.27,0],"id":231},{"name":" 2.,  Leopoldstadt","zellen":[18.27,15.06,14.28,14.20,14.85,1],"id":232},{"name":" 3.,  Landstraße","zellen":[18.88,17.04,15.42,14.68,16.03,1],"id":233},{"name":" 4.,  Wieden","zellen":[19.37,16.58,16.89,16.35,16.83,1],"id":234},{"name":" 5.,  Margareten","zellen":[15.46,14.11,14.20,14.77,14.39,0],"id":235},{"name":" 6.,  
Mariahilf","zellen":[18.23,14.68,15.72,15.32,15.53,1],"id":236},{"name":" 7.,  Neubau","zellen":[16.09,14.89,14.58,14.94,14.95,0],"id":237},{"name":" 8.,  Josefstadt","zellen":[16.77,16.78,14.02,14.93,15.08,0],"id":238},{"name":" 9.,  Alsergrund","zellen":[15.72,14.48,14.53,14.92,14.69,0],"id":239},{"name":"10.,  Favoriten","zellen":[14.14,12.35,11.81,0,12.52,0],"id":240},{"name":"11.,  Simmering","zellen":[13.69,12.34,11.50,13.46,12.38,-1],"id":241},{"name":"12.,  Meidling","zellen":[15.66,14.97,13.28,11.79,14.54,1],"id":242},{"name":"13.,  Hietzing","zellen":[16.71,15.93,14.63,14.05,14.99,0],"id":243},{"name":"14.,  Penzing","zellen":[14.43,13.14,12.72,12.37,13.11,0],"id":244},{"name":"15.,  Rudolfsheim-Fünfhaus","zellen":[13.58,12.90,12.93,11.76,13.07,0],"id":245},{"name":"16.,  Ottakring","zellen":[13.99,12.64,12.64,12.45,12.90,0],"id":246},{"name":"17.,  Hernals","zellen":[14.71,13.06,13.15,13.61,13.50,1],"id":247},{"name":"18.,  Währing","zellen":[14.47,13.96,13.82,15.67,14.37,0],"id":248},{"name":"19.,  Döbling","zellen":[16.21,14.37,15.06,16.44,15.29,0],"id":249},{"name":"20.,  Brigittenau","zellen":[15.68,13.57,12.56,13.30,13.43,0],"id":250},{"name":"21.,  Floridsdorf","zellen":[15.58,13.58,12.38,14.97,13.68,1],"id":251},{"name":"22.,  Donaustadt","zellen":[18.19,15.57,16.85,15.89,16.18,0],"id":252},{"name":"23.,  Liesing","zellen":[14.79,14.09,13.49,15.60,13.92,1],"id":253}],"tabellenTitel":"Wohnungen Miete","titelErsteSpalte":"Bezirk","GesamtAnzahlObjekte":12739},"preisspannen":[{"bis":12},{"bis":14},{"bis":15},{"bis":null}]},"basecharts":null,"CurrentView":{"trendVar":{"CatNum":0,"ImmoArtNum":5,"AltbauNum":2,"AngebotTypeNum":1},"hid":0}} ;
            jQuery(function () {
                InitMap(ImmoOptions.mapOptions); 
            });
</script>

我们最感兴趣的数据位于变量 ImmoOptions 的 preisInfos.tabelle.zeilen 数组中:

[
  {
    "name": " 1.,  Innere Stadt",
    "zellen": [21.68, 19.02, 18.43, 19.56, 19.27, 0],
    "id": 231
  },
  {
    "name": " 2.,  Leopoldstadt",
    "zellen": [18.27, 15.06, 14.28, 14.2, 14.85, 1],
    "id": 232
  },
  /* Edited for brevity */
]

以下公式可以将脚本放入电子表格中的单元格中(假设我们将其粘贴到单元格 A[100] 中)..

=IMPORTXML("http://www.immopreise.at/Wien/Wohnung/Miete","//script[2]")

然后,以下公式将 JSON 字符串(ImmoOptions 变量的值)提取到一个单元格中(假设我们将以下内容粘贴到单元格 A[1] 中)..

=REGEXEXTRACT(A100,"(?s)=(.*)")

此时,我们需要 javascript 来解析 JSON。这可以通过将工作表转换为Google 应用程序(工具->脚本编辑器)并在 javascript 中进行编码来完成。

在javascript中,会有三个步骤(这里不做详细介绍):

1. Use IMPORTXML to get the data inside script (in the url/page)
2. Use REGEXEXTRACT to get the value of ImmoOptions as JSON string
3. Parse JSON string to get the data

第二种方法:这是使用 Google 应用程序/脚本的方法:

  1. 登录谷歌并在浏览器中打开此电子表格。

  2. 选择File->Make a Copy(可能使用 S1 之类的名称)。这将在您的谷歌驱动器中复制文件;并在新选项卡中打开它。

  3. 转到那个新窗口/标签。选择Tools->Script Editor。这将使您进入带有脚本的编辑器。从工具栏中选择函数doGet并运行脚本;它将生成电子表格。

这是附在工作表上的脚本(供参考,以防链接丢失):

/**
 * Fetches the immopreise.at rent-price page, extracts the JSON object assigned
 * to the page's `ImmoOptions` variable, and writes its
 * `preisInfos.tabelle.zeilen` rows into the active sheet.
 *
 * Sheet layout after a run: the first row holds the random number used in the
 * request URL, the second row holds the column headers, and each following row
 * holds a district name followed by the values of its `zellen` array.
 *
 * The page is parsed before the sheet is touched, so a fetch or parse failure
 * leaves the existing sheet contents intact.
 *
 * @throws {Error} If the `ImmoOptions` assignment cannot be found in the page
 *     or its object literal is not terminated.
 * @throws {SyntaxError} If the extracted text is not valid JSON.
 */
function doGet() {
  // A random query parameter makes every request URL unique; it is also
  // written to the first row of the sheet.
  var r1 = Math.random() * 100000000000;
  var html = UrlFetchApp.fetch("http://www.immopreise.at/Wien/Wohnung/Miete?somevariable=" + r1).getContentText();
  var jo = JSON.parse(extractImmoOptionsJson_(html));
  var arr = jo["preisInfos"]["tabelle"]["zeilen"];

  var sheet = SpreadsheetApp.getActiveSheet();
  sheet.clear();
  sheet.appendRow([r1]);
  sheet.appendRow(['Bezirk','Col-1','Col-2','Col-3','Col-4','Col-5','Col-6']);

  for (var i = 0; i < arr.length; i++) {
    var item = arr[i];
    // One row per district: its name followed by the price/trend cells.
    sheet.appendRow([item.name].concat(item.zellen));
  }
}

/**
 * Returns the text of the object literal assigned to `var ImmoOptions` in the
 * given page source.
 *
 * The literal is delimited by matching braces rather than by a single-line
 * regular expression, so it may span several lines, and `;`, `{` or `}`
 * characters inside JSON strings do not cut the match short.
 *
 * (The trailing underscore follows the Apps Script naming convention for
 * private helper functions.)
 *
 * @param {string} html Full source of the fetched page.
 * @return {string} The JSON text, from the opening `{` to its matching `}`.
 * @throws {Error} If the assignment is missing, is not followed by `{`, or the
 *     object literal is never closed.
 */
function extractImmoOptionsJson_(html) {
  var marker = /var\s+ImmoOptions\s*=\s*/i.exec(html);
  if (!marker) {
    throw new Error("Could not find 'var ImmoOptions = ...' in the fetched page");
  }

  var start = marker.index + marker[0].length;
  if (html.charAt(start) !== '{') {
    throw new Error("Expected an object literal after 'var ImmoOptions ='");
  }

  var depth = 0;
  var inString = false;
  var escaped = false;
  for (var i = start; i < html.length; i++) {
    var ch = html.charAt(i);
    if (inString) {
      // Inside a JSON string only an unescaped double quote is significant.
      if (escaped) {
        escaped = false;
      } else if (ch === '\\') {
        escaped = true;
      } else if (ch === '"') {
        inString = false;
      }
    } else if (ch === '"') {
      inString = true;
    } else if (ch === '{') {
      depth++;
    } else if (ch === '}') {
      depth--;
      if (depth === 0) {
        return html.substring(start, i + 1);
      }
    }
  }

  throw new Error("Unterminated object literal after 'var ImmoOptions ='");
}

工作原理:

  1. 拉取相关 url 的整个 html 内容。
  2. 使用正则表达式从 <script>..</script> 内部提取 JSON 数据。
  3. 解析提取的 json 数据。
  4. 获取相关数组;填充到电子表格中。

缺点:

  1. 这是一个脆弱的拼凑式脚本:一旦 <script> 标签中的内容发生变化(或出现任何其他导致正则表达式无法匹配的情况),脚本就会失效。
  2. 不能很好地控制表格的 UI(它们可以构建,但需要做更多的工作)。
  3. 仅当整个 JSON 数据位于同一行时才有效(可以通过先删除换行符,或改用合适的正则表达式来解决)。
于 2016-11-20T02:00:20.030 回答