1

我正在尝试从 Google Sheet 获取一些 CSV 数据,并将其存储到 Apify dataset 中。

const Apify = require('apify');
const request = require('request-promise');

Apify.main(async () => {
  // Google Sheets "gviz" export URL; the tqx=out:csv parameter asks for CSV (the log output below shows CSV rows).
  var URL = "https://docs.google.com/spreadsheets/d/1-auXklWqHQ-jj6AXymMPa7FLtP1eYGJGF3rprxuWitk/gviz/tq?tqx=out:csv";
  // Despite its name, `html` holds the response body of the request, i.e. the CSV text as a single string.
  const html = await request(URL);
  console.log('My output:');
  console.log(html);
  // Store the raw CSV string under the 'OUTPUT' key.
  await Apify.setValue('OUTPUT', html);
  // openDataset() is called without a name argument here.
  const namedDataset = await Apify.openDataset();
  // This is the call that throws: `html` is a string, while the reported error says "data" must be an Array | Object.
  await namedDataset.pushData(html);
});

这是错误消息:

2020-01-01T16:43:21.501Z My output:
2020-01-01T16:43:21.510Z "city","country"
2020-01-01T16:43:21.512Z "Berlin ","Germany"
2020-01-01T16:43:21.513Z "Los Angeles","United States"
2020-01-01T16:43:21.514Z "Melbourne","Australia"
2020-01-01T16:43:21.516Z "Sydney","Australia"
2020-01-01T16:43:21.517Z "London","United Kingdom"
2020-01-01T16:43:21.519Z "New York City","United States"
2020-01-01T16:43:21.614Z ERROR: The function passed to Apify.main() threw an exception: (error details: type=invalid-parameter)
2020-01-01T16:43:21.616Z   ApifyClientError: Parameter "data" of type Array | Object must be provided
2020-01-01T16:43:21.617Z     at exports.checkParamOrThrow (/usr/src/app/node_modules/apify-client/build/utils.js:222:15)
2020-01-01T16:43:21.619Z     at Dataset.pushData (/usr/src/app/node_modules/apify/build/dataset.js:222:34)
2020-01-01T16:43:21.620Z     at Apify.main (/usr/src/app/main.js:16:22)
2020-01-01T16:43:21.621Z     at process._tickCallback (internal/process/next_tick.js:68:7)
4

2 回答 2

2

更优雅的解决方案是使用我们的Google Sheets actor。

const Apify = require('apify');

// Reads the spreadsheet through the Google Sheets actor and stores the
// returned data in the 'my-dataset' dataset.
Apify.main(async () => {
  const spreadsheetId = '1-auXklWqHQ-jj6AXymMPa7FLtP1eYGJGF3rprxuWitk';
  const sheetsActorInput = {
    mode: 'read',
    spreadsheetId,
  };

  // Apify.call() resolves to the actor *run* object, not to the data itself.
  // The called actor's OUTPUT record is exposed as `run.output.body`.
  // NOTE(review): presumably the actor saves the sheet rows (array of objects)
  // as OUTPUT in read mode - confirm against the actor's README.
  const run = await Apify.call('lukaskrivka/google-sheets', sheetsActorInput);
  const data = run && run.output ? run.output.body : undefined;

  // pushData() only accepts an object or an array of objects, so fail early
  // with a readable message instead of pushing run metadata or nothing.
  if (data === null || typeof data !== 'object') {
    throw new Error('The google-sheets actor run did not return any OUTPUT data.');
  }

  const namedDataset = await Apify.openDataset('my-dataset');
  await namedDataset.pushData(data);
});

唯一的缺点(在某种意义上也是一个优点)是您需要在第一次运行时授权,但这真的很简单。

于 2020-01-02T07:23:19.343 回答
0

我用下面这种有点取巧(hacky)的方法解决了问题。我相信还有更现代、更优雅(elegant)的方法:

const Apify = require('apify');
const request = require('request-promise');

/**
 * Converts CSV text into a JSON string describing an array of row objects.
 *
 * The first non-empty line supplies the property names; every following
 * non-empty line becomes one object. Both "\n" and "\r\n" line endings are
 * accepted, and empty lines (including a trailing newline) are skipped.
 * Fields may be wrapped in double quotes, in which case they may contain
 * commas, and a doubled quote ("") stands for a literal quote character.
 * Line breaks inside quoted fields are NOT supported.
 *
 * @param {string} csv - Raw CSV text.
 * @returns {string} JSON text of an array of objects ("[]" when there is no header line).
 */
function csvJSON(csv) { //https://stackoverflow.com/a/27979069/2330272
  // Splits a single CSV record into its fields, honouring double-quoted
  // fields so that commas inside quotes do not start a new column.
  const splitCsvLine = (line) => {
    const fields = [];
    let field = '';
    let inQuotes = false;
    for (let i = 0; i < line.length; i++) {
      const ch = line[i];
      if (inQuotes) {
        if (ch !== '"') {
          field += ch;
        } else if (line[i + 1] === '"') {
          // Escaped quote inside a quoted field: keep one quote, skip the other.
          field += '"';
          i++;
        } else {
          inQuotes = false;
        }
      } else if (ch === '"') {
        inQuotes = true;
      } else if (ch === ',') {
        fields.push(field);
        field = '';
      } else {
        field += ch;
      }
    }
    fields.push(field);
    return fields;
  };

  // Dropping empty lines avoids bogus rows from blank or trailing lines.
  const lines = csv.split(/\r?\n/).filter((line) => line !== '');
  if (lines.length === 0) {
    return JSON.stringify([]);
  }

  const headers = splitCsvLine(lines[0]);
  const result = lines.slice(1).map((line) => {
    const cells = splitCsvLine(line);
    const row = {};
    // A row shorter than the header leaves the missing cells undefined,
    // which JSON.stringify omits from the output.
    headers.forEach((header, j) => {
      row[header] = cells[j];
    });
    return row;
  });
  return JSON.stringify(result); //JSON
}

// Entry point: downloads the sheet as CSV, strips the double quotes, converts
// the text to row objects via csvJSON, and pushes them to a dataset.
Apify.main(async () => {
  const sheetCsvUrl = "https://docs.google.com/spreadsheets/d/1-auXklWqHQ-jj6AXymMPa7FLtP1eYGJGF3rprxuWitk/gviz/tq?tqx=out:csv"; //test
  const rawCsv = await request(sheetCsvUrl);

  // remove quotes from csv data
  const unquotedCsv = rawCsv.replace(/\"/g, "");
  const rowsJson = csvJSON(unquotedCsv);
  console.log('My output:');

  const dataset = await Apify.openDataset();
  // csvJSON returns JSON text, so it is parsed back into objects for pushData.
  const rows = JSON.parse(rowsJson);
  await dataset.pushData(rows);
});
于 2020-01-01T17:11:39.327 回答