我们正在 Google 的电子表格中开发 POC。
有一些配置,但简而言之,它从 BigQuery 下载数据并将其重定向到 Prediction。
我们的 BigQuery 表超过 41 MB,超出了电子表格所允许/支持的大小。
我们考虑从 BigQuery 分批下载数据,每批 5 MB。虽然 Prediction API 提供了一次插入大量数据的方法,但 update 方法每次只允许上传一行/一个实例。
有没有办法将 BigQuery 的数据直接重定向到 Prediction?
主要涉及的三个功能是:
/**
 * Trains a new Prediction model from the rows of a BigQuery table.
 *
 * The rows are fetched through simpleQuery(); the first 90% of them are
 * converted into training instances and submitted with
 * Prediction.Trainedmodels.insert(). For every row, the value of the first
 * selected column becomes the instance's `output` and the value of the second
 * selected column becomes its `csvInstance`; any further columns are not read.
 * The remaining rows are not used by this function.
 *
 * On success the model name is stored in `c_modelName` and the insert reply is
 * shown in a message box. Any error is caught and shown in a message box
 * instead of being rethrown.
 *
 * Relies on `c_projectId` and `c_modelName`, which are expected to be defined
 * outside this function.
 *
 * @param {string} modelName - the new prediction model name
 * @param {string[]} columns - an array of column names to select
 * @param {string} table - the table name
 * @param {string} dataset - the dataset name
 */
function createPrediction(modelName, columns, table, dataset) {
  try {
    var queried = simpleQuery(columns, table, dataset);

    // Only the leading 90% of the rows are submitted as training data.
    var trainingCount = Math.floor(0.9 * queried.length);
    var instances = queried.slice(0, trainingCount).map(function (row) {
      return {'output': row.f[0].v, 'csvInstance': row.f[1].v};
    });

    var reply = Prediction.Trainedmodels.insert(
      {'id': modelName, 'trainingInstances': instances},
      c_projectId
    );

    // Record the model name only once the insert call has returned.
    c_modelName = modelName;
    Browser.msgBox('Insert reply:' + reply, Browser.Buttons.OK);
  } catch (err) {
    Browser.msgBox('ERROR: ' + err, Browser.Buttons.OK);
  }
}
/**
 * Builds "SELECT [COLUMNS] FROM [DATASET.TABLE]" for the given columns, table
 * and dataset, runs it through getQuery() and returns whatever getQuery()
 * returns.
 *
 * The names are concatenated into the query text as-is, without escaping.
 *
 * @param {string[]} columns - column names, joined with "," in the SELECT list
 * @param {string} table - the table name
 * @param {string} dataset - the dataset name
 * @param {string} [projectId] - project to run the query in; when falsy,
 *     `c_projectId` (expected to be defined outside this function) is used
 * @return {*} the result of getQuery() for the generated query
 */
function simpleQuery(columns, table, dataset, projectId) {
  var targetProjectId = projectId ? projectId : c_projectId;
  var selectList = columns.join(",");
  var tableReference = "[" + dataset + "." + table + "]";
  var sql = "SELECT " + selectList + " FROM " + tableReference;
  return getQuery(sql, targetProjectId);
}
/**
 * Runs a query through the BigQuery service, waits for the job to complete and
 * returns all result rows, following page tokens until none is left.
 *
 * Always returns an array: a response (first page or any later page) that
 * carries no `rows` property contributes no rows, so a query with an empty
 * result yields [] instead of undefined.
 *
 * @param {string} query - the query to be evaluated
 * @param {string} projectId - the Google project id
 * @return {Array} the rows of every result page, in page order
 */
function getQuery(query, projectId) {
  var request = {
    query: query
  };
  var queryResults = BigQuery.Jobs.query(request, projectId);
  var jobId = queryResults.jobReference.jobId;

  // Poll until the job reports completion, doubling the wait after each try.
  var sleepTimeMs = 500;
  while (!queryResults.jobComplete) {
    Utilities.sleep(sleepTimeMs);
    sleepTimeMs *= 2;
    queryResults = BigQuery.Jobs.getQueryResults(projectId, jobId);
  }

  // A response without `rows` must not leak `undefined` to the caller, who
  // reads `.length` and indexes into the result.
  var rows = queryResults.rows || [];
  while (queryResults.pageToken) {
    queryResults = BigQuery.Jobs.getQueryResults(projectId, jobId, {
      pageToken: queryResults.pageToken
    });
    // concat(undefined) would append a literal undefined element.
    rows = rows.concat(queryResults.rows || []);
  }
  return rows;
}
问候。