使用 GraphQL API v4
您可以使用GraphQL API v4来优化每个分支的提交下载。在以下方法中,我设法在单个请求中下载了 1900 个提交(19 个不同分支中每个分支 100 个提交),这大大减少了请求数量(与使用 REST api 相比)。
1 - 获取所有分支
如果您有超过 100 个分支,则必须获取所有分支并进行分页:
查询:
# Page through all branches (100 per request); $branchCursor carries the
# previous page's pageInfo.endCursor ("" on the first request).
query($owner:String!, $name:String!, $branchCursor: String!) {
repository(owner: $owner, name: $name) {
refs(first: 100, refPrefix: "refs/heads/",after: $branchCursor) {
totalCount
edges {
node {
name
target {
...on Commit {
# history(first:0) fetches no commits — only the branch's total commit count.
history(first:0){
totalCount
}
}
}
}
}
# While hasNextPage is true, endCursor feeds the next request's $branchCursor.
pageInfo {
endCursor
hasNextPage
}
}
}
}
变量:
{
"owner": "google",
"name": "gson",
"branchCursor": ""
}
在 GraphQL Explorer 中尝试
请注意,branchCursor 变量只在您有超过 100 个分支时才会用到,其取值为上一个请求返回的 pageInfo.endCursor。
2 - 将分支数组拆分为最多 19 个分支的数组
单个请求可查询的节点总数存在限制,这使我们无法在一次查询中请求过多节点。我执行的一些测试表明,单个查询中不能超过 19*100 次提交。
请注意,对于具有 < 19 个分支的 repo,您无需为此烦恼
3 - 每个分支按 100 块查询提交
然后,您可以动态创建查询以获取所有分支上的 100 个下一个提交。有 2 个分支的示例:
# One aliased `ref` field per branch fetches that branch's next 100 commits;
# up to 19 such aliases fit in a single request.
query ($owner: String!, $name: String!) {
repository(owner: $owner, name: $name) {
branch0: ref(qualifiedName: "JsonArrayImplementsList") {
target {
... on Commit {
history(first: 100) {
...CommitFragment
}
}
}
}
branch1: ref(qualifiedName: "master") {
target {
... on Commit {
history(first: 100) {
...CommitFragment
}
}
}
}
}
}
# Shared fragment: commit fields plus per-branch pagination info
# (endCursor becomes the `after:` argument of the next request).
fragment CommitFragment on CommitHistoryConnection {
totalCount
nodes {
oid
message
committedDate
author {
name
email
}
}
pageInfo {
hasNextPage
endCursor
}
}
在 GraphQL Explorer 中尝试
- 变量 owner 和 name 分别表示存储库的所有者和存储库的名称。
- 使用一个片段(fragment),以避免重复定义提交历史的字段。
您可以看到,pageInfo.hasNextPage 和 pageInfo.endCursor 将用于对每个分支进行分页。分页通过在 history(first: 100) 中指定上一次遇到的游标来实现,例如下一个请求将是 history(first: 100, after: "6e2fcdcaf252c54a151ce6a4441280e4c54153ae 99")。对于每个分支,我们都必须用最新的 endCursor 值更新请求,以查询接下来的 100 个提交。
当某个分支的 pageInfo.hasNextPage 为 false 时,该分支已没有更多页面,因此我们不会在下一个请求中包含它。当最后一个分支的 pageInfo.hasNextPage 也变为 false 时,我们就已经检索了所有提交。
示例实现
这是在 NodeJS 中使用 github-graphql-client 的示例实现,同样的方法可以用任何其他语言实现。以下实现还会把每个分块的提交存储到 commitsX.json 文件中:
// Third-party GraphQL client and Node's fs module.
const client = require('github-graphql-client');
const fs = require("fs");
// Target repository and a GitHub personal access token.
// NOTE(review): do not commit a real token — load it from the environment.
const owner = "google";
const repo = "gson";
const accessToken = "YOUR_ACCESS_TOKEN";
// Paged query listing all branches (100 per page). For each branch it also
// fetches the total commit count (history(first:0)) so progress can be logged.
const branchQuery = `
query($owner:String!, $name:String!, $branchCursor: String!) {
repository(owner: $owner, name: $name) {
refs(first: 100, refPrefix: "refs/heads/",after: $branchCursor) {
totalCount
edges {
node {
name
target {
...on Commit {
history(first:0){
totalCount
}
}
}
}
}
pageInfo {
endCursor
hasNextPage
}
}
}
}`;
/**
 * Dynamically build a GraphQL query that fetches the next 100 commits of
 * every branch that still has pages, using one aliased `ref` field per branch.
 *
 * @param {Object} branches - map of alias (e.g. "branch0") to pagination
 *   state: { name, cursor, hasNextPage, ... }.
 * @returns {string} complete query text, including the CommitFragment.
 */
function buildCommitQuery(branches) {
  let query = `
query ($owner: String!, $name: String!) {
  repository(owner: $owner, name: $name) {`;
  for (const [key, branch] of Object.entries(branches)) {
    // Branches whose history is exhausted are omitted from the request.
    if (!branch.hasNextPage) {
      continue;
    }
    // `after: null` fetches the first page; otherwise resume at the cursor.
    const after = branch.cursor ? `"${branch.cursor}"` : null;
    query += `
    ${key}: ref(qualifiedName: "${branch.name}") {
      target {
        ... on Commit {
          history(first: 100, after: ${after}) {
            ...CommitFragment
          }
        }
      }
    }`;
  }
  query += `
  }
}`;
  query += commitFragment;
  return query;
}
// Fragment shared by every branch alias in the commit query: commit fields
// plus pagination info (endCursor feeds the next request's `after:` argument).
const commitFragment = `
fragment CommitFragment on CommitHistoryConnection {
totalCount
nodes {
oid
message
committedDate
author {
name
email
}
}
pageInfo {
hasNextPage
endCursor
}
}`;
/**
 * Run one GraphQL request, adapting the callback-based client to a Promise.
 *
 * @param {string} query - GraphQL query text.
 * @param {Object} variables - query variables.
 * @returns {Promise<Object>} resolves with the raw response, rejects on error.
 */
function doRequest(query, variables) {
  return new Promise((resolve, reject) => {
    const payload = {
      token: accessToken,
      query: query,
      variables: variables
    };
    client(payload, (err, res) => {
      if (err) {
        // Dump the full error structure before propagating it.
        console.log(JSON.stringify(err, null, 2));
        reject(err);
        return;
      }
      resolve(res);
    });
  });
}
/**
 * Convert a page of branch edges into a state map keyed branch0..branchN,
 * so each query alias can be matched back to its pagination state.
 *
 * @param {Array} branch - refs.edges array from the branch query.
 * @returns {Object} alias -> { name, totalCount, cursor, hasNextPage, commits }.
 */
function buildBranchObject(branch) {
  const refs = {};
  branch.forEach((edge, i) => {
    console.log(`branch ${edge.node.name}`);
    refs[`branch${i}`] = {
      name: edge.node.name,
      totalCount: edge.node.target.history.totalCount,
      cursor: null,       // no cursor yet: first page starts at the beginning
      hasNextPage: true,  // assume pages exist until a request says otherwise
      commits: []
    };
  });
  return refs;
}
/**
 * Crawl every branch of the repository and download all of its commits:
 * 100 commits per branch per request, up to 19 branches per request.
 * Each chunk of branches is written to a commitsX.json file.
 *
 * Fix vs original: removed `isEmpty = false;`, an assignment to an
 * undeclared variable — unused, and a ReferenceError in strict mode/ESM.
 */
async function requestGraphql() {
  // 1) Page through all branches (100 per request).
  let branches = [];
  let cursor = "";
  let iterateBranch = true;
  while (iterateBranch) {
    const res = await doRequest(branchQuery, {
      "owner": owner,
      "name": repo,
      "branchCursor": cursor
    });
    const refPage = res.data.repository.refs;
    iterateBranch = refPage.pageInfo.hasNextPage;
    cursor = refPage.pageInfo.endCursor;
    branches = branches.concat(refPage.edges);
  }

  // 2) Split the branch array into chunks of at most 19 branches: a single
  // query cannot exceed roughly 19 * 100 commits (node count limits).
  const size = 19;
  const refChunk = [];
  for (let i = 0; i < branches.length; i += size) {
    refChunk.push(branches.slice(i, i + size));
  }

  for (let j = 0; j < refChunk.length; j++) {
    // Store branches keyed branch0..branchN so query aliases map back to
    // per-branch pagination state when results arrive.
    const refs = buildBranchObject(refChunk[j]);

    // 3) Query commits while at least one branch still has pages; exhausted
    // branches are not included in subsequent requests (see buildCommitQuery).
    let hasNextPage = true;
    let count = 0;
    while (hasNextPage) {
      const commitQuery = buildCommitQuery(refs);
      console.log("request : " + count);
      const commitResult = await doRequest(commitQuery, {
        "owner": owner,
        "name": repo
      });
      hasNextPage = false;
      for (const key of Object.keys(refs)) {
        if (commitResult.data.repository[key]) {
          const history = commitResult.data.repository[key].target.history;
          refs[key].commits = refs[key].commits.concat(history.nodes);
          // Keep the cursor only while more pages exist; '' marks exhaustion.
          refs[key].cursor = history.pageInfo.hasNextPage ? history.pageInfo.endCursor : '';
          refs[key].hasNextPage = history.pageInfo.hasNextPage;
          console.log(key + " : " + refs[key].commits.length + "/" + refs[key].totalCount + " : " + refs[key].hasNextPage + " : " + refs[key].cursor + " : " + refs[key].name);
          if (refs[key].hasNextPage) {
            hasNextPage = true;
          }
        }
      }
      count++;
      console.log("------------------------------------");
    }

    // Per-branch summary: expected vs downloaded commit counts.
    for (const key of Object.keys(refs)) {
      console.log(refs[key].totalCount + " : " + refs[key].commits.length + " : " + refs[key].name);
    }

    // 4) Write this chunk (up to 19 branches) to a single JSON file.
    fs.writeFile("commits" + j + ".json", JSON.stringify(refs, null, 4), "utf8", function (err) {
      if (err) {
        console.log(err);
      }
      console.log("done");
    });
  }
}
requestGraphql();
这也适用于有很多分支的仓库,例如这个有 700 多个分支的仓库
速率限制
请注意,虽然使用 GraphQL 确实可以减少请求数量,但它不一定会提高您的速率限制,因为速率限制是基于点数而不是有限数量的请求:检查GraphQL API 速率限制