1

我正在使用 Node.js 和 instagram-node-lib 模块下载 Instagram 帖子的元数据。我有几个要搜索的主题标签(hashtag),希望既能下载所有已有的帖子(并处理分页过程中的请求失败),又能监控所有新发布的帖子。

我已经解决了第一部分——下载所有已有帖子并处理失败(我注意到 Instagram API 的请求有时会失败,所以我加入了容错机制:记住最后一个成功下载的页面,并从该位置重新尝试)。下面是我的代码,供感兴趣的人参考(注意:我使用 Postgres 保存帖子;出于可读性和商业方面的考虑,我对部分代码做了删减/混淆)。抱歉代码较长,但我认为它会对别人有用:

// Persistence layer (project-local Postgres wrapper) and the Instagram API client.
var db = new (require('./postgres'))();
var api = require("instagram-node-lib");

// Hashtags to process, in order.
var HASHTAGS = ["fluffy", "kittens"]; //this is just an example!

// API credentials handed to the client when the module is initialised.
var CLIENT_ID = "YOUR_CLIENT_ID";
var CLIENT_SECRET = "YOUR_CLIENT_SECRET";

// Endpoint pieces; not referenced by the code shown in this section.
var HOST = "https://api.instagram.com";
var PORT = 443;
var PATH = "/v1/media/popular?client_id=" + CLIENT_ID;

// Position in HASHTAGS of the hashtag currently being processed.
var hashtagIndex = 0;

// Per-hashtag download state; left undefined here and filled in during initialisation.
var settings;

/**
 * Initialise the module for use
 */
exports.initialise = function(){
    api.set("client_id", CLIENT_ID);
    api.set("client_secret", CLIENT_SECRET);

    if( !settings){
        settings = {
            hashtags: [] 
        }

        for( var i in HASHTAGS){
            settings.hashtags[i] = {
                name: HASHTAGS[i],
                maxTagId: null,
                minTagId: null,
                nextMaxTagId: null,
            }
        }
    }
    // console.log(settings);

    db.initialiseSettings(); //I haven't included the code for this - basically just loads settings from the database, overwriting the defaults above if they exist, otherwise it creates them using the above object. I store the settings as a JSON object in the DB and parse them on load

    execute();    
}

/**
 * Starts processing for the hashtag at hashtagIndex.
 *
 * Requests the tag info from the API and, once it responds, picks one of
 * three paths based on the cursors stored in settings for that hashtag:
 * download from scratch, resume an unfinished download, or fetch new posts.
 * API errors are forwarded to apiError().
 */
function execute(){
    function onTagInfo(data, pagination){
        var tagState = settings.hashtags[hashtagIndex];

        // No baseline cursor recorded yet, so nothing has been downloaded for this tag.
        if( !tagState.maxTagId){
            console.log('Downloading old posts from scratch');
            getOldPosts();
            return;
        }

        // A stored continuation cursor means an earlier download did not finish.
        if( tagState.nextMaxTagId){
            console.log('Downloading old posts from last saved position');
            getOldPosts(tagState.nextMaxTagId);
            return;
        }

        // Old posts are complete; only look for new ones.
        console.log('Downloading new posts only');
        getNewPosts(tagState.minTagId);
    }

    function onTagInfoError(msg, obj, caller){
        apiError(msg, obj, caller);
    }

    var request = {
        name: HASHTAGS[hashtagIndex],
        complete: onTagInfo,
        error: onTagInfoError
    };

    api.tags.info(request);
}

/**
 * Requests one page of recent posts for the current hashtag, saves it, and
 * then requests the following page, until the page's next cursor matches the
 * baseline cursor recorded for the hashtag.
 *
 * On an API error the error is reported via apiError() and execute() is
 * called again, which resumes from the stored cursors.
 *
 * @param {*} [maxTagId] Cursor to continue from; omit to start at the first page.
 */
function getOldPosts(maxTagId){
    console.log();

    function onPage(data, pagination){
        console.log(pagination);

        var tagState = settings.hashtags[hashtagIndex];
        var upcomingCursor = pagination.next_max_tag_id;

        // Reached the end: the next cursor is back at the recorded baseline.
        // NOTE(review): the comparison is loose, so a missing next cursor also
        // equals a null baseline; a first page without a next cursor therefore
        // lands here and its posts are not handed to db.savePosts - confirm
        // this is intended.
        if( upcomingCursor == tagState.maxTagId){
            console.log('Downloaded all posts for #' + HASHTAGS[hashtagIndex]);

            // With the continuation cursor cleared, the next run only looks for new posts.
            tagState.nextMaxTagId = null;
            saveSettings(function(){
                next();
            });
            return;
        }

        // Starting from scratch: record the baseline cursors exactly once so
        // later pages have something to compare against.
        if( !tagState.maxTagId){
            tagState.maxTagId = upcomingCursor;
            tagState.minTagId = pagination.min_tag_id;
        }

        // Restart point after a failure; it is only written to the database
        // after the posts of this page have been saved.
        tagState.nextMaxTagId = upcomingCursor;

        // Save the posts, then the settings, then move on to the next page.
        db.savePosts(data, function(){
            saveSettings(function(){
                getOldPosts(tagState.nextMaxTagId);
            });
        });
    }

    function onFailure(msg, obj, caller){
        apiError(msg, obj, caller);
        // Failure redundancy: start over from the stored position.
        execute();
    }

    var request = {
        name: HASHTAGS[hashtagIndex],
        count: 100,
        max_tag_id: maxTagId || undefined,
        complete: onPage,
        error: onFailure
    };

    api.tags.recent(request);
}

/**
 * Still to be completed!
 *
 * Placeholder for downloading only the posts that appeared after the old
 * posts were fully downloaded. The body is empty, so calling this function
 * currently does nothing.
 *
 * @param {*} minTagId Cursor passed by execute() from the current hashtag's
 *     stored minTagId; unused until the function is implemented.
 */
function getNewPosts(minTagId){

}

/**
 * Moves on to the following hashtag and restarts processing for it, or
 * reports completion when the current hashtag is the last one in HASHTAGS.
 */
function next(){
    var lastIndex = HASHTAGS.length - 1;

    // Nothing left to advance to.
    if( hashtagIndex >= lastIndex){
        console.log("All hashtags processed...");
        return;
    }

    console.log("Moving onto the next hashtag...");

    hashtagIndex++;

    execute();
}

好的,现在我卡在了下一个难题上——下载新帖子(换句话说,只下载自我上次下载完所有帖子之后新出现的那些帖子)。我应该使用 Instagram 订阅(subscriptions),还是有办法实现类似于我已经用过的分页方式?我担心如果采用前一种方案,一旦我的服务器出现问题并停机一段时间,我就会错过一些帖子。而如果采用后一种方案,我担心可能无法对记录进行分页,因为我不确定 Instagram API 是否支持向前分页(而不只是向后分页)。

我曾多次尝试在 Instagram API 开发者的 Google 群组中发帖提问,但我的消息似乎都没有出现在论坛上,所以我想还是求助于值得信赖的 Stack Overflow。

4

0 回答 0