0

我正在从 N 个网址中提取文本。首先我用 linksOnPage 获取 N 个 url,然后对每个 url 运行 doOnPage 函数来获取其中的文本。当我运行代码时,N 个 url 中只有 1 个被该函数处理。我认为这是因为处理函数是异步运行的。如何把它们放进队列并全部运行?或者有什么更好的方法可以做到这一点?

这是主要的JS代码:

// Module-level handles.
// NOTE(review): linksOnPage and lyricsFromLink are declared here but never
// assigned in the visible code; the functions of the same names are attached
// to `exports` instead.
var nodeio, linksOnPage, lyricsFromLink, db;

// Scraping library; this file uses nodeio.scrape() and, inside a job,
// this.getHtml() and this.emit().
nodeio = require('node.io');
// Project-local module; this file uses db.loadDB() and the db.Song constructor.
db = require('./db');
// Runs once at load time, before any song is saved.
// NOTE(review): presumably opens the database connection - confirm in ./db.
db.loadDB();

/**
 * Store one song record through the db.Song model (a mongo-backed table).
 *
 * @param {*} artist - Value saved under the `artist` key.
 * @param {*} title - Value saved under the `title` key.
 * @param {*} lyrics - Value saved under the `lyrics` key.
 *
 * A save error is rethrown from inside the save callback; a successful save
 * only logs a confirmation message.
 */
var loadSong = function(artist, title, lyrics) {
    console.log("loadSong being called");

    // Build the record and hand it to the model in a single step.
    var song = new db.Song({
        artist: artist,
        title: title,
        lyrics: lyrics
    });

    song.save(function(saveError) {
        if (!saveError) {
            console.log("saved with no errors!");
            return;
        }
        throw saveError;
    });
};
// generic utility for getting links on a page and running a function on each one
exports.linksOnPage = function(pageObj, linkSelector, doOnPage, contentSelector) {
    nodeio.scrape(function(){
        this.getHtml(pageObj.pageUrl, function(err, $) {
            var links = [];
            var i = 0;
            $(linkSelector).each(function(link) {
                var fullLink = pageObj.rootUrl + link.attribs.href
                links.push(fullLink);
                //run a function on each link
                console.log('getting lyrics for song: ', i);
                doOnPage(pageObj.artist, fullLink, contentSelector);
                i = i+1;
            });
            //this.emit(links);
        });
    });
}

// get the lyrics for a specific song 
exports.lyricsFromLink = function(artist, pageUrl, lyricsSelector) {
    nodeio.scrape(function(){
        this.getHtml(pageUrl, function(err, $) {
            var lyrics = "";
            console.log('before each statement');
            $(lyricsSelector).each(function(lyricParagraph) {
                lyrics = lyrics + " " + lyricParagraph.text;
            });
            console.log('after each statement');
            loadSong(artist, pageUrl, lyrics);
            this.emit(lyrics)
        });
    });
}
4

0 回答 0