I am using pjscrape to scrape content from dynamically generated pages on a site; please see the code below. I can't figure out how to get it to include the URL of the scraped page among the JSON fields written to the output file. I have tried various approaches, including document.url and others (see the commented-out lines at the top of the scraper in the code below), but none of them gives the urlFound field the right value. The answer may well be dead simple, but it's eluding me. Is there any other way of doing this? Help!
/**
 * Scraper callback handed to pjs.addSuite(). It builds one output record
 * for the page currently being scraped.
 *
 * It relies on `window`, `$` (jQuery) and `_pjs` being available as globals
 * where it runs, i.e. inside the scraped page rather than in this config
 * file. Keep it ES5-only and free of references to outer variables.
 *
 * @returns {{urlFound: string, name: string, marin: *}} record containing
 *   the URL of the scraped page, the text of its <h1> element(s), and
 *   whatever _pjs.getText() yields for <script> elements containing 'marin'.
 */
var scraper = function() {
    // Earlier attempts that did not produce the URL, kept for reference:
    //   $(window.location.href)              -- hands the URL to jQuery as a selector
    //   $(this).window.location.href         -- a jQuery object has no `window` property
    //   _pjs.toFullUrl($(this).attr('href')) -- `this` is not an anchor element here
    //   _pjs.toFullUrl($(this).URL)          -- a jQuery object has no `URL` property
    return {
        // The address of the current page is a plain string on
        // window.location; no jQuery wrapping is needed.
        urlFound: window.location.href,
        name: $('h1').text(),
        marin: _pjs.getText($("script:contains('marin')"))
    };
};
// Global pjscrape settings: where scraped items go and where log messages go.
pjs.config({
    // --- scraped data ---
    // Serialization of the scraped items: 'json' or 'csv'.
    format: 'json',
    // Destination for the items: 'stdout' or 'file' (path given by outFile).
    writer: 'file',
    outFile: 'scrape_output.json',

    // --- logging ---
    // Destination for log messages: 'stdout', 'file' (path set in
    // config.logFile) or 'none'.
    log: 'stdout'
});
// Register the scrape suite: start at the index page, collect further page
// URLs from its links, and run `scraper` to build the output records.
pjs.addSuite({
    // Function that produces the output record for a page.
    scraper: scraper,
    // Entry point of the crawl.
    url: 'http://www.mophie.com/index.html',
    // Additional URLs to visit: the targets of anchors matching 'li a'.
    moreUrls: function() {
        var anchorUrls = _pjs.getAnchorUrls('li a');
        return anchorUrls;
    }
});