我正在使用 Postman 链接检查器(http://blog.getpostman.com/2017/06/23/check-for-broken-links-on-your-website-using-a-postman-collection/)。有什么方法可以只检查 xx 页面上的所有链接,而不继续抓取这些链接所指向的页面?
// Postman test script for the 'Check URL' request.
// It records whether the URL that was just requested works, collects the
// <a href> values from internal pages into the `links` list, and queues the
// next unchecked link by re-running the same request.

// get environment variables
const start_url = postman.getEnvironmentVariable('start_url');
const root_url = postman.getEnvironmentVariable('root_url');
const links = JSON.parse(postman.getEnvironmentVariable('links'));
const url = postman.getEnvironmentVariable('url');
// advance the counter: `index` is now the position in `links` of the next link to request
const index = parseInt(postman.getEnvironmentVariable('index'), 10) + 1;

// test if link works; every 4xx/5xx status (including 400 itself) counts as broken
if (responseCode.code >= 400) {
  // build the test name by concatenation so the broken URL appears next to the message
  tests["This link is broken: " + url] = false;
} else {
  tests["Link works"] = true;
}

// if the current url includes the start_url, then this is an internal link and we should crawl it for more links
// NOTE(review): to check only the links found on the start page without crawling
// further pages, this condition could be narrowed to `url === start_url` -- this
// assumes the first request is made to exactly start_url; confirm against the
// collection's initial environment values.
if (url.includes(start_url)) {
  // load the response body as HTML using cheerio, get the <a> tags
  const $ = cheerio.load(responseBody);
  // a regular function (not an arrow) is required here because `this` is the current element
  $('a').each(function () {
    const link = $(this).attr('href');
    // skip anchors without an href, and add links to the links array if not already in there
    // if you have additional links you would like to exclude, for example, ads, you can add this criteria as well
    if (link && !links.includes(link)) {
      links.push(link);
    }
  });
}

if (index >= links.length) {
  // every collected link has been requested; leave the environment untouched
  // and do not schedule another run of this request
  console.log('no more links to check');
} else {
  // if link is a relative one, prepend with root_url
  let next_url = links[index];
  if (!/^https?:\/\//.test(next_url)) {
    next_url = root_url + next_url;
  }

  // update environment variable values
  postman.setEnvironmentVariable("links", JSON.stringify(links));
  postman.setEnvironmentVariable("url", next_url);
  postman.setEnvironmentVariable("index", index);

  // continue calling the same request until all links are checked
  postman.setNextRequest('Check URL');
}