I would say both options are about equally valid. Having multiple crawlers is certainly simpler, although doing everything in one crawler can be more efficient (as you can handle all users at once). I would suggest starting with the first option until you get a better feeling for how to handle the second one properly.
The version I present here is the simplest, as it assumes the pages you access automatically redirect to the login page and back. If that is not the case, you just need to handle the navigation explicitly using request labels.
// Example user registry. In a real actor this would typically be loaded
// from INPUT or a key-value store; it is inlined here to keep the snippet
// self-contained.
const users = {
    stevejobs: {
        credentials: { username: 'stevejobs@gmail.com', password: '123' },
        // Persisted cookies allow re-using a logged-in session; null means not logged in yet
        cookies: null,
    },
    billgates: {
        credentials: { username: 'billgates@gmail.com', password: '123' },
        cookies: null,
    },
    // etc...
};
const myUrls = ['https://resource1.com', 'https://resource2.com']; // replace with real URLs

// Initialize the request queue shared by all users.
const requestQueue = await Apify.openRequestQueue();

// Enqueue every URL once per user. The user key is carried in userData so the
// crawler knows which account each request belongs to. (The original code
// referenced `user` outside the loop that declared it, which would throw a
// ReferenceError, and had an unmatched closing brace at the end.)
for (const user of Object.keys(users)) {
    for (const url of myUrls) {
        await requestQueue.addRequest({
            url,
            uniqueKey: `${url}_${user}`, // otherwise the queue would dedup identical URLs
            userData: { user },
        });
    }
}

const crawler = new Apify.PuppeteerCrawler({
    requestQueue,
    gotoFunction: async ({ page, request }) => {
        const { user } = request.userData;
        // If we already have cookies for this user, restore the session before navigating.
        const { cookies } = users[user];
        if (cookies) {
            await page.setCookie(...cookies);
        }
        return page.goto(request.url);
    },
    handlePageFunction: async ({ page, request }) => {
        const { user } = request.userData;
        // Check whether we are logged in. Note: bare `$()` only exists inside the
        // browser context — in Node we must use Puppeteer's page.$() and await it.
        const loggedIn = await page.$('am-i-logged'); // change to a real selector
        if (!loggedIn) {
            // Log in with this user's credentials.
            const { username, password } = users[user].credentials;
            // do your login with `username` / `password`
            // ...
            // wait for the post-login redirect, then persist the session cookies
            users[user].cookies = await page.cookies();
        }
        // Usually the login page redirects straight to the resource, so we can scrape right away.
        const data = scrapeData(); // replace with a real function
        await Apify.pushData(data);
    },
});

await crawler.run();