Add screen
This commit is contained in:
parent
01d79c06da
commit
ffbf63a15d
1 changed file with 60 additions and 60 deletions
|
|
@ -91,83 +91,83 @@ const getUserProfile = async (req, res) => {
|
|||
}
|
||||
};
|
||||
|
||||
/**
 * Logs in to LinkedIn with the given credentials, opens a profile page and
 * extracts its visible text fields.
 *
 * A full-page screenshot is written to the working directory after each step
 * (step1_login.png ... step5_data_extracted.png); on failure, error.png is
 * attempted instead.
 *
 * @param {string} profileUrl - URL of the LinkedIn profile to open.
 * @param {string} email - Value typed into the login form's #username field.
 * @param {string} password - Value typed into the login form's #password field.
 * @returns {Promise<{name: ?string, headline: ?string, location: ?string,
 *   about: ?string, experiences: string[], education: string[],
 *   skills: string[]}>} Text found on the profile page; single-value fields
 *   are null when their selector matches nothing.
 * @throws Rethrows the original error from any failing step, after the
 *   browser has been closed.
 */
const scrapeLinkedInProfile = async (profileUrl, email, password) => {
  console.log("Scraping LinkedIn profile:", profileUrl);

  const browser = await chromium.launch({
    headless: true,
    args: ["--no-sandbox", "--disable-setuid-sandbox"],
  });

  // Declared outside the try so the catch handler can tell whether a page
  // exists to take the error screenshot from.
  let page;

  try {
    page = await browser.newPage();

    // === 1. Log in ===
    await page.goto("https://www.linkedin.com/login", {
      waitUntil: "networkidle",
      timeout: 60000,
    });
    console.log("Login page loaded");
    await page.screenshot({ path: "step1_login.png", fullPage: true });

    // page.fill() sets the value in one go and defines no `delay` option
    // (that option belongs to the keystroke-typing APIs), so none is passed.
    await page.fill("#username", email);
    await page.fill("#password", password);

    // Start waiting for the navigation together with the click so the
    // navigation triggered by the submit cannot be missed.
    await Promise.all([
      page.click('[type="submit"]'),
      page.waitForNavigation({ waitUntil: "networkidle", timeout: 60000 }),
    ]);
    console.log("Logged in");
    await page.screenshot({ path: "step2_logged_in.png", fullPage: true });

    // === 2. Open the profile ===
    await page.goto(profileUrl, {
      waitUntil: "networkidle",
      timeout: 60000,
    });
    await page.waitForSelector("h1", { timeout: 60000 });
    console.log("Profile page loaded");
    await page.screenshot({ path: "step3_profile_loaded.png", fullPage: true });

    // Scroll one viewport down so lazily loaded content gets a chance to render.
    await page.evaluate(() => window.scrollBy(0, window.innerHeight));
    await page.waitForTimeout(2000);
    await page.screenshot({ path: "step4_scrolled.png", fullPage: true });

    // === 3. Extract the data ===
    // The callback runs inside the page, so it can only use DOM APIs and
    // whatever it defines itself.
    const profileData = await page.evaluate(() => {
      const getText = (selector) =>
        document.querySelector(selector)?.innerText || null;
      const getAllText = (selector) =>
        Array.from(document.querySelectorAll(selector)).map((el) =>
          el.innerText.trim()
        );

      return {
        name: getText("h1"),
        headline: getText(".pv-text-details__left-panel .text-body-medium"),
        location: getText(".pv-text-details__left-panel .text-body-small"),
        about:
          getText(".pv-about-section") ||
          getText(".pv-shared-text-with-see-more"),
        experiences: getAllText("section#experience-section li"),
        education: getAllText("section#education-section li"),
        skills: getAllText(".pv-skill-category-entity__name-text"),
      };
    });

    await page.screenshot({ path: "step5_data_extracted.png", fullPage: true });
    console.log("Data extracted:", profileData);

    return profileData;
  } catch (err) {
    console.error("❌ Erreur pendant le scraping:", err.message);

    // The error screenshot is best-effort: if it fails too (for example
    // because the page is gone), report that and keep the original error.
    if (page) {
      try {
        await page.screenshot({ path: "error.png", fullPage: true });
      } catch (screenshotErr) {
        console.error(
          "Could not capture error screenshot:",
          screenshotErr.message
        );
      }
    }

    throw err;
  } finally {
    // Close the browser on every path so no Chromium process is left behind.
    await browser.close();
  }
};
|
||||
|
||||
module.exports = {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue