Add screen

This commit is contained in:
AMAICDAX 2025-09-10 11:25:43 +02:00
parent 01d79c06da
commit ffbf63a15d

View file

@ -91,83 +91,83 @@ const getUserProfile = async (req, res) => {
} }
}; };
/**
 * Logs in to LinkedIn in a headless Chromium instance, opens the given
 * profile page and extracts a few fields from its DOM.
 *
 * A full-page screenshot is written to the current working directory after
 * each step (step1_login.png ... step5_data_extracted.png); on failure an
 * additional error.png is attempted before the error is rethrown.
 *
 * @param {string} profileUrl - URL of the LinkedIn profile page to open.
 * @param {string} email - Value typed into the login form's "#username" field.
 * @param {string} password - Value typed into the login form's "#password" field.
 * @returns {Promise<{
 *   name: (string|null),
 *   headline: (string|null),
 *   location: (string|null),
 *   about: (string|null),
 *   experiences: string[],
 *   education: string[],
 *   skills: string[]
 * }>} Text extracted from the page; a single-value field is null when its
 *   selector matches nothing (or matches empty text).
 * @throws {TypeError} If any argument is not a non-empty string.
 * @throws {Error} Any navigation/selector/timeout error raised while scraping
 *   (rethrown unchanged after being logged).
 */
const scrapeLinkedInProfile = async (profileUrl, email, password) => {
  // Fail fast: reject unusable arguments before a browser process is started.
  for (const [label, value] of Object.entries({ profileUrl, email, password })) {
    if (typeof value !== "string" || value.length === 0) {
      throw new TypeError(
        `scrapeLinkedInProfile: "${label}" must be a non-empty string`
      );
    }
  }

  // Single timeout shared by every navigation / selector wait below.
  const NAVIGATION_TIMEOUT_MS = 60000;

  console.log("Scraping LinkedIn profile:", profileUrl);

  const browser = await chromium.launch({
    headless: true,
    args: ["--no-sandbox", "--disable-setuid-sandbox"],
  });

  // The outer try/finally guarantees the browser is closed on every exit
  // path, including a failure in newPage() or in the error screenshot.
  try {
    const page = await browser.newPage();

    try {
      // === 1. Login ===
      await page.goto("https://www.linkedin.com/login", {
        waitUntil: "networkidle",
        timeout: NAVIGATION_TIMEOUT_MS,
      });
      console.log("Login page loaded");
      await page.screenshot({ path: "step1_login.png", fullPage: true });

      // page.fill() sets the value in one step and has no `delay` option,
      // so none is passed here.
      await page.fill("#username", email);
      await page.fill("#password", password);

      // Start waiting for the navigation together with the click so the
      // navigation triggered by the submit cannot be missed.
      await Promise.all([
        page.waitForNavigation({
          waitUntil: "networkidle",
          timeout: NAVIGATION_TIMEOUT_MS,
        }),
        page.click('[type="submit"]'),
      ]);
      console.log("Logged in");
      await page.screenshot({ path: "step2_logged_in.png", fullPage: true });

      // === 2. Open the profile ===
      await page.goto(profileUrl, {
        waitUntil: "networkidle",
        timeout: NAVIGATION_TIMEOUT_MS,
      });
      await page.waitForSelector("h1", { timeout: NAVIGATION_TIMEOUT_MS });
      console.log("Profile page loaded");
      await page.screenshot({ path: "step3_profile_loaded.png", fullPage: true });

      // Scroll one viewport down and pause so lazily loaded content can render.
      await page.evaluate(() => window.scrollBy(0, window.innerHeight));
      await page.waitForTimeout(2000);
      await page.screenshot({ path: "step4_scrolled.png", fullPage: true });

      // === 3. Extract the data (this callback runs inside the page) ===
      const profileData = await page.evaluate(() => {
        const getText = (selector) =>
          document.querySelector(selector)?.innerText || null;
        const getAllText = (selector) =>
          Array.from(document.querySelectorAll(selector)).map((el) =>
            el.innerText.trim()
          );

        return {
          name: getText("h1"),
          headline: getText(".pv-text-details__left-panel .text-body-medium"),
          location: getText(".pv-text-details__left-panel .text-body-small"),
          about:
            getText(".pv-about-section") ||
            getText(".pv-shared-text-with-see-more"),
          experiences: getAllText("section#experience-section li"),
          education: getAllText("section#education-section li"),
          skills: getAllText(".pv-skill-category-entity__name-text"),
        };
      });

      await page.screenshot({
        path: "step5_data_extracted.png",
        fullPage: true,
      });
      console.log("Data extracted:", profileData);

      return profileData;
    } catch (err) {
      console.error("❌ Erreur pendant le scraping:", err.message);

      // Best-effort diagnostic only: a failing screenshot must not replace
      // the original scraping error.
      try {
        await page.screenshot({ path: "error.png", fullPage: true });
      } catch (screenshotErr) {
        console.error(
          "Could not capture error screenshot:",
          screenshotErr.message
        );
      }

      throw err;
    }
  } finally {
    await browser.close();
  }
};
module.exports = { module.exports = {