add scraping

This commit is contained in:
AMAICDAX 2025-09-09 15:57:12 +02:00
parent 35daf83b92
commit 21c488eeba
3 changed files with 82 additions and 20 deletions

View file

@ -2,12 +2,16 @@
const axios = require("axios");
const querystring = require("querystring");
// Loaded with require() like the other dependencies: this module exports via
// module.exports, and a static `import` statement cannot be mixed into a
// CommonJS file.
const puppeteer = require("puppeteer");

// OAuth application settings and the login used for scraping are read from
// the environment so that no credential is committed to the repository.
// NOTE(review): the values that used to be hardcoded here remain in the git
// history and should be rotated.
const clientId = process.env.LINKEDIN_CLIENT_ID;
const clientSecret = process.env.LINKEDIN_CLIENT_SECRET;
const redirectUri =
  process.env.LINKEDIN_REDIRECT_URI ??
  "http://localhost:4200/api/auth/linkedin/callback";
const scope = "openid profile email";
const email = process.env.LINKEDIN_EMAIL;
const password = process.env.LINKEDIN_PASSWORD;

// Warn once at load time instead of failing later with an opaque error when
// an undefined value reaches LinkedIn or the login form.
const missingLinkedInSettings = Object.entries({
  LINKEDIN_CLIENT_ID: clientId,
  LINKEDIN_CLIENT_SECRET: clientSecret,
  LINKEDIN_EMAIL: email,
  LINKEDIN_PASSWORD: password,
})
  .filter(([, value]) => !value)
  .map(([name]) => name);
if (missingLinkedInSettings.length > 0) {
  console.warn(
    `LinkedIn configuration missing: ${missingLinkedInSettings.join(", ")}`
  );
}

let accessToken = "";
const authenticateUser = (req, res) => {
@ -87,8 +91,55 @@ const getUserProfile = async (req, res) => {
}
};
/**
 * Logs in to LinkedIn with the module-level `email` / `password`, opens the
 * given profile page and extracts its visible text fields.
 *
 * @param {string} profileUrl - URL of the profile page to open after login.
 * @param {{ headless?: boolean | string }} [options] - `headless` is passed
 *   straight to `puppeteer.launch`; it defaults to `false` (visible browser
 *   window), which is what this function always used before.
 * @returns {Promise<{
 *   name: string | null,
 *   headline: string | null,
 *   location: string | null,
 *   about: string | null,
 *   experiences: string[],
 *   education: string[],
 *   skills: string[],
 * }>} Scraped fields; a single-value field is `null` when its selector
 *   matches nothing or its text is empty.
 * @throws Whatever puppeteer throws on launch, navigation, typing or
 *   evaluation failures. The browser is closed before the error propagates.
 */
const scrapeLinkedInProfile = async (profileUrl, { headless = false } = {}) => {
  const browser = await puppeteer.launch({ headless });

  // Everything after launch runs inside try/finally so that a failure at any
  // step cannot leave an orphaned browser process behind.
  try {
    const page = await browser.newPage();

    // 1. Open the login page.
    await page.goto("https://www.linkedin.com/login", {
      waitUntil: "networkidle2",
    });

    // 2. Sign in with the configured credentials.
    await page.type("#username", email, { delay: 50 });
    await page.type("#password", password, { delay: 50 });
    // Start waiting for the navigation together with the click so the
    // navigation event cannot be missed.
    await Promise.all([
      page.click('[type="submit"]'),
      page.waitForNavigation({ waitUntil: "networkidle2" }),
    ]);
    // NOTE(review): nothing here verifies that the login succeeded; confirm
    // how a rejected login or a verification challenge should be handled.

    // 3. Open the requested profile.
    await page.goto(profileUrl, { waitUntil: "networkidle2" });

    // 4. Extract the fields. This callback runs inside the page, so it can
    // only use what it defines itself.
    const profileData = await page.evaluate(() => {
      const getText = (selector) =>
        document.querySelector(selector)?.innerText || null;

      // Matched nodes without `innerText` (e.g. SVG elements) yield "" rather
      // than throwing.
      const getAllText = (selector) =>
        Array.from(document.querySelectorAll(selector)).map((el) =>
          (el.innerText ?? "").trim()
        );

      return {
        name: getText(".pv-text-details__left-panel h1"),
        headline: getText(".pv-text-details__left-panel .text-body-medium"),
        location: getText(".pv-text-details__left-panel .text-body-small"),
        about:
          getText(".pv-about-section") ||
          getText(".display-flex.ph5.pv3 .break-words"),
        experiences: getAllText(".pv-entity__summary-info"),
        education: getAllText(".pv-education-entity"),
        skills: getAllText(".pv-skill-category-entity__name-text"),
      };
    });

    return profileData;
  } finally {
    await browser.close();
  }
};
// Public API of this module: the three Express handlers plus the standalone
// scraping helper.
module.exports = {
authenticateUser,
handleCallback,
getUserProfile,
scrapeLinkedInProfile,
};

View file

@ -10,4 +10,14 @@ router.get("/auth/linkedin", authenticateUser);
// Callback route for LinkedIn OAuth2
router.get("/auth/linkedin/callback", handleCallback);
// Handles the LinkedIn URL form submission.

/**
 * Escapes the characters that are significant in HTML so a value can be
 * placed in element content or inside a double-quoted attribute.
 *
 * @param {unknown} value - Value to embed; converted with String().
 * @returns {string} The escaped text.
 */
function escapeHtml(value) {
  const entities = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#39;",
  };
  return String(value).replace(/[&<>"']/g, (ch) => entities[ch]);
}

/**
 * Validates a submitted profile URL.
 *
 * @param {unknown} raw - Value received from the form.
 * @returns {string | null} The normalized URL when it is an https URL on
 *   linkedin.com or one of its subdomains, otherwise `null`.
 */
function parseLinkedInUrl(raw) {
  if (typeof raw !== "string") return null;

  let url;
  try {
    url = new URL(raw.trim());
  } catch {
    return null;
  }

  const host = url.hostname.toLowerCase();
  const isLinkedInHost =
    host === "linkedin.com" || host.endsWith(".linkedin.com");
  return url.protocol === "https:" && isLinkedInHost ? url.href : null;
}

router.post("/me/link", express.urlencoded({ extended: true }), (req, res) => {
  if (!req.session.user) return res.redirect("/");

  // The submitted value is untrusted: it is echoed into HTML below and stored
  // in the session, so reject anything that is not a LinkedIn https URL.
  const linkedinUrl = parseLinkedInUrl(req.body?.linkedinUrl);
  if (linkedinUrl === null) {
    return res.status(400).send(`
      <p>URL LinkedIn invalide.</p>
      <a href="/me">Retour</a>
    `);
  }

  req.session.user.linkedinUrl = linkedinUrl;

  const safeUrl = escapeHtml(linkedinUrl);
  res.send(`
    <p>URL LinkedIn enregistrée : <a href="${safeUrl}" target="_blank" rel="noopener noreferrer">${safeUrl}</a></p>
    <a href="/me">Retour</a>
  `);
});
module.exports = router;

View file

@ -7,6 +7,11 @@ const { getUserProfile } = require("../controllers/linkedin");
// Page d'accueil
router.get("/", (req, res) => {
const user = req.session.user;
if (user) {
return res.redirect("/me");
}
res.send(`
<h1>Welcome to the CV Generator Home Page!</h1>
<a href="/api/auth/linkedin">
@ -19,30 +24,26 @@ router.get("/", (req, res) => {
router.get("/profile", getUserProfile);
// Protected route /me: only served when the session holds a user.

/**
 * Escapes the characters that are significant in HTML so a value can be
 * placed in element content or inside a double-quoted attribute.
 *
 * @param {unknown} value - Value to embed; converted with String().
 * @returns {string} The escaped text.
 */
function escapeHtml(value) {
  const entities = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#39;",
  };
  return String(value).replace(/[&<>"']/g, (ch) => entities[ch]);
}

router.get("/me", async (req, res) => {
  const user = req.session.user;
  if (!user) {
    return res.redirect("/");
  }

  // No profile URL stored yet: show the form that posts it to the API router.
  if (!user.linkedinUrl) {
    return res.send(`
      <h2>Bienvenue, ${escapeHtml(user.nom ?? "")} !</h2>
      <form method="POST" action="/api/me/link">
        <label for="linkedinUrl">Votre URL LinkedIn :</label>
        <input type="url" id="linkedinUrl" name="linkedinUrl" placeholder="https://www.linkedin.com/in/votre-profil" required>
        <button type="submit">Enregistrer</button>
      </form>
    `);
  }

  // Resolved here because the file-level require of the controller only
  // destructures getUserProfile; without this the call below would be a
  // ReferenceError. require() caches the module, so repeating it is cheap.
  const { scrapeLinkedInProfile } = require("../controllers/linkedin");

  // The rejection is handled here rather than left to escape the async
  // handler, so the client always receives a response.
  try {
    const profile = await scrapeLinkedInProfile(user.linkedinUrl);
    res.send(profile);
  } catch (err) {
    console.error("LinkedIn profile scraping failed:", err);
    res.status(502).send(`
      <p>Impossible de récupérer le profil LinkedIn.</p>
      <a href="/">Retour</a>
    `);
  }
});
module.exports = router;