From 95b242dc7213bc5c5757e228160cfabac68c2918 Mon Sep 17 00:00:00 2001 From: Gorka Reguero Date: Fri, 30 Jan 2026 15:24:47 +0100 Subject: [PATCH 1/2] Fix cuenca mediterraneo --- .../src/api/cuenca.api.ts | 40 ++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/integrations/scraping-cuenca-mediterranea/src/api/cuenca.api.ts b/integrations/scraping-cuenca-mediterranea/src/api/cuenca.api.ts index 697542d..5d3e489 100644 --- a/integrations/scraping-cuenca-mediterranea/src/api/cuenca.api.ts +++ b/integrations/scraping-cuenca-mediterranea/src/api/cuenca.api.ts @@ -1,4 +1,17 @@ import axios from 'axios'; +import https from 'https'; + +const httpsAgent = new https.Agent({ + rejectUnauthorized: false, +}); + +const browserHeaders = { + 'User-Agent': + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36', + Accept: + 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', + 'Accept-Language': 'es-ES,es;q=0.9,en;q=0.8', +}; /** * Gets the HTML content from the Andalusian reservoirs page. @@ -6,6 +19,31 @@ import axios from 'axios'; * @returns Promise that resolves with the page HTML */ export async function getCuencaPageHTMLContent(url: string): Promise { - const { data: html } = await axios.get(url); + const baseUrl = 'https://www.redhidrosurmedioambiente.es/saih/'; + + // First request: visit the base page to get session cookies + const sessionResponse = await axios.get(baseUrl, { + httpsAgent, + maxRedirects: 5, + headers: browserHeaders, + }); + + // Extract cookies from the response + const setCookieHeaders = sessionResponse.headers['set-cookie']; + const cookieString = setCookieHeaders + ? 
setCookieHeaders.map((cookie: string) => cookie.split(';')[0]).join('; ') + : ''; + + // Second request: fetch the target page with session cookies + const { data: html } = await axios.get(url, { + httpsAgent, + maxRedirects: 5, + headers: { + ...browserHeaders, + ...(cookieString ? { Cookie: cookieString } : {}), + Referer: baseUrl, + }, + }); + return html; } From 0c7d2e71f807f84c46f1a734f263cde747c016d4 Mon Sep 17 00:00:00 2001 From: Braulio Date: Sat, 31 Jan 2026 18:14:00 +0100 Subject: [PATCH 2/2] update --- .../src/api/cuenca.api.ts | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/integrations/scraping-cuenca-mediterranea/src/api/cuenca.api.ts b/integrations/scraping-cuenca-mediterranea/src/api/cuenca.api.ts index 5d3e489..b53c45a 100644 --- a/integrations/scraping-cuenca-mediterranea/src/api/cuenca.api.ts +++ b/integrations/scraping-cuenca-mediterranea/src/api/cuenca.api.ts @@ -1,16 +1,16 @@ -import axios from 'axios'; -import https from 'https'; +import axios from "axios"; +import https from "https"; const httpsAgent = new https.Agent({ rejectUnauthorized: false, }); const browserHeaders = { - 'User-Agent': - 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36', + "User-Agent": + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36", Accept: - 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', - 'Accept-Language': 'es-ES,es;q=0.9,en;q=0.8', + "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8", + "Accept-Language": "es-ES,es;q=0.9,en;q=0.8", }; /** @@ -19,7 +19,9 @@ const browserHeaders = { * @returns Promise that resolves with the page HTML */ export async function getCuencaPageHTMLContent(url: string): Promise { - const baseUrl = 'https://www.redhidrosurmedioambiente.es/saih/'; + // TODO: This should be moved to a param in the function, so we have 
+ // baseUrl for the session page and url for the target page. + const baseUrl = "https://www.redhidrosurmedioambiente.es/saih/"; // First request: visit the base page to get session cookies const sessionResponse = await axios.get(baseUrl, { @@ -29,10 +31,10 @@ export async function getCuencaPageHTMLContent(url: string): Promise { }); // Extract cookies from the response - const setCookieHeaders = sessionResponse.headers['set-cookie']; + const setCookieHeaders = sessionResponse.headers["set-cookie"]; const cookieString = setCookieHeaders - ? setCookieHeaders.map((cookie: string) => cookie.split(';')[0]).join('; ') - : ''; + ? setCookieHeaders.map((cookie: string) => cookie.split(";")[0]).join("; ") + : ""; // Second request: fetch the target page with session cookies const { data: html } = await axios.get(url, {