diff --git a/.github/workflows/nightly-news-generation.yml b/.github/workflows/nightly-news-generation.yml new file mode 100644 index 00000000..5ec309c4 --- /dev/null +++ b/.github/workflows/nightly-news-generation.yml @@ -0,0 +1,235 @@ +name: Nightly News Generation + +on: + schedule: + # 02:00 CET (01:00 UTC winter / stays near local midnight in summer) + - cron: '0 1 * * *' + workflow_dispatch: + inputs: + date: + description: 'Override document window start date (YYYY-MM-DD, default: yesterday)' + required: false + threshold: + description: 'Minimum document count per type to trigger generation (default: 5)' + required: false + default: '5' + languages: + description: 'Languages to generate (en,sv | nordic | eu-core | all)' + required: false + default: 'all' + types: + description: 'Article types (committee-reports,propositions,motions,week-ahead)' + required: false + default: 'committee-reports,propositions,motions,week-ahead' + dry_run: + description: 'Dry run โ€“ log what would happen without writing files' + type: boolean + required: false + default: false + +permissions: + contents: write + pull-requests: write + +jobs: + generate-news: + name: Generate Daily News Articles + runs-on: ubuntu-latest + timeout-minutes: 60 + + steps: + - name: Harden Runner + uses: step-security/harden-runner@5ef0c079ce82195b2a36a210272d6b661572d83e # v2.14.2 + with: + egress-policy: audit + + - name: Checkout repository + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + + - name: Setup Node.js + uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238 # v6.2.0 + with: + node-version: '24' + cache: 'npm' + + - name: Install dependencies + run: | + echo "๐Ÿ“ฆ Installing dependenciesโ€ฆ" + npm ci --prefer-offline --no-audit + echo "โœ… Dependencies installed" + + - name: Set date variables + id: dates + run: | + DATE=$(date +%Y-%m-%d) + YESTERDAY=$(date -d yesterday +%Y-%m-%d) + echo "today=$DATE" >> "$GITHUB_OUTPUT" + echo 
"yesterday=$YESTERDAY" >> "$GITHUB_OUTPUT" + echo "๐Ÿ“… Today : $DATE" + echo "๐Ÿ“… Yesterday: $YESTERDAY" + + - name: Generate daily news articles + id: generate + env: + MCP_AUTH_TOKEN: ${{ secrets.MCP_AUTH_TOKEN }} + MCP_SERVER_URL: ${{ vars.MCP_SERVER_URL || 'https://riksdag-regering-ai.onrender.com/mcp' }} + MCP_CLIENT_TIMEOUT_MS: '90000' + run: | + # Build CLI arguments + ARGS="" + + # --date + if [ -n "${{ github.event.inputs.date }}" ]; then + ARGS="$ARGS --date=${{ github.event.inputs.date }}" + fi + + # --threshold + THRESHOLD="${{ github.event.inputs.threshold || '5' }}" + ARGS="$ARGS --threshold=$THRESHOLD" + + # --languages + LANGS="${{ github.event.inputs.languages || 'all' }}" + ARGS="$ARGS --languages=$LANGS" + + # --types + TYPES="${{ github.event.inputs.types || 'committee-reports,propositions,motions,week-ahead' }}" + ARGS="$ARGS --types=$TYPES" + + # --dry-run + if [ "${{ github.event.inputs.dry_run }}" = "true" ]; then + ARGS="$ARGS --dry-run" + echo "๐Ÿ” Dry-run mode enabled" + fi + + echo "๐Ÿš€ Running: node scripts/generate-daily-news.js $ARGS" + node scripts/generate-daily-news.js $ARGS + + - name: Read generation report + if: always() + id: report + run: | + REPORT_FILE="news/metadata/daily-report.json" + if [ -f "$REPORT_FILE" ]; then + echo "๐Ÿ“Š Generation report:" + cat "$REPORT_FILE" + + ARTICLES_CREATED=$(jq -r '.articlesCreated | join(", ")' "$REPORT_FILE") + ERRORS=$(jq -r '.errors | length' "$REPORT_FILE") + echo "articles_created=$ARTICLES_CREATED" >> "$GITHUB_OUTPUT" + echo "error_count=$ERRORS" >> "$GITHUB_OUTPUT" + else + echo "โš ๏ธ No report file found" + echo "articles_created=" >> "$GITHUB_OUTPUT" + echo "error_count=0" >> "$GITHUB_OUTPUT" + fi + + - name: Update news indexes and sitemap + if: steps.generate.outcome == 'success' && github.event.inputs.dry_run != 'true' + run: | + echo "๐Ÿ”„ Updating news indexesโ€ฆ" + if [ -f "scripts/update-news-indexes-and-sitemap.py" ]; then + python3 
scripts/update-news-indexes-and-sitemap.py + echo "โœ… Indexes and sitemap updated" + elif [ -f "package.json" ] && grep -q '"generate-news-indexes"' package.json; then + node scripts/generate-news-indexes.js + node scripts/generate-sitemap.js + echo "โœ… Indexes and sitemap updated" + else + echo "โ„น๏ธ No index/sitemap update script found โ€“ skipping" + fi + + - name: Validate generated HTML + if: steps.generate.outcome == 'success' && github.event.inputs.dry_run != 'true' + run: | + echo "๐Ÿ” Validating generated HTMLโ€ฆ" + TODAY="${{ steps.dates.outputs.today }}" + + # Build list of today's new files + shopt -s nullglob + NEW_FILES=(news/${TODAY}-*.html) + shopt -u nullglob + + if [ ${#NEW_FILES[@]} -eq 0 ]; then + echo "โ„น๏ธ No new HTML files for $TODAY โ€“ skipping validation" + else + echo "Validating ${#NEW_FILES[@]} filesโ€ฆ" + npx --yes htmlhint "${NEW_FILES[@]}" || echo "โš ๏ธ HTMLHint found issues (non-blocking)" + echo "โœ… HTML validation complete" + fi + + - name: Create Pull Request + if: > + steps.generate.outcome == 'success' && + github.event.inputs.dry_run != 'true' && + steps.report.outputs.articles_created != '' + id: create-pr + uses: peter-evans/create-pull-request@c0f553fe549906ede9cf27b5156039d195d2ece0 # v8.1.0 + with: + token: ${{ secrets.GITHUB_TOKEN }} + commit-message: 'news: automated daily articles for ${{ steps.dates.outputs.today }}' + title: '๐Ÿ“ฐ Daily news: ${{ steps.dates.outputs.today }}' + body: | + ## ๐Ÿ“ฐ Automated Daily News Generation + + This PR was created automatically by the nightly news generation workflow. 
+ + ### Summary + - **Date**: ${{ steps.dates.outputs.today }} + - **Document window**: ${{ steps.dates.outputs.yesterday }} โ†’ ${{ steps.dates.outputs.today }} + - **Languages**: ${{ github.event.inputs.languages || 'all' }} + - **Articles created**: ${{ steps.report.outputs.articles_created }} + - **Errors**: ${{ steps.report.outputs.error_count }} + + ### Article Types Generated + ${{ steps.report.outputs.articles_created }} + + ### Quality Checks + - [x] MCP data fetched from riksdag-regering-mcp + - [x] Document threshold applied (โ‰ฅ${{ github.event.inputs.threshold || '5' }} docs per type) + - [x] Multi-language generation (${{ github.event.inputs.languages || 'all' }}) + - [x] HTML validation with HTMLHint + - [x] News indexes and sitemap updated + + ### References + - Workflow run: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + - Script: `scripts/generate-daily-news.js` + - Guide: `ARTICLE_ENHANCEMENT_GUIDE.md` + + --- + *Automatically generated by the Nightly News Generation workflow* + branch: 'auto/daily-news-${{ steps.dates.outputs.today }}' + delete-branch: true + labels: | + automated-pipeline + news-article + content + + - name: Output PR URL + if: steps.create-pr.outputs.pull-request-url != '' + run: | + echo "โœ… Pull request created: ${{ steps.create-pr.outputs.pull-request-url }}" + echo "## ๐Ÿ“ฐ PR Created" >> "$GITHUB_STEP_SUMMARY" + echo "${{ steps.create-pr.outputs.pull-request-url }}" >> "$GITHUB_STEP_SUMMARY" + + - name: Write step summary + if: always() + run: | + echo "## ๐Ÿ“ฐ Nightly News Generation โ€” ${{ steps.dates.outputs.today }}" >> "$GITHUB_STEP_SUMMARY" + echo "" >> "$GITHUB_STEP_SUMMARY" + echo "| Field | Value |" >> "$GITHUB_STEP_SUMMARY" + echo "|-------|-------|" >> "$GITHUB_STEP_SUMMARY" + echo "| Date | ${{ steps.dates.outputs.today }} |" >> "$GITHUB_STEP_SUMMARY" + echo "| Languages | ${{ github.event.inputs.languages || 'all' }} |" >> "$GITHUB_STEP_SUMMARY" + echo "| Articles 
created | ${{ steps.report.outputs.articles_created || 'none' }} |" >> "$GITHUB_STEP_SUMMARY" + echo "| Errors | ${{ steps.report.outputs.error_count || '0' }} |" >> "$GITHUB_STEP_SUMMARY" + echo "| Status | ${{ steps.generate.outcome }} |" >> "$GITHUB_STEP_SUMMARY" + + - name: Notify on failure + if: failure() + run: | + echo "โŒ Nightly news generation failed!" >&2 + echo "Please check the workflow run for details:" + echo "${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" + echo "" + echo "## โŒ Generation Failed" >> "$GITHUB_STEP_SUMMARY" + echo "Check the [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) for details." >> "$GITHUB_STEP_SUMMARY" diff --git a/ARTICLE_ENHANCEMENT_GUIDE.md b/ARTICLE_ENHANCEMENT_GUIDE.md new file mode 100644 index 00000000..da60764b --- /dev/null +++ b/ARTICLE_ENHANCEMENT_GUIDE.md @@ -0,0 +1,892 @@ +# Article Enhancement Guide + +**Version:** 2.0 +**Last Updated:** 2026-02-19 +**Classification:** Public +**Owner:** Hack23 AB +**Repository:** [Hack23/riksdagsmonitor](https://github.com/Hack23/riksdagsmonitor) + +--- + +## ๐Ÿ“‹ Overview + +This guide documents the complete workflow for generating, enhancing, translating, and publishing news articles on Riksdagsmonitor. It consolidates proven patterns from Issues [#306โ€“#334](https://github.com/Hack23/riksdagsmonitor/issues) and successful PRs [#307](https://github.com/Hack23/riksdagsmonitor/pull/307), [#312](https://github.com/Hack23/riksdagsmonitor/pull/312), [#313](https://github.com/Hack23/riksdagsmonitor/pull/313), [#314](https://github.com/Hack23/riksdagsmonitor/pull/314), [#326](https://github.com/Hack23/riksdagsmonitor/pull/326), [#333](https://github.com/Hack23/riksdagsmonitor/pull/333), and [#334](https://github.com/Hack23/riksdagsmonitor/pull/334) that collectively enhanced 176 articles. 
+ +--- + +## ๐Ÿ—บ๏ธ Architecture Overview + +``` +riksdag-regering-mcp (32 tools) + โ”‚ + โ–ผ +scripts/generate-daily-news.js โ† nightly orchestrator + โ”‚ (decides which types to generate based on doc count โ‰ฅ threshold) + โ–ผ +scripts/generate-news-enhanced.js โ† multi-language article engine + โ”‚ + โ”œโ”€โ”€ scripts/mcp-client.js โ† MCP transport layer + โ”œโ”€โ”€ scripts/data-transformers.js โ† semantic transformation + โ”œโ”€โ”€ scripts/article-template.js โ† HTML generation + โ”œโ”€โ”€ scripts/editorial-pillars.js โ† 5-pillar content strategy + โ””โ”€โ”€ scripts/news-types/ โ† per-type generators + โ”œโ”€โ”€ committee-reports.js + โ”œโ”€โ”€ propositions.js + โ”œโ”€โ”€ motions.js + โ”œโ”€โ”€ week-ahead.js + โ””โ”€โ”€ breaking-news.js + +Output: news/YYYY-MM-DD-{type}-{lang}.html (ร— 14 languages) + news/metadata/daily-report.json + sitemap.xml (updated by update-news-indexes-and-sitemap.py) +``` + +--- + +## ๐Ÿš€ Quick Start + +### Automated Nightly Generation (Recommended) + +The workflow runs automatically at **02:00 CET** via GitHub Actions: + +```bash +# Trigger manually via GitHub CLI +gh workflow run nightly-news-generation.yml + +# With options +gh workflow run nightly-news-generation.yml \ + -f languages=all \ + -f threshold=5 \ + -f types=committee-reports,propositions,motions,week-ahead +``` + +### Manual Generation (Local) + +```bash +# Install dependencies +npm ci + +# Generate today's news (all languages, threshold=5) +node scripts/generate-daily-news.js --languages=all --threshold=5 + +# Generate with custom date window +node scripts/generate-daily-news.js --date=2026-02-18 --languages=nordic + +# Generate specific types only +node scripts/generate-daily-news.js --types=committee-reports,propositions + +# Dry run (no files written) +node scripts/generate-daily-news.js --dry-run --languages=en +``` + +### Legacy Enhanced Generator + +```bash +# Direct invocation (used internally by generate-daily-news.js) +node 
scripts/generate-news-enhanced.js \ + --types=week-ahead,committee-reports,propositions,motions \ + --languages=all \ + --skip-existing +``` + +--- + +## ๐Ÿ“ Step-by-Step Workflow + +### Step 1: Fetch Documents from MCP + +The nightly script queries the **riksdag-regering-mcp** server for documents published since yesterday. + +```javascript +// Internal implementation in scripts/generate-daily-news.js +const result = await client.request('search_dokument', { + doktyp: 'bet', // 'bet' | 'prop' | 'mot' + from_date: '2026-02-18', // yesterday + limit: 100 +}); +``` + +**Document type codes:** + +| Code | Article type | Swedish | Description | +|------|-------------|---------|-------------| +| `bet` | `committee-reports` | Betรคnkanden | Committee reports | +| `prop` | `propositions` | Propositioner | Government bills | +| `mot` | `motions` | Motioner | Parliamentary motions | + +### Step 2: Apply Document Threshold + +Generation only proceeds when **โ‰ฅ 5 documents** of a type are found (configurable via `--threshold`). This prevents sparse daily articles that lack analytical value. + +```bash +โœ… 'committee-reports': 9 documents โ‰ฅ threshold (5) โ†’ will generate +โญ๏ธ 'propositions': 3 documents < threshold (5) โ†’ skipping +โœ… 'motions': 7 documents โ‰ฅ threshold (5) โ†’ will generate +``` + +### Step 3: Enrich Documents with MCP Content + +For each document the generator calls `get_dokument_innehall` to fetch the full text, which enables 150โ€“400 word analysis sections. + +```javascript +// Handled by MCPClient.enrichDocumentsWithContent() +reports = await client.enrichDocumentsWithContent(reports, 3); // max 3 docs enriched +``` + +### Step 4: Generate Articles (14 Languages) + +The enhanced generator produces one HTML file per language: + +``` +news/2026-02-19-committee-reports-en.html +news/2026-02-19-committee-reports-sv.html +news/2026-02-19-committee-reports-da.html +... 
(14 files total) +``` + +Each file contains: +- Semantic HTML5 with WCAG 2.1 AA compliance +- Schema.org `NewsArticle` structured data (JSON-LD) +- Correct `` and `dir="rtl"` for Arabic/Hebrew +- hreflang links to all 14 language versions +- Cyberpunk theme via external `styles.css` + +### Step 5: Update Indexes and Sitemap + +```bash +# Run after generation +python3 scripts/update-news-indexes-and-sitemap.py + +# Or via npm scripts +node scripts/generate-news-indexes.js +node scripts/generate-sitemap.js +``` + +The index updater: +1. Scans all `news/*.html` files +2. Extracts metadata (title, description, date, language) +3. Updates all 14 `index_*.html` files with current article lists +4. Regenerates `sitemap.xml` with ~574 URLs (articles + API docs + coverage) + +### Step 6: Validate HTML + +```bash +# Validate generated files +npx htmlhint news/2026-02-19-*.html + +# Or full validation +npm run htmlhint +``` + +### Step 7: Create Pull Request + +The workflow automatically creates a PR via `peter-evans/create-pull-request`: + +``` +Branch: auto/daily-news-2026-02-19 +Title: ๐Ÿ“ฐ Daily news: 2026-02-19 +Labels: automated-pipeline, news-article, content +``` + +--- + +## ๐Ÿ”ง MCP Tool Reference (All 32 Tools) + +The `MCPClient` in `scripts/mcp-client.js` provides typed wrappers for all 32 tools. + +### Riksdag Tools (15 tools) + +#### 1. `get_ledamoter` โ€” MP list + +```javascript +const mps = await client.request('get_ledamoter', { limit: 50 }); +``` + +#### 2. `get_ledamot` โ€” MP details + +```javascript +const mp = await client.request('get_ledamot', { intressent_id: '0980680893021' }); +``` + +#### 3. `search_ledamoter` โ€” MP search + +```javascript +const results = await client.request('search_ledamoter', { + parti: 'S', // S, M, SD, V, MP, C, L, KD + valkrets: 'Stockholm' +}); +``` + +#### 4. `get_motioner` โ€” All motions + +```javascript +const motions = await client.request('get_motioner', { + rm: '2025/26', + limit: 20 +}); +``` + +#### 5. 
`search_dokument` โ€” Document search + +```javascript +const docs = await client.request('search_dokument', { + doktyp: 'bet', // bet | prop | mot | skr | sou + from_date: '2026-02-18', + limit: 50 +}); +``` + +#### 6. `search_dokument_fulltext` โ€” Full-text search + +```javascript +const results = await client.request('search_dokument_fulltext', { + query: 'klimat energi', + limit: 20 +}); +``` + +#### 7. `get_dokument` โ€” Specific document + +```javascript +const doc = await client.request('get_dokument', { + dok_id: 'H901FiU1', + include_full_text: false +}); +``` + +#### 8. `get_dokument_innehall` โ€” Document content + summary + +```javascript +const content = await client.request('get_dokument_innehall', { + dok_id: 'H901FiU1', + include_full_text: false +}); +``` + +#### 9. `get_propositioner` โ€” Recent propositions + +```javascript +const props = await client.request('get_propositioner', { + rm: '2025/26', + limit: 10 +}); +``` + +#### 10. `get_betankanden` โ€” Recent committee reports + +```javascript +const reports = await client.request('get_betankanden', { + organ: 'FiU', // Committee code, optional + limit: 10 +}); +``` + +#### 11. `get_fragor` โ€” Written questions + +```javascript +const questions = await client.request('get_fragor', { + rm: '2025/26', + limit: 10 +}); +``` + +#### 12. `get_interpellationer` โ€” Interpellations + +```javascript +const interpellations = await client.request('get_interpellationer', { + rm: '2025/26', + limit: 10 +}); +``` + +#### 13. `search_voteringar` โ€” Vote search + +```javascript +const votes = await client.request('search_voteringar', { + rm: '2025/26', + parti: 'S', + rost: 'Nej', // Ja | Nej | Avstรฅr | Frรฅnvarande + limit: 20 +}); +``` + +#### 14. `search_anforanden` โ€” Speech search + +```javascript +const speeches = await client.request('search_anforanden', { + talare: 'Ulf Kristersson', + rm: '2025/26', + limit: 10 +}); +``` + +#### 15. 
`get_calendar_events` โ€” Parliamentary calendar + +```javascript +const events = await client.request('get_calendar_events', { + from: '2026-02-24', + tom: '2026-03-01', + limit: 200 +}); +``` + +### Government Tools (7 tools) + +#### 16. `search_regering` โ€” Government document search + +```javascript +const govDocs = await client.request('search_regering', { + title: 'klimat', + dateFrom: '2026-01-01', + dateTo: '2026-02-19', + limit: 10 +}); +``` + +#### 17. `get_regering_document` โ€” Government document + +```javascript +const doc = await client.request('get_regering_document', { + document_id: 'klimat-och-miljo-2026', + type: 'propositioner' +}); +``` + +#### 18. `summarize_regering_document` โ€” Document summary + +```javascript +const summary = await client.request('summarize_regering_document', { + document_id: 'klimat-och-miljo-2026', + max_length: 500 +}); +``` + +#### 19. `get_g0v_document_content` โ€” Markdown content + +```javascript +const markdown = await client.request('get_g0v_document_content', { + regeringenUrl: 'https://www.regeringen.se/...' +}); +``` + +#### 20. `get_g0v_document_types` โ€” Available document types + +```javascript +const types = await client.request('get_g0v_document_types', {}); +``` + +#### 21. `get_g0v_category_codes` โ€” Category codes + +```javascript +const codes = await client.request('get_g0v_category_codes', {}); +``` + +#### 22. `analyze_g0v_by_department` โ€” Department analysis + +```javascript +const analysis = await client.request('analyze_g0v_by_department', { + dateFrom: '2026-01-01', + dateTo: '2026-02-19' +}); +``` + +### Statistical & Metadata Tools (5 tools) + +#### 23. `get_utskott` โ€” Committee list + +```javascript +const committees = await client.request('get_utskott', {}); +``` + +#### 24. `get_sync_status` โ€” Server health check + +```javascript +const status = await client.request('get_sync_status', {}); +// Response: { last_sync: '2026-02-19T01:00:00Z', status: 'ok' } +``` + +#### 25. 
`get_data_dictionary` โ€” Field descriptions + +```javascript +const dict = await client.request('get_data_dictionary', { + dataset: 'dokument' // optional +}); +``` + +#### 26. `fetch_paginated_documents` โ€” Paginated document retrieval + +```javascript +const page = await client.request('fetch_paginated_documents', { + doktyp: 'bet', + rm: '2025/26', + page: 1, + pageSize: 50 +}); +``` + +#### 27. `fetch_paginated_anforanden` โ€” Paginated speeches + +```javascript +const page = await client.request('fetch_paginated_anforanden', { + parti: 'M', + rm: '2025/26', + page: 1, + pageSize: 100 +}); +``` + +### Aggregation Tools (5 tools) + +#### 28. `enhanced_government_search` โ€” Combined search + +```javascript +const results = await client.request('enhanced_government_search', { + query: 'bostadspolitik', + includeRegeringen: true, + limit: 20, + regeringenLimit: 5 +}); +``` + +#### 29. `get_voting_group` โ€” Group vote results + +```javascript +const groups = await client.request('get_voting_group', { + bet: 'FiU10', + punkt: '1', + groupBy: 'parti' // parti | valkrets | namn +}); +``` + +#### 30. `batch_fetch_documents` โ€” Multi-session fetch + +```javascript +const batch = await client.request('batch_fetch_documents', { + doktyp: 'bet', + riksmoten: ['2024/25', '2025/26'], + maxPerRiksmote: 100 +}); +``` + +#### 31. `list_reports` โ€” Available reports + +```javascript +const reports = await client.request('list_reports', {}); +``` + +#### 32. `fetch_report` โ€” Named report + +```javascript +const report = await client.request('fetch_report', { + report: 'ledamotsstatistik', // ledamotsstatistik | kontaktutskott | ... 
+ limit: 200 +}); +``` + +--- + +## ๐Ÿ“ Content Quality Standards + +### Word Count Targets + +| Article type | Target | Minimum | Maximum | +|-------------|--------|---------|---------| +| Week Ahead | 250 | 150 | 400 | +| Committee Reports | 300 | 150 | 400 | +| Government Propositions | 350 | 200 | 400 | +| Opposition Motions | 300 | 150 | 400 | +| Breaking News | 200 | 100 | 300 | + +### The Economist Style Guidelines + +1. **Lede paragraph** โ€” 2โ€“3 sentences. State the most newsworthy fact first. +2. **H2 sections** โ€” Use 3โ€“5 thematic sections per article. +3. **H3 subsections** โ€” Use sparingly; maximum 2 per H2. +4. **No bullet lists** in body text โ€” use prose instead. +5. **Tone** โ€” Formal, analytical, neutral. Avoid partisan framing. +6. **Numbers** โ€” Spell out one through ten; use digits for 11 and above. +7. **Dates** โ€” Use `DD Month YYYY` format (e.g., `19 February 2026`). +8. **Attribution** โ€” Always attribute: "according to the Finance Committee" not "reportedly". + +### Article Structure Template + +```html + +

…

+ +

Context

+

…background and significance…

+ +

Key Developments

+

…specific documents/events covered…

+ +

Policy Implications

+

…analysis of impact…

+ +

Watch Points

+ + +

Looking Ahead

+

…next steps, upcoming votes, deadlines…

+``` + +### Schema.org NewsArticle Requirements + +Every article **must** include synchronized metadata in four locations: + +```html + + + + + + + + + + + +``` + +All four description fields **must be identical**. See PR #307 for the fix script (`scripts/fix-pr-review-comments.py`) when they drift. + +--- + +## ๐ŸŒ Translation Workflow (14 Languages) + +### Language Codes and File Patterns + +| Language | Code | File suffix | Direction | +|----------|------|-------------|-----------| +| English | `en` | `-en.html` | LTR (master) | +| Swedish | `sv` | `-sv.html` | LTR | +| Danish | `da` | `-da.html` | LTR | +| Norwegian | `no` | `-no.html` | LTR | +| Finnish | `fi` | `-fi.html` | LTR | +| German | `de` | `-de.html` | LTR | +| French | `fr` | `-fr.html` | LTR | +| Spanish | `es` | `-es.html` | LTR | +| Dutch | `nl` | `-nl.html` | LTR | +| Arabic | `ar` | `-ar.html` | **RTL** | +| Hebrew | `he` | `-he.html` | **RTL** | +| Japanese | `ja` | `-ja.html` | LTR | +| Korean | `ko` | `-ko.html` | LTR | +| Chinese | `zh` | `-zh.html` | LTR | + +### Automated Translation (Built-in) + +The enhanced generator creates all 14 language files automatically. Run: + +```bash +node scripts/generate-news-enhanced.js --types=committee-reports --languages=all +``` + +### Manual Translation Improvement + +When improving machine-generated translations: + +``` +1. Update title/meta/OG/Twitter metadata +2. Replace full article body with translated text + - Maintain H2/H3 structure + - Match word count targets (150โ€“400 words) + - Apply The Economist style +3. Update Schema.org (headline, description, wordCount) +4. 
Update navigation: "โ† Back to News" with localized text: + - Swedish: "โ† Tillbaka till nyheter" + - Danish: "โ† Tilbage til nyheder" + - Norwegian:"โ† Tilbake til nyheter" + - Finnish: "โ† Takaisin uutisiin" + - German: "โ† Zurรผck zu den Nachrichten" + - French: "โ† Retour aux actualitรฉs" + - Spanish: "โ† Volver a las noticias" + - Dutch: "โ† Terug naar nieuws" + - Arabic: "โ† ุงู„ุนูˆุฏุฉ ุฅู„ู‰ ุงู„ุฃุฎุจุงุฑ" + - Hebrew: "โ† ื—ื–ืจื” ืœื—ื“ืฉื•ืช" + - Japanese: "โ† ใƒ‹ใƒฅใƒผใ‚นใซๆˆปใ‚‹" + - Korean: "โ† ๋‰ด์Šค๋กœ ๋Œ์•„๊ฐ€๊ธฐ" + - Chinese: "โ† ่ฟ”ๅ›žๆ–ฐ้—ป" +5. Validate with HTMLHint +6. Commit individually per language +``` + +**โš ๏ธ Critical:** The `generate-content-based-titles.py` script defaults to `--english-only` mode. Use `--overwrite-translations` with interactive `YES` confirmation only when intentionally replacing professional translations. + +### Translation Workflow Order (Efficiency) + +Process languages in this order for maximum efficiency: +1. **English** (master/source) +2. **Swedish** (closest to source material) +3. **Danish** (similar to Swedish, ~10 min) +4. **Norwegian** (similar to Danish, ~10 min) +5. **Finnish** (independent, ~15 min) +6. **German, French, Spanish, Dutch** (~15 min each) +7. **Arabic, Hebrew** (RTL โ€” require `dir="rtl"` on ``, ~20 min each) +8. **Japanese, Korean, Chinese** (~15 min each) + +### RTL Languages Special Requirements + +For Arabic (`ar`) and Hebrew (`he`) articles: + +```html + + + +``` + +CSS variables from `styles.css` handle the rest automatically โ€” no inline styles needed. + +--- + +## โœ… Validation Checklist + +### Pre-Commit (Manual) + +```bash +# 1. HTML validation (zero errors required) +npx htmlhint news/YYYY-MM-DD-*.html + +# 2. Link checking (internal links) +python3 -m http.server 8080 & +linkinator http://localhost:8080/news/ --recurse --skip "http://localhost:8080/docs" + +# 3. 
Schema.org consistency check +grep -h '"description"' news/YYYY-MM-DD-*.html | sort | uniq -c + +# 4. Word count check (aim for 150-400 words) +for f in news/YYYY-MM-DD-*-en.html; do + wc=$(cat "$f" | sed 's/<[^>]*>//g' | wc -w) + echo "$f: $wc words" +done +``` + +### Post-Commit (Automated CI) + +The `quality-checks.yml` workflow validates: +- โœ… HTMLHint on all `*.html` and `news/*.html` +- โœ… ESLint on all `*.js` scripts +- โœ… Translation consistency (`validate-translations.js`) +- โœ… News translation completeness (`validate-news-translations.js`) + +--- + +## ๐Ÿ”„ Index and Sitemap Update + +After generating new articles, always run the index updater: + +```bash +python3 scripts/update-news-indexes-and-sitemap.py +``` + +This script: +1. Scans all `news/*.html` files (currently ~347 articles) +2. Extracts metadata: title, description, date, language +3. Updates all 14 `index_*.html` files with article lists +4. Regenerates `sitemap.xml` with all URLs including: + - News articles (priority 0.4โ€“0.8, age-based) + - API documentation in `docs/api/` (priority 0.5) + - Test coverage in `docs/coverage/` (priority 0.4) + - Root pages (priority 0.9โ€“1.0) + +**Sitemap priorities:** + +| URL type | Priority | +|----------|----------| +| `index.html` (English) | 1.0 | +| `index_sv.html` (Swedish) | 0.9 | +| Recent news (< 7 days) | 0.8 | +| Nordic language indexes | 0.7 | +| Other language indexes | 0.6 | +| `docs/api/` pages | 0.5 | +| Old articles + coverage | 0.4 | + +--- + +## ๐Ÿ› Common Pitfalls + +### 1. MCP Server Cold Start (30โ€“60 s) + +**Problem:** First request fails with timeout. + +**Solution:** The `generate-daily-news.js` script warm-up step sends `get_sync_status` before any data queries. Set `MCP_CLIENT_TIMEOUT_MS=90000` in CI. + +### 2. Inconsistent Schema.org Descriptions + +**Problem:** `meta description` and `NewsArticle.description` differ. 
+ +**Solution:** Always update all four fields together (meta, og:description, twitter:description, JSON-LD description). Use `scripts/fix-pr-review-comments.py` pattern for bulk fixes. + +### 3. English UI on Non-English Pages + +**Problem:** Non-English articles show "โ† Back to News" in English. + +**Solution:** Use language-specific navigation strings (see Translation Workflow section above). + +### 4. Professional Translation Overwrite + +**Problem:** Script accidentally overwrites human-translated articles. + +**Solution:** `scripts/generate-content-based-titles.py` requires `--overwrite-translations` flag with interactive `YES` confirmation. Default `--english-only` mode is safe. + +### 5. PR Format-Patch Size Limit + +**Problem:** PRs with 50+ changed files fail with `ENOBUFS` when `sitemap.xml` diff exceeds 1 MB. + +**Solution:** Commit sitemap updates separately from article files, or use the nightly workflow which handles this automatically. + +### 6. Missing Article Threshold + +**Problem:** Articles generated with only 1โ€“2 documents provide no analytical value. + +**Solution:** Use `--threshold=5` (default in `generate-daily-news.js`). Adjust only for breaking news (`--threshold=1`). + +### 7. Hard-coded Absolute Paths + +**Problem:** Scripts with `/home/runner/work/โ€ฆ` paths fail in local environments. + +**Solution:** Always use `Path('news')` (relative) or `path.join(__dirname, '..', 'news')` patterns. See `scripts/generate-daily-news.js` as reference. + +### 8. Merge Conflicts with Professional Translations + +**Problem:** Auto-generated articles conflict with human translations in PR. + +**Solution:** Always accept the professional translation (`--theirs` for the specific file). Professional translations are canonical; auto-generated content is a starting point only. 
+ +--- + +## ๐Ÿ“Š 5 Editorial Pillars Framework + +All generated content aligns with the five pillars defined in `scripts/editorial-pillars.js`: + +| Pillar | Focus | Primary types | +|--------|-------|---------------| +| 1. Parliamentary Pulse | Main legislative developments | committee-reports, propositions | +| 2. Government Watch | Executive announcements | propositions | +| 3. Opposition Dynamics | Cross-party positioning | motions | +| 4. Committee Intelligence | Specialist analysis | committee-reports | +| 5. Looking Ahead | Political forecasting | week-ahead | + +--- + +## ๐Ÿ”’ Security and Compliance + +### Authentication + +```bash +# Set MCP auth token (optional, but required for production) +export MCP_AUTH_TOKEN="Bearer your-token-here" + +# Or via GitHub Secrets (recommended) +# Repository Settings โ†’ Secrets โ†’ MCP_AUTH_TOKEN +``` + +The `mcp-client.js` reads from `process.env.MCP_AUTH_TOKEN`. Never commit tokens to source code. + +### GDPR Compliance + +All generated content covers: +- **Public officials in official capacity only** โ€” no personal data processing +- **Right to be forgotten not applicable** โ€” historical parliamentary records +- **Purpose limitation** โ€” journalism and democratic transparency only +- **Data minimization** โ€” process only publicly available parliamentary data + +Legal basis: Article 6(1)(e) GDPR โ€” processing in the public interest. + +### Data Quality + +The MCP server is the **single authoritative source**. Always: +1. Validate document IDs against official Riksdagen records +2. Cross-reference document titles with `dok_id` field +3. 
Use `get_dokument` for definitive metadata when in doubt + +--- + +## ๐Ÿ“š Related Documentation + +| Document | Purpose | +|----------|---------| +| `NEWS_ARTICLE_STYLING_GUIDE.md` | HTML/CSS styling conventions | +| `TRANSLATION_GUIDE.md` | Translation terminology tables | +| `COMMITTEE_REPORTS_TRANSLATION_WORKFLOW.md` | Committee reports specific workflow | +| `WORKFLOWS.md` | GitHub Actions workflow overview | +| `TESTING.md` | Test suite documentation | +| `scripts/generate-daily-news.js` | Nightly generation orchestrator | +| `scripts/generate-news-enhanced.js` | Multi-language article engine | +| `scripts/mcp-client.js` | MCP transport layer (all 32 tools) | +| `scripts/article-template.js` | HTML template generator | +| `scripts/update-news-indexes-and-sitemap.py` | Index and sitemap updater | +| `.github/workflows/nightly-news-generation.yml` | Automated nightly workflow | + +--- + +## ๐Ÿ—“๏ธ Proven Patterns from Issues #306โ€“334 + +### Pattern: Bulk Enhancement Script + +For systematic enhancement of multiple articles (e.g., 176 articles across Issues #306โ€“334): + +```python +# scripts/enhance-batch-articles.py pattern +ARTICLES = { + '2026-02-14': { + 'bet': [ + {'id': 'H801AU10', 'title': 'Arbetsmarknadsfrรฅgor', 'dept': 'AU', 'date': '2026-02-14'} + ] + } +} + +for date, types in ARTICLES.items(): + for doctype, docs in types.items(): + enhance_article(date, doctype, docs) +``` + +See `scripts/enhance-2026-02-19-articles.py` for a complete example. + +### Pattern: Content-Based Titles + +Generate titles from actual document content rather than generic templates: + +```python +# scripts/generate-content-based-titles.py --english-only +# Generates: "Finance Committee Approves 2026 Budget Framework" +# Instead of: "Committee Reports: Parliamentary Priorities This Week" +``` + +Run with `--english-only` (safe default) before any PR. Requires `--overwrite-translations` with `YES` confirmation to update translated files. 
+ +### Pattern: Post-Generation Validation + +After every batch generation, run the full validation pipeline: + +```bash +# 1. HTMLHint (zero errors required) +npm run htmlhint + +# 2. Translation consistency +npm run validate-news + +# 3. Index/sitemap update +python3 scripts/update-news-indexes-and-sitemap.py + +# 4. Commit in logical groups (< 1 MB per commit to stay within safe-outputs limits) +git add news/2026-02-19-*.html +git commit -m "news: 2026-02-19 committee reports (14 languages)" + +git add index*.html sitemap.xml +git commit -m "chore: update indexes and sitemap for 2026-02-19" +``` + +--- + +*Last Updated: 2026-02-19 | Issues: #306–339 | PRs: #307, #312, #313, #314, #326, #333, #334* diff --git a/news/metadata/daily-report.json b/news/metadata/daily-report.json new file mode 100644 index 00000000..f8046083 --- /dev/null +++ b/news/metadata/daily-report.json @@ -0,0 +1,18 @@ +{ + "date": "2026-02-19", + "fromDate": "2026-02-18", + "threshold": 5, + "languages": [ + "en" + ], + "dryRun": true, + "documentsFound": {}, + "typesTriggered": [], + "typesSkipped": [], + "articlesCreated": [ + "week-ahead" + ], + "errors": [], + "startTime": "2026-02-19T17:24:52.562Z", + "endTime": "2026-02-19T17:24:52.640Z" +} \ No newline at end of file diff --git a/package.json b/package.json index faab866f..b7f30096 100644 --- a/package.json +++ b/package.json @@ -25,6 +25,7 @@ "validate-translations": "node scripts/validate-translations.js", "validate-news": "node scripts/validate-news-translations.js", "validate-all": "npm run htmlhint && npm run validate-translations && npm run validate-news", + "generate-daily-news": "node scripts/generate-daily-news.js", "generate-news": "node scripts/generate-news-enhanced.js", "generate-news-indexes": "node scripts/generate-news-indexes.js", "generate-sitemap": "node scripts/generate-sitemap.js", diff --git a/scripts/generate-daily-news.js b/scripts/generate-daily-news.js new file mode 100644 index 00000000..e42def4c --- 
/**
 * @module Intelligence Operations/Daily News Generation
 * @category Intelligence Operations - Nightly Automated News Generation
 *
 * @description
 * Nightly news generation orchestrator that queries the riksdag-regering-mcp server
 * for documents published since yesterday, groups them by type, applies a
 * minimum-document threshold, and delegates article generation to the enhanced
 * news engine (generate-news-enhanced.js).
 *
 * Designed to run unattended at 02:00 CET via GitHub Actions but is also safely
 * triggerable by hand. All output files are written into news/ and the metadata
 * directory, exactly as the enhanced script does, so downstream index / sitemap
 * updates work without modification.
 *
 * Workflow
 * ────────
 *   1. Fetch new documents from riksdag-regering-mcp published since yesterday
 *   2. Group by document type (bet, prop, mot)
 *   3. Generate articles for types that meet the threshold (default ≥5 documents)
 *   4. Always generate the Week-Ahead calendar article
 *   5. Write generation report to news/metadata/daily-report.json
 *      (the report is written in dry-run mode as well)
 *   6. Exit 0 on full success, 1 if any article generation failed or a CLI
 *      value (--threshold, --date, --languages) is invalid. Document fetch
 *      errors are recorded in the report but do not change the exit code.
 *
 * CLI flags
 * ─────────
 *   --date=YYYY-MM-DD   Override "yesterday" date for document window
 *   --threshold=N       Override minimum document count (default 5)
 *   --types=t1,t2       Restrict article types (committee-reports,propositions,motions,week-ahead)
 *   --languages=l1,l2   Language codes or presets (en,sv | nordic | eu-core | all)
 *   --dry-run           Log what would happen without generating articles
 *   --skip-existing     Skip languages that already have today's articles
 *   --no-week-ahead     Suppress the always-on Week Ahead article
 *   --batch-size=N      Pass through to enhanced generator for batching
 *
 * Environment variables
 * ─────────────────────
 *   MCP_AUTH_TOKEN         Bearer token for riksdag-regering-mcp (optional)
 *   MCP_SERVER_URL         Override MCP server URL
 *   MCP_CLIENT_TIMEOUT_MS  Request timeout in ms. When unset, 90000 is used for
 *                          the cold-start warm-up and 30000 afterwards.
 *
 * @author Hack23 AB - Intelligence Operations Team
 * @license Apache-2.0
 * @version 1.0.0
 *
 * @see {@link ./generate-news-enhanced.js} Enhanced multi-language article engine
 * @see {@link ./mcp-client.js} MCP transport layer
 * @see {@link ../ARTICLE_ENHANCEMENT_GUIDE.md} Workflow documentation
 */

import fs from 'fs';
import path from 'path';
import { fileURLToPath } from 'url';
import { MCPClient } from './mcp-client.js';

const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// ─── CLI argument parsing ────────────────────────────────────────────────────

const args = process.argv.slice(2);

/**
 * Look up the value of a `--flag=value` style CLI argument.
 *
 * @param {string} prefix - Flag name including the leading dashes (e.g. `--date`)
 * @returns {string|null} Text after the `=` (may be empty), or null when the flag is absent
 */
function getArg(prefix) {
  const marker = `${prefix}=`;
  const hit = args.find((arg) => arg.startsWith(marker));
  return hit === undefined ? null : hit.slice(marker.length);
}

/**
 * Report an invalid CLI value and terminate with exit code 1.
 *
 * @param {string} message - Human-readable description of the problem
 * @returns {never}
 */
function failUsage(message) {
  console.error(`❌ ${message}`);
  process.exit(1);
}

const dryRun = args.includes('--dry-run');
const skipExisting = args.includes('--skip-existing');
const noWeekAhead = args.includes('--no-week-ahead');

const dateArg = getArg('--date');
const thresholdArg = getArg('--threshold');
const typesArg = getArg('--types');
const languagesArg = getArg('--languages');
const batchSizeArg = getArg('--batch-size');

const DEFAULT_DOCUMENT_THRESHOLD = 5;

/**
 * Parse the --threshold value.
 *
 * A missing or empty value yields the default. A value that does not parse to
 * a non-negative integer aborts the run: a NaN threshold would make every
 * `count >= threshold` comparison false and silently skip all article types.
 *
 * @param {string|null} raw - Raw flag value
 * @returns {number} Minimum document count per type
 */
function parseThreshold(raw) {
  if (!raw) return DEFAULT_DOCUMENT_THRESHOLD;
  const parsed = Number.parseInt(raw, 10);
  if (Number.isNaN(parsed) || parsed < 0) {
    failUsage(`Invalid --threshold value '${raw}'. Expected a non-negative integer.`);
  }
  return parsed;
}

const DOCUMENT_THRESHOLD = parseThreshold(thresholdArg);

// ─── Language helpers ────────────────────────────────────────────────────────

const ALL_LANGUAGES = ['en', 'sv', 'da', 'no', 'fi', 'de', 'fr', 'es', 'nl', 'ar', 'he', 'ja', 'ko', 'zh'];

const LANGUAGE_PRESETS = {
  all: ALL_LANGUAGES,
  nordic: ['en', 'sv', 'da', 'no', 'fi'],
  'eu-core': ['en', 'sv', 'de', 'fr', 'es', 'nl'],
};

const languagesInput = languagesArg ? languagesArg.trim().toLowerCase() : 'all';

// Array.isArray guards against inherited keys such as "constructor", which a
// plain truthiness test on LANGUAGE_PRESETS[...] would mistake for a preset.
const presetLanguages = LANGUAGE_PRESETS[languagesInput];
const requestedLanguages = Array.isArray(presetLanguages)
  ? presetLanguages
  : languagesInput.split(',').map((code) => code.trim());

const unknownLanguages = requestedLanguages.filter(
  (code) => code !== '' && !ALL_LANGUAGES.includes(code)
);
if (unknownLanguages.length > 0) {
  console.warn(`⚠️  Ignoring unknown language code(s): ${unknownLanguages.join(', ')}`);
}

// De-duplicated so a repeated code is not passed to the generator twice.
const LANGUAGES = [...new Set(requestedLanguages.filter((code) => ALL_LANGUAGES.includes(code)))];

if (LANGUAGES.length === 0) {
  console.error('❌ No valid language codes. Valid codes:', ALL_LANGUAGES.join(', '));
  process.exit(1);
}

// ─── Date helpers ────────────────────────────────────────────────────────────

const ISO_DATE_PATTERN = /^\d{4}-\d{2}-\d{2}$/;

/**
 * Check that a string is a real calendar date in YYYY-MM-DD form.
 *
 * @param {string} value - Candidate date string
 * @returns {boolean} true when the value round-trips through Date unchanged
 */
function isValidIsoDate(value) {
  if (!ISO_DATE_PATTERN.test(value)) return false;
  const parsed = new Date(`${value}T00:00:00Z`);
  // The round-trip comparison rejects dates the engine normalises (e.g. Feb 30).
  return !Number.isNaN(parsed.getTime()) && parsed.toISOString().startsWith(value);
}

/**
 * Return yesterday's date as YYYY-MM-DD (or the override from --date flag).
 * Uses UTC arithmetic so the result is consistent with TODAY, which is also
 * derived from the UTC ISO string. An invalid --date aborts the run.
 *
 * @returns {string}
 */
function getFromDate() {
  if (dateArg) {
    if (!isValidIsoDate(dateArg)) {
      failUsage(`Invalid --date value '${dateArg}'. Expected a calendar date as YYYY-MM-DD.`);
    }
    return dateArg;
  }
  const yesterday = new Date();
  yesterday.setUTCDate(yesterday.getUTCDate() - 1);
  return yesterday.toISOString().split('T')[0];
}

const TODAY = new Date().toISOString().split('T')[0];
const FROM_DATE = getFromDate();

// ─── Paths ───────────────────────────────────────────────────────────────────

const NEWS_DIR = path.join(__dirname, '..', 'news');
const METADATA_DIR = path.join(NEWS_DIR, 'metadata');

// With `recursive: true` this is a no-op when the directory already exists.
fs.mkdirSync(METADATA_DIR, { recursive: true });

// ─── Document type → article type mapping ────────────────────────────────────

/**
 * Maps riksdag document type codes to article type identifiers used by the
 * enhanced generator.
 */
const DOCTYPE_TO_ARTICLE_TYPE = {
  bet: 'committee-reports',
  prop: 'propositions',
  mot: 'motions',
};

const VALID_ARTICLE_TYPES = ['committee-reports', 'propositions', 'motions', 'week-ahead'];

// ─── Allowed article types from --types flag ─────────────────────────────────

/**
 * Resolve the --types flag to the list of valid article types to consider.
 * A missing or empty flag selects every valid type; unknown names are
 * reported and ignored.
 *
 * @param {string|null} raw - Raw flag value
 * @returns {string[]} Valid article type identifiers
 */
function parseRequestedTypes(raw) {
  if (!raw) return VALID_ARTICLE_TYPES;
  const names = raw.split(',').map((name) => name.trim());
  const unknown = names.filter((name) => name !== '' && !VALID_ARTICLE_TYPES.includes(name));
  if (unknown.length > 0) {
    console.warn(`⚠️  Ignoring unknown article type(s): ${unknown.join(', ')}`);
  }
  return [...new Set(names.filter((name) => VALID_ARTICLE_TYPES.includes(name)))];
}

const requestedTypes = parseRequestedTypes(typesArg);

// ─── Report accumulator ──────────────────────────────────────────────────────

const report = {
  date: TODAY,
  fromDate: FROM_DATE,
  threshold: DOCUMENT_THRESHOLD,
  languages: LANGUAGES,
  dryRun,
  documentsFound: {},
  typesTriggered: [],
  typesSkipped: [],
  articlesCreated: [],
  errors: [],
  startTime: new Date().toISOString(),
};

// ─── MCP helpers ─────────────────────────────────────────────────────────────

const WARMUP_TIMEOUT_MS = 90000;
const REQUEST_TIMEOUT_MS = 30000;
const SEARCH_LIMIT = 100;

let sharedClient = null;

/**
 * Read MCP_CLIENT_TIMEOUT_MS from the environment.
 *
 * @returns {number|null} Positive timeout in ms, or null when unset/invalid
 */
function readEnvTimeout() {
  const parsed = Number.parseInt(process.env.MCP_CLIENT_TIMEOUT_MS, 10);
  return Number.isFinite(parsed) && parsed > 0 ? parsed : null;
}

/**
 * Get (or lazily create) the shared MCPClient, with cold-start warm-up.
 * A failed warm-up is logged but not fatal; later requests get their own chance.
 *
 * @returns {Promise<MCPClient>}
 */
async function getClient() {
  if (sharedClient) return sharedClient;

  const envTimeout = readEnvTimeout();
  sharedClient = new MCPClient({ timeout: envTimeout ?? WARMUP_TIMEOUT_MS });

  console.log('⏳ Warming up MCP server (cold start may take 30-60 s)…');
  try {
    await sharedClient.request('get_sync_status', {});
    console.log('✅ MCP server ready');
  } catch (e) {
    console.warn(`⚠️  MCP warm-up failed: ${e.message} — will retry on individual requests`);
  }

  // Reduce timeout for normal requests after warm-up. An explicit
  // MCP_CLIENT_TIMEOUT_MS applies to both phases, so nothing changes then.
  sharedClient.timeout = envTimeout ?? REQUEST_TIMEOUT_MS;
  return sharedClient;
}

/**
 * Return the document array from one of the known response shapes, or null
 * when the payload matches none of them.
 *
 * @param {*} payload - Raw or JSON-decoded MCP response
 * @returns {Array|null}
 */
function pickDocumentArray(payload) {
  if (Array.isArray(payload)) return payload;
  if (Array.isArray(payload?.dokumentlista?.dokument)) return payload.dokumentlista.dokument;
  if (Array.isArray(payload?.dokument)) return payload.dokument;
  return null;
}

/**
 * Normalise a search_dokument response to an array of documents.
 * The MCP server wraps results in different shapes depending on version; the
 * last resort is JSON text carried in a `content` field.
 *
 * @param {*} result - Value resolved by `client.request('search_dokument', …)`
 * @returns {Array} Document objects; always an array, possibly empty
 */
function extractDocuments(result) {
  const direct = pickDocumentArray(result);
  if (direct) return direct;
  if (!result?.content) return [];

  const rawText = Array.isArray(result.content) ? result.content[0]?.text : result.content;
  try {
    return pickDocumentArray(JSON.parse(rawText)) ?? [];
  } catch (parseError) {
    console.warn(`  ⚠️  Could not parse MCP response content: ${parseError.message}`);
    return [];
  }
}

/**
 * Fetch documents of a given riksdag type published since FROM_DATE.
 * Returns an array of document objects (may be empty). Request failures are
 * recorded in report.errors and yield an empty array instead of throwing.
 *
 * @param {MCPClient} client
 * @param {'bet'|'prop'|'mot'} doctype
 * @returns {Promise<Array>}
 */
async function fetchDocumentsByType(client, doctype) {
  try {
    console.log(`  🔄 Searching for '${doctype}' documents since ${FROM_DATE}…`);
    const result = await client.request('search_dokument', {
      doktyp: doctype,
      from_date: FROM_DATE,
      limit: SEARCH_LIMIT,
    });

    const docs = extractDocuments(result);
    console.log(`  📊 Found ${docs.length} '${doctype}' documents`);
    if (docs.length >= SEARCH_LIMIT) {
      console.warn(`  ⚠️  Result count reached the search limit (${SEARCH_LIMIT}); more documents may exist`);
    }
    return docs;
  } catch (error) {
    console.error(`  ❌ Error fetching '${doctype}' documents: ${error.message}`);
    report.errors.push({ type: doctype, error: error.message });
    return [];
  }
}

// ─── Article generation via enhanced script ──────────────────────────────────

/**
 * Build the CLI arguments for generate-news-enhanced.js based on the
 * article type and the options passed to this script.
 *
 * @param {string} articleType
 * @returns {string[]} argv array to pass to the child process
 */
function buildEnhancedArgs(articleType) {
  const cliArgs = [`--types=${articleType}`, `--languages=${LANGUAGES.join(',')}`];
  if (dryRun) cliArgs.push('--dry-run');
  if (skipExisting) cliArgs.push('--skip-existing');
  if (batchSizeArg) cliArgs.push(`--batch-size=${batchSizeArg}`);
  return cliArgs;
}

/** Preferred export of generate-news-enhanced.js for each article type. */
const GENERATOR_EXPORT_BY_TYPE = new Map([
  ['committee-reports', 'generateCommitteeReports'],
  ['propositions', 'generatePropositions'],
  ['motions', 'generateMotions'],
  ['week-ahead', 'generateWeekAhead'],
]);

/**
 * Pick the generator function for an article type from the imported module,
 * falling back to the generic `generateNews` export and then the default export.
 *
 * @param {object} mod - Namespace object of generate-news-enhanced.js
 * @param {string} articleType
 * @returns {Function|null} First callable candidate, or null when none exists
 */
function resolveGenerator(mod, articleType) {
  const exportName = GENERATOR_EXPORT_BY_TYPE.get(articleType);
  const candidates = [exportName ? mod[exportName] : undefined, mod.generateNews, mod.default];
  return candidates.find((candidate) => typeof candidate === 'function') ?? null;
}

/**
 * Invoke generate-news-enhanced.js for a single article type via dynamic import
 * (same process, avoids spawn overhead and shares the warmed-up MCP connection).
 * Falls back to a child process when the import, the export lookup, or the
 * in-process generation throws.
 *
 * In dry-run mode nothing is invoked and true is returned, so the caller lists
 * the type under articlesCreated even though no article was written.
 *
 * @param {string} articleType - e.g. 'committee-reports'
 * @returns {Promise<boolean>} true on success
 */
async function generateArticleType(articleType) {
  console.log(`\n📰 Generating '${articleType}' article…`);

  if (dryRun) {
    console.log(`  [DRY RUN] Would call generate-news-enhanced.js --types=${articleType}`);
    return true;
  }

  // Temporarily patch process.argv so the enhanced module's top-level parsing
  // picks up our flags when it is imported / re-used.
  const savedArgv = process.argv.slice();
  process.argv = [
    process.argv[0],
    path.join(__dirname, 'generate-news-enhanced.js'),
    ...buildEnhancedArgs(articleType),
  ];

  try {
    // Dynamic import with cache-busting query so we can call multiple types
    const cacheBust = `?type=${articleType}&ts=${Date.now()}`;
    const mod = await import(`./generate-news-enhanced.js${cacheBust}`);

    const generator = resolveGenerator(mod, articleType);
    if (!generator) {
      throw new Error(`no generator export found for '${articleType}'`);
    }

    const result = await generator();
    // A missing result counts as failure; an object counts as success unless
    // it explicitly carries `success: false`.
    return Boolean(result) && result.success !== false;
  } catch (importError) {
    // Dynamic import with same path fails on Node < 22 with module cache.
    // Fall back to child_process spawn. Note that errors thrown by the
    // generator itself also land here and trigger the same fallback.
    console.warn(`  ⚠️  Direct import failed (${importError.message}), using child process`);
    return generateViaChildProcess(articleType);
  } finally {
    process.argv = savedArgv;
  }
}

/**
 * Fallback: spawn generate-news-enhanced.js as a child process.
 * Never rejects; spawn errors and non-zero exits resolve to false.
 *
 * @param {string} articleType
 * @returns {Promise<boolean>} true when the child exits with code 0
 */
async function generateViaChildProcess(articleType) {
  const { spawn } = await import('child_process');

  return new Promise((resolve) => {
    const scriptPath = path.join(__dirname, 'generate-news-enhanced.js');
    const cliArgs = buildEnhancedArgs(articleType);

    console.log(`  🔧 Spawning: node ${scriptPath} ${cliArgs.join(' ')}`);

    const child = spawn(process.execPath, [scriptPath, ...cliArgs], {
      stdio: 'inherit',
      env: process.env,
    });

    child.on('close', (code, signal) => {
      if (code === 0) {
        console.log(`  ✅ '${articleType}' generation succeeded`);
        resolve(true);
        return;
      }
      // `code` is null when the child was terminated by a signal.
      const reason = code === null ? `signal ${signal}` : `exit code ${code}`;
      console.error(`  ❌ '${articleType}' generation failed (${reason})`);
      resolve(false);
    });

    child.on('error', (err) => {
      console.error(`  ❌ Spawn error for '${articleType}': ${err.message}`);
      resolve(false);
    });
  });
}

// ─── Main orchestration ──────────────────────────────────────────────────────

/**
 * Persist the accumulated report to news/metadata/daily-report.json.
 * Best effort: a write failure is logged and otherwise ignored.
 */
function saveReport() {
  try {
    fs.writeFileSync(
      path.join(METADATA_DIR, 'daily-report.json'),
      JSON.stringify(report, null, 2)
    );
  } catch (e) {
    console.warn(`⚠️  Could not save daily report: ${e.message}`);
  }
}

/**
 * Run the nightly pipeline: fetch documents, apply the threshold, generate the
 * selected article types, persist the report and exit.
 *
 * Exits the process itself: 0 when nothing needed generating or every
 * generation succeeded, 1 when at least one generation failed.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.log('📰 Riksdagsmonitor — Daily News Generation');
  console.log(`   Date window : ${FROM_DATE} → ${TODAY}`);
  console.log(`   Threshold   : ≥${DOCUMENT_THRESHOLD} documents per type`);
  console.log(`   Languages   : ${LANGUAGES.join(', ')}`);
  console.log(`   Types       : ${requestedTypes.join(', ')}`);
  console.log(`   Dry run     : ${dryRun ? 'YES' : 'no'}`);
  console.log('');

  const client = await getClient();

  // ── 1. Fetch documents by type ─────────────────────────────────────────────

  const documentsByType = {};

  for (const [doctype, articleType] of Object.entries(DOCTYPE_TO_ARTICLE_TYPE)) {
    if (!requestedTypes.includes(articleType)) continue;

    const docs = await fetchDocumentsByType(client, doctype);
    documentsByType[articleType] = docs;
    report.documentsFound[articleType] = docs.length;
  }

  // ── 2. Decide which article types to generate ──────────────────────────────

  const typesToGenerate = [];

  // Week-ahead is always included unless suppressed
  if (!noWeekAhead && requestedTypes.includes('week-ahead')) {
    typesToGenerate.push('week-ahead');
  }

  for (const [articleType, docs] of Object.entries(documentsByType)) {
    if (docs.length >= DOCUMENT_THRESHOLD) {
      typesToGenerate.push(articleType);
      report.typesTriggered.push(articleType);
      console.log(`✅ '${articleType}': ${docs.length} documents ≥ threshold (${DOCUMENT_THRESHOLD}) → will generate`);
    } else {
      report.typesSkipped.push(articleType);
      console.log(`⏭️  '${articleType}': ${docs.length} documents < threshold (${DOCUMENT_THRESHOLD}) → skipping`);
    }
  }

  if (typesToGenerate.length === 0) {
    console.log('\nℹ️  No article types met the threshold. Nothing to generate today.');
    report.endTime = new Date().toISOString();
    saveReport();
    process.exit(0);
  }

  console.log(`\n🚀 Generating ${typesToGenerate.length} article type(s): ${typesToGenerate.join(', ')}\n`);

  // ── 3. Generate articles ───────────────────────────────────────────────────

  // Only generation failures set this flag; fetch errors recorded earlier in
  // report.errors do not affect the exit code.
  let hasErrors = false;

  for (const articleType of typesToGenerate) {
    const ok = await generateArticleType(articleType);

    if (ok) {
      report.articlesCreated.push(articleType);
    } else {
      report.errors.push({ type: articleType, error: 'generation failed' });
      hasErrors = true;
    }
  }

  // ── 4. Persist report ──────────────────────────────────────────────────────

  report.endTime = new Date().toISOString();
  saveReport();

  // ── 5. Summary ─────────────────────────────────────────────────────────────

  const rule = '─'.repeat(41);
  console.log(`\n${rule}`);
  console.log('📊 Daily Generation Summary');
  console.log(`   Documents found  : ${JSON.stringify(report.documentsFound)}`);
  console.log(`   Types triggered  : ${report.typesTriggered.join(', ') || 'none'}`);
  console.log(`   Types skipped    : ${report.typesSkipped.join(', ') || 'none'}`);
  console.log(`   Articles created : ${report.articlesCreated.join(', ') || 'none'}`);
  console.log(`   Errors           : ${report.errors.length}`);
  if (report.errors.length > 0) {
    report.errors.forEach((e) => console.error(`     ⚠️  ${e.type}: ${e.error}`));
  }
  console.log(`${rule}\n`);

  process.exit(hasErrors ? 1 : 0);
}

main().catch((error) => {
  console.error('❌ Fatal error:', error);
  process.exit(1);
});