From 815aeb00e0b16f19df97fa7706054a9a5008e1f1 Mon Sep 17 00:00:00 2001
From: jackwener <jakevingoo@gmail.com>
Date: Sun, 22 Mar 2026 01:00:07 +0800
Subject: [PATCH] feat: add douban, sinablog, substack adapters; upgrade medium
 to TS
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

New adapters:
- douban: book-hot, movie-hot, search (browser/cookie)
- sinablog: hot, search, article, user (search uses public API)
- substack: feed, publication, search (search uses public API)

Medium upgrade (YAML → TS):
- Replace tag.yaml/user.yaml/publication.yaml with TS adapters
- feed.ts (tag feed by topic), search.ts, user.ts with browser scraping
- Richer data: readTime, claps, description

Core pipeline improvements:
- template.ts: trim template before matching (supports multiline expressions)
- template.ts: evalJsExpr fallback for JS expressions in YAML templates
- template.ts: add urlencode/urldecode filters
- transform.ts: inline select inside map params
- build-manifest.ts: TS-over-YAML dedup with warning log
- build-manifest.ts: export scanTs/shouldReplaceManifestEntry for testing

Co-authored-by: Yuan <leotsao.cy@gmail.com>
---
 src/build-manifest.test.ts       |  72 ++++++++++-
 src/build-manifest.ts            |  37 +++++-
 src/clis/douban/book-hot.ts      |  15 +++
 src/clis/douban/movie-hot.ts     |  15 +++
 src/clis/douban/search.ts        |  17 +++
 src/clis/douban/shared.ts        | 165 ++++++++++++++++++++++++++
 src/clis/medium/feed.ts          |  16 +++
 src/clis/medium/publication.yaml |  32 -----
 src/clis/medium/search.ts        |  16 +++
 src/clis/medium/shared.ts        |  83 +++++++++++++
 src/clis/medium/tag.yaml         |  32 -----
 src/clis/medium/user.ts          |  16 +++
 src/clis/medium/user.yaml        |  31 -----
 src/clis/sinablog/article.ts     |  15 +++
 src/clis/sinablog/hot.ts         |  15 +++
 src/clis/sinablog/search.ts      |  56 +++++++++
 src/clis/sinablog/shared.ts      | 198 +++++++++++++++++++++++++++++++
 src/clis/sinablog/user.ts        |  16 +++
 src/clis/substack/feed.ts        |  16 +++
 src/clis/substack/publication.ts |  16 +++
 src/clis/substack/search.ts      |  91 ++++++++++++++
 src/clis/substack/shared.ts      | 132 +++++++++++++++++++++
 src/pipeline/executor.test.ts    |  30 ++++-
 src/pipeline/steps/transform.ts  |  18 ++-
 src/pipeline/template.test.ts    |  18 +++
 src/pipeline/template.ts         |  76 +++++++++++-
 src/pipeline/transform.test.ts   |  13 ++
 27 files changed, 1146 insertions(+), 111 deletions(-)
 create mode 100644 src/clis/douban/book-hot.ts
 create mode 100644 src/clis/douban/movie-hot.ts
 create mode 100644 src/clis/douban/search.ts
 create mode 100644 src/clis/douban/shared.ts
 create mode 100644 src/clis/medium/feed.ts
 delete mode 100644 src/clis/medium/publication.yaml
 create mode 100644 src/clis/medium/search.ts
 create mode 100644 src/clis/medium/shared.ts
 delete mode 100644 src/clis/medium/tag.yaml
 create mode 100644 src/clis/medium/user.ts
 delete mode 100644 src/clis/medium/user.yaml
 create mode 100644 src/clis/sinablog/article.ts
 create mode 100644 src/clis/sinablog/hot.ts
 create mode 100644 src/clis/sinablog/search.ts
 create mode 100644 src/clis/sinablog/shared.ts
 create mode 100644 src/clis/sinablog/user.ts
 create mode 100644 src/clis/substack/feed.ts
 create mode 100644 src/clis/substack/publication.ts
 create mode 100644 src/clis/substack/search.ts
 create mode 100644 src/clis/substack/shared.ts

diff --git a/src/build-manifest.test.ts b/src/build-manifest.test.ts
index b4eabfea..935f9e48 100644
--- a/src/build-manifest.test.ts
+++ b/src/build-manifest.test.ts
@@ -1,5 +1,8 @@
-import { describe, expect, it } from 'vitest';
-import { parseTsArgsBlock } from './build-manifest.js';
+import { afterEach, describe, expect, it } from 'vitest';
+import * as fs from 'node:fs';
+import * as os from 'node:os';
+import * as path from 'node:path';
+import { parseTsArgsBlock, scanTs, shouldReplaceManifestEntry } from './build-manifest.js';
 
 describe('parseTsArgsBlock', () => {
   it('keeps args with nested choices arrays', () => {
@@ -62,3 +65,68 @@ describe('parseTsArgsBlock', () => {
     ]);
   });
 });
+
+describe('manifest helper rules', () => {
+  const tempDirs: string[] = []; // scratch directories created by the tests below; removed in afterEach
+
+  afterEach(() => {
+    for (const dir of tempDirs.splice(0)) { // splice(0) empties the list and yields the removed paths
+      fs.rmSync(dir, { recursive: true, force: true });
+    }
+  });
+
+  it('prefers TS adapters over duplicate YAML adapters', () => {
+    expect(shouldReplaceManifestEntry( // existing YAML entry vs. incoming TS entry
+      {
+        site: 'demo',
+        name: 'search',
+        description: 'yaml',
+        strategy: 'public',
+        browser: false,
+        args: [],
+        type: 'yaml',
+      },
+      {
+        site: 'demo',
+        name: 'search',
+        description: 'ts',
+        strategy: 'public',
+        browser: false,
+        args: [],
+        type: 'ts',
+        modulePath: 'demo/search.js',
+      },
+    )).toBe(true);
+
+    expect(shouldReplaceManifestEntry( // existing TS entry vs. incoming YAML entry
+      {
+        site: 'demo',
+        name: 'search',
+        description: 'ts',
+        strategy: 'public',
+        browser: false,
+        args: [],
+        type: 'ts',
+        modulePath: 'demo/search.js',
+      },
+      {
+        site: 'demo',
+        name: 'search',
+        description: 'yaml',
+        strategy: 'public',
+        browser: false,
+        args: [],
+        type: 'yaml',
+      },
+    )).toBe(false);
+  });
+
+  it('skips TS files that do not register a cli', () => {
+    const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'opencli-manifest-'));
+    tempDirs.push(dir);
+    const file = path.join(dir, 'utils.ts');
+    fs.writeFileSync(file, `export function helper() { return 'noop'; }`); // helper-only module: the source contains no cli() call
+
+    expect(scanTs(file, 'demo')).toBeNull();
+  });
+});
diff --git a/src/build-manifest.ts b/src/build-manifest.ts
index ec32f417..46ece7f2 100644
--- a/src/build-manifest.ts
+++ b/src/build-manifest.ts
@@ -199,7 +199,7 @@ function scanYaml(filePath: string, site: string): ManifestEntry | null {
   }
 }
 
-function scanTs(filePath: string, site: string): ManifestEntry | null {
+export function scanTs(filePath: string, site: string): ManifestEntry | null {
   // TS adapters self-register via cli() at import time.
   // We statically parse the source to extract metadata for the manifest stub.
   const baseName = path.basename(filePath, path.extname(filePath));
@@ -263,8 +263,17 @@ function scanTs(filePath: string, site: string): ManifestEntry | null {
   }
 }
 
+/**
+ * Decide whether `next` should overwrite `current` when both share a site/name key.
+ * Same-type entries always overwrite (last scanned wins); across types only YAML → TS does.
+ */
+export function shouldReplaceManifestEntry(current: ManifestEntry, next: ManifestEntry): boolean {
+  const upgradesYamlToTs = current.type === 'yaml' && next.type === 'ts';
+  return upgradesYamlToTs || current.type === next.type;
+}
+
 export function buildManifest(): ManifestEntry[] {
-  const manifest: ManifestEntry[] = [];
+  const manifest = new Map<string, ManifestEntry>();
 
   if (fs.existsSync(CLIS_DIR)) {
     for (const site of fs.readdirSync(CLIS_DIR)) {
@@ -274,19 +283,37 @@ export function buildManifest(): ManifestEntry[] {
         const filePath = path.join(siteDir, file);
         if (file.endsWith('.yaml') || file.endsWith('.yml')) {
           const entry = scanYaml(filePath, site);
-          if (entry) manifest.push(entry);
+          if (entry) {
+            const key = `${entry.site}/${entry.name}`;
+            const existing = manifest.get(key);
+            if (!existing || shouldReplaceManifestEntry(existing, entry)) {
+              if (existing && existing.type !== entry.type) {
+                process.stderr.write(`⚠️  Duplicate adapter ${key}: ${existing.type} superseded by ${entry.type}\n`);
+              }
+              manifest.set(key, entry);
+            }
+          }
         } else if (
           (file.endsWith('.ts') && !file.endsWith('.d.ts') && !file.endsWith('.test.ts') && file !== 'index.ts') ||
           (file.endsWith('.js') && !file.endsWith('.d.js') && !file.endsWith('.test.js') && file !== 'index.js')
         ) {
           const entry = scanTs(filePath, site);
-          if (entry) manifest.push(entry);
+          if (entry) {
+            const key = `${entry.site}/${entry.name}`;
+            const existing = manifest.get(key);
+            if (!existing || shouldReplaceManifestEntry(existing, entry)) {
+              if (existing && existing.type !== entry.type) {
+                process.stderr.write(`⚠️  Duplicate adapter ${key}: ${existing.type} superseded by ${entry.type}\n`);
+              }
+              manifest.set(key, entry);
+            }
+          }
         }
       }
     }
   }
 
-  return manifest;
+  return [...manifest.values()];
 }
 
 function main(): void {
diff --git a/src/clis/douban/book-hot.ts b/src/clis/douban/book-hot.ts
new file mode 100644
index 00000000..9605401a
--- /dev/null
+++ b/src/clis/douban/book-hot.ts
@@ -0,0 +1,15 @@
+import { cli, Strategy } from '../../registry.js';
+import { loadDoubanBookHot } from './shared.js';
+
+cli({
+  site: 'douban',
+  name: 'book-hot',
+  description: '豆瓣图书热门榜单',
+  domain: 'book.douban.com',
+  strategy: Strategy.COOKIE,
+  args: [
+    { name: 'limit', type: 'int', default: 20, help: '返回的图书数量' },
+  ],
+  columns: ['rank', 'title', 'rating', 'quote', 'author', 'publisher', 'year', 'url'],
+  func: async (page, args) => loadDoubanBookHot(page, Number(args.limit) || 20), // a non-numeric or zero limit falls back to 20
+});
diff --git a/src/clis/douban/movie-hot.ts b/src/clis/douban/movie-hot.ts
new file mode 100644
index 00000000..8f72886c
--- /dev/null
+++ b/src/clis/douban/movie-hot.ts
@@ -0,0 +1,15 @@
+import { cli, Strategy } from '../../registry.js';
+import { loadDoubanMovieHot } from './shared.js';
+
+cli({
+  site: 'douban',
+  name: 'movie-hot',
+  description: '豆瓣电影热门榜单',
+  domain: 'movie.douban.com',
+  strategy: Strategy.COOKIE,
+  args: [
+    { name: 'limit', type: 'int', default: 20, help: '返回的电影数量' },
+  ],
+  columns: ['rank', 'title', 'rating', 'quote', 'director', 'year', 'region', 'url'],
+  func: async (page, args) => loadDoubanMovieHot(page, Number(args.limit) || 20), // a non-numeric or zero limit falls back to 20
+});
diff --git a/src/clis/douban/search.ts b/src/clis/douban/search.ts
new file mode 100644
index 00000000..3b7fc458
--- /dev/null
+++ b/src/clis/douban/search.ts
@@ -0,0 +1,17 @@
+import { cli, Strategy } from '../../registry.js';
+import { searchDouban } from './shared.js';
+
+cli({
+  site: 'douban',
+  name: 'search',
+  description: '搜索豆瓣电影、图书或音乐',
+  domain: 'search.douban.com',
+  strategy: Strategy.COOKIE,
+  args: [
+    { name: 'type', default: 'movie', choices: ['movie', 'book', 'music'], help: '搜索类型（movie=电影, book=图书, music=音乐）' },
+    { name: 'keyword', required: true, help: '搜索关键词' },
+    { name: 'limit', type: 'int', default: 20, help: '返回结果数量' },
+  ],
+  columns: ['rank', 'title', 'rating', 'abstract', 'url'],
+  func: async (page, args) => searchDouban(page, args.type, args.keyword, Number(args.limit) || 20), // a non-numeric or zero limit falls back to 20
+});
diff --git a/src/clis/douban/shared.ts b/src/clis/douban/shared.ts
new file mode 100644
index 00000000..227debcb
--- /dev/null
+++ b/src/clis/douban/shared.ts
@@ -0,0 +1,165 @@
+import { CliError } from '../../errors.js';
+import type { IPage } from '../../types.js';
+
+function clampLimit(limit: number): number {
+  return Math.min(50, Math.max(1, limit || 20)); // falsy input (0, NaN) becomes 20, then the value is bounded to 1..50
+}
+
+async function ensureDoubanReady(page: IPage): Promise<void> { // throws when the loaded page looks like a login or anti-bot interstitial
+  const state = await page.evaluate(`
+    (() => {
+      const title = (document.title || '').trim();
+      const href = (location.href || '').trim();
+      const blocked = href.includes('sec.douban.com') || /登录跳转/.test(title) || /异常请求/.test(document.body?.innerText || '');
+      return { blocked, title, href };
+    })()
+  `);
+  if (state?.blocked) { // blocked = sec.douban.com URL, a 登录跳转 title, or 异常请求 in the body text
+    throw new CliError(
+      'AUTH_REQUIRED',
+      'Douban requires a logged-in browser session before these commands can load data.',
+      'Please sign in to douban.com in the browser that opencli reuses, then rerun the command.',
+    );
+  }
+}
+
+export async function loadDoubanBookHot(page: IPage, limit: number): Promise<any[]> { // scrapes book.douban.com/chart inside the page
+  const safeLimit = clampLimit(limit); // bounded to 1..50 before being spliced into the page script
+  await page.goto('https://book.douban.com/chart');
+  await page.wait(4); // NOTE(review): the unit is defined by IPage.wait — presumably seconds; confirm
+  await ensureDoubanReady(page); // throws CliError('AUTH_REQUIRED') when Douban shows a login/anti-bot page
+  const data = await page.evaluate(`
+    (() => {
+      const normalize = (value) => (value || '').replace(/\\s+/g, ' ').trim();
+      const books = [];
+      for (const el of Array.from(document.querySelectorAll('.media.clearfix'))) {
+        try {
+          const titleEl = el.querySelector('h2 a[href*="/subject/"]');
+          const title = normalize(titleEl?.textContent);
+          let url = titleEl?.getAttribute('href') || '';
+          if (!title || !url) continue;
+          if (!url.startsWith('http')) url = 'https://book.douban.com' + url;
+
+          const info = normalize(el.querySelector('.subject-abstract, .pl, .pub')?.textContent);
+          const infoParts = info.split('/').map((part) => part.trim()).filter(Boolean);
+          const ratingText = normalize(el.querySelector('.subject-rating .font-small, .rating_nums, .rating')?.textContent);
+          const quote = Array.from(el.querySelectorAll('.subject-tags .tag'))
+            .map((node) => normalize(node.textContent))
+            .filter(Boolean)
+            .join(' / ');
+
+          books.push({
+            rank: parseInt(normalize(el.querySelector('.green-num-box')?.textContent), 10) || books.length + 1,
+            title,
+            rating: parseFloat(ratingText) || 0,
+            quote,
+            author: infoParts[0] || '',
+            publisher: infoParts.find((part) => /出版社|出版公司|Press/i.test(part)) || infoParts[2] || '',
+            year: infoParts.find((part) => /\\d{4}(?:-\\d{1,2})?/.test(part))?.match(/\\d{4}/)?.[0] || '',
+            price: infoParts.find((part) => /元|USD|\\$|￥/.test(part)) || '',
+            url,
+            cover: el.querySelector('img')?.getAttribute('src') || '',
+          });
+        } catch {}
+      }
+      return books.slice(0, ${safeLimit});
+    })()
+  `);
+  return Array.isArray(data) ? data : []; // anything other than an array from evaluate becomes []
+}
+
+export async function loadDoubanMovieHot(page: IPage, limit: number): Promise<any[]> { // scrapes movie.douban.com/chart inside the page
+  const safeLimit = clampLimit(limit); // bounded to 1..50 before being spliced into the page script
+  await page.goto('https://movie.douban.com/chart');
+  await page.wait(4); // NOTE(review): the unit is defined by IPage.wait — presumably seconds; confirm
+  await ensureDoubanReady(page); // throws CliError('AUTH_REQUIRED') when Douban shows a login/anti-bot page
+  const data = await page.evaluate(`
+    (() => {
+      const normalize = (value) => (value || '').replace(/\\s+/g, ' ').trim();
+      const results = [];
+      for (const el of Array.from(document.querySelectorAll('.item'))) {
+        const titleEl = el.querySelector('.pl2 a');
+        const title = normalize(titleEl?.textContent);
+        let url = titleEl?.getAttribute('href') || '';
+        if (!title || !url) continue;
+        if (!url.startsWith('http')) url = 'https://movie.douban.com' + url;
+
+        const info = normalize(el.querySelector('.pl2 p')?.textContent);
+        const infoParts = info.split('/').map((part) => part.trim()).filter(Boolean);
+        const releaseIndex = (() => {
+          for (let i = infoParts.length - 1; i >= 0; i -= 1) {
+            if (/\\d{4}-\\d{2}-\\d{2}|\\d{4}\\/\\d{2}\\/\\d{2}/.test(infoParts[i])) return i;
+          }
+          return -1;
+        })();
+        const directorPart = releaseIndex >= 1 ? infoParts[releaseIndex - 1] : '';
+        const regionPart = releaseIndex >= 2 ? infoParts[releaseIndex - 2] : '';
+        const yearMatch = info.match(/\\b(19|20)\\d{2}\\b/);
+        results.push({
+          rank: results.length + 1,
+          title,
+          rating: parseFloat(normalize(el.querySelector('.rating_nums')?.textContent)) || 0,
+          quote: normalize(el.querySelector('.inq')?.textContent),
+          director: directorPart.replace(/^导演:\\s*/, ''),
+          year: yearMatch?.[0] || '',
+          region: regionPart,
+          url,
+          cover: el.querySelector('img')?.getAttribute('src') || '',
+        });
+        if (results.length >= ${safeLimit}) break;
+      }
+      return results;
+    })()
+  `);
+  return Array.isArray(data) ? data : []; // anything other than an array from evaluate becomes []
+}
+
+export async function searchDouban(page: IPage, type: string, keyword: string, limit: number): Promise<any[]> { // type becomes the first URL path segment
+  const safeLimit = clampLimit(limit); // bounded to 1..50 before being spliced into the page script
+  await page.goto(`https://search.douban.com/${encodeURIComponent(type)}/subject_search?search_text=${encodeURIComponent(keyword)}`);
+  await page.wait(2); // NOTE(review): the unit is defined by IPage.wait — presumably seconds; confirm
+  await ensureDoubanReady(page); // throws CliError('AUTH_REQUIRED') when Douban shows a login/anti-bot page
+  const data = await page.evaluate(`
+    (async () => {
+      const type = ${JSON.stringify(type)};
+      const normalize = (value) => (value || '').replace(/\\s+/g, ' ').trim();
+      const seen = new Set();
+      const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
+
+      for (let i = 0; i < 20; i += 1) {
+        if (document.querySelector('.item-root .title-text, .item-root .title a')) break;
+        await sleep(300);
+      }
+
+      const items = Array.from(document.querySelectorAll('.item-root'));
+
+      const results = [];
+      for (const el of items) {
+        const titleEl = el.querySelector('.title-text, .title a, a[title]');
+        const title = normalize(titleEl?.textContent) || normalize(titleEl?.getAttribute('title'));
+        let url = titleEl?.getAttribute('href') || '';
+        if (!title || !url) continue;
+        if (!url.startsWith('http')) url = 'https://search.douban.com' + url;
+        if (!url.includes('/subject/') || seen.has(url)) continue;
+        seen.add(url);
+        const ratingText = normalize(el.querySelector('.rating_nums')?.textContent);
+        const abstract = normalize(
+          el.querySelector('.meta.abstract, .meta, .abstract, p')?.textContent,
+        );
+        results.push({
+          rank: results.length + 1,
+          id: url.match(/subject\\/(\\d+)/)?.[1] || '',
+          type,
+          title,
+          rating: ratingText.includes('.') ? parseFloat(ratingText) : 0,
+          abstract: abstract.slice(0, 100) + (abstract.length > 100 ? '...' : ''),
+          url,
+          cover: el.querySelector('img')?.getAttribute('src') || '',
+        });
+        if (results.length >= ${safeLimit}) break;
+      }
+      return results;
+    })()
+  `);
+  return Array.isArray(data) ? data : []; // anything other than an array from evaluate becomes []
+}
diff --git a/src/clis/medium/feed.ts b/src/clis/medium/feed.ts
new file mode 100644
index 00000000..b4a5178f
--- /dev/null
+++ b/src/clis/medium/feed.ts
@@ -0,0 +1,16 @@
+import { cli, Strategy } from '../../registry.js';
+import { buildMediumTagUrl, loadMediumPosts } from './shared.js';
+
+cli({
+  site: 'medium',
+  name: 'feed',
+  description: 'Medium 热门文章 Feed',
+  domain: 'medium.com',
+  strategy: Strategy.COOKIE,
+  args: [
+    { name: 'topic', default: '', help: '话题标签（如 technology, programming, ai）' },
+    { name: 'limit', type: 'int', default: 20, help: '返回的文章数量' },
+  ],
+  columns: ['rank', 'title', 'author', 'date', 'readTime', 'claps'],
+  func: async (page, args) => loadMediumPosts(page, buildMediumTagUrl(args.topic), Number(args.limit) || 20), // an empty topic resolves to the technology tag page
+});
diff --git a/src/clis/medium/publication.yaml b/src/clis/medium/publication.yaml
deleted file mode 100644
index c772ae91..00000000
--- a/src/clis/medium/publication.yaml
+++ /dev/null
@@ -1,32 +0,0 @@
-site: medium
-name: publication
-description: Get recent articles from a Medium publication
-domain: medium.com
-strategy: public
-browser: false
-
-args:
-  name:
-    type: string
-    required: true
-    description: The publication name/slug (e.g. netflix-techblog)
-  limit:
-    type: int
-    default: 10
-    description: Max number of stories
-
-pipeline:
-  - fetch:
-      url: https://api.rss2json.com/v1/api.json?rss_url=https://medium.com/feed/${{ args.name }}
-  
-  - select: items
-  
-  - map:
-      title: "${{ item.title }}"
-      author: "${{ item.author }}"
-      date: "${{ item.pubDate }}"
-      url: "${{ item.link }}"
-
-  - limit: ${{ args.limit }}
-
-columns: [title, author, date, url]
diff --git a/src/clis/medium/search.ts b/src/clis/medium/search.ts
new file mode 100644
index 00000000..fa93adad
--- /dev/null
+++ b/src/clis/medium/search.ts
@@ -0,0 +1,16 @@
+import { cli, Strategy } from '../../registry.js';
+import { buildMediumSearchUrl, loadMediumPosts } from './shared.js';
+
+cli({
+  site: 'medium',
+  name: 'search',
+  description: '搜索 Medium 文章',
+  domain: 'medium.com',
+  strategy: Strategy.COOKIE,
+  args: [
+    { name: 'keyword', required: true, help: '搜索关键词' },
+    { name: 'limit', type: 'int', default: 20, help: '返回的文章数量' },
+  ],
+  columns: ['rank', 'title', 'author', 'date', 'readTime', 'claps'],
+  func: async (page, args) => loadMediumPosts(page, buildMediumSearchUrl(args.keyword), Number(args.limit) || 20), // a non-numeric or zero limit falls back to 20
+});
diff --git a/src/clis/medium/shared.ts b/src/clis/medium/shared.ts
new file mode 100644
index 00000000..9d35a44e
--- /dev/null
+++ b/src/clis/medium/shared.ts
@@ -0,0 +1,83 @@
+import type { IPage } from '../../types.js';
+
+export function buildMediumTagUrl(topic?: string): string {
+  return 'https://medium.com/tag/' + (topic ? encodeURIComponent(topic) : 'technology'); // a missing or empty topic selects the technology tag
+}
+
+export function buildMediumSearchUrl(keyword: string): string {
+  return 'https://medium.com/search?q=' + encodeURIComponent(keyword); // the keyword is percent-encoded into the q parameter
+}
+
+export function buildMediumUserUrl(username: string): string { // accepts "name" or "@name"; the handle is trimmed and percent-encoded
+  return `https://medium.com/@${encodeURIComponent(username.trim().replace(/^@/, ''))}`;
+}
+
+export async function loadMediumPosts(page: IPage, url: string, limit: number): Promise<any[]> { // opens url and scrapes its <article> cards in-page
+  if (!page) throw new Error('Requires browser session');
+  await page.goto(url);
+  await page.wait(5); // NOTE(review): the unit is defined by IPage.wait — presumably seconds; confirm
+  const data = await page.evaluate(`
+    (async () => {
+      await new Promise((resolve) => setTimeout(resolve, 3000));
+
+      const limit = ${Math.max(1, Math.min(Number.isNaN(limit) ? 20 : limit, 50))};
+      const normalize = (value) => (value || '').replace(/\\s+/g, ' ').trim();
+      const posts = [];
+      const seen = new Set();
+
+      for (const article of Array.from(document.querySelectorAll('article'))) {
+        try {
+          const titleEl = article.querySelector('h2, h3, h1');
+          const title = normalize(titleEl?.textContent);
+          if (!title) continue;
+
+          const linkEl = titleEl?.closest('a') || article.querySelector('a[href*="/@"], a[href*="/p/"]');
+          let url = linkEl?.getAttribute('href') || '';
+          if (!url) continue;
+          if (!url.startsWith('http')) url = 'https://medium.com' + url;
+          if (seen.has(url)) continue;
+
+          const author = normalize(
+            Array.from(article.querySelectorAll('a[href^="/@"]'))
+              .map((node) => normalize(node.textContent))
+              .find((text) => text && text !== title),
+          );
+
+          const allText = normalize(article.textContent);
+          const dateEl = article.querySelector('time');
+          const date = normalize(dateEl?.textContent) ||
+            dateEl?.getAttribute('datetime') ||
+            allText.match(/\\b(?:[A-Z][a-z]{2}\\s+\\d{1,2}|\\d+[dhmw]\\s+ago)\\b/)?.[0] ||
+            '';
+
+          const readTime = allText.match(/(\\d+)\\s*min\\s*read/i)?.[0] || '';
+          const claps = allText.match(/\\b(\\d+(?:\\.\\d+)?[KkMm]?)\\s*claps?\\b/i)?.[1] || '';
+
+          const description = normalize(
+            Array.from(article.querySelectorAll('h3, p'))
+              .map((node) => normalize(node.textContent))
+              .find((text) => text && text !== title && text !== author && !/member-only story|response icon/i.test(text)),
+          );
+
+          seen.add(url);
+          posts.push({
+            rank: posts.length + 1,
+            title,
+            author,
+            date,
+            readTime,
+            claps,
+            description: description ? description.slice(0, 150) : '',
+            url,
+          });
+
+          if (posts.length >= limit) break;
+        } catch {}
+      }
+
+      return posts;
+    })()
+  `);
+
+  return Array.isArray(data) ? data : []; // anything other than an array from evaluate becomes []
+}
diff --git a/src/clis/medium/tag.yaml b/src/clis/medium/tag.yaml
deleted file mode 100644
index 8168c247..00000000
--- a/src/clis/medium/tag.yaml
+++ /dev/null
@@ -1,32 +0,0 @@
-site: medium
-name: tag
-description: Get top articles for a Medium tag
-domain: medium.com
-strategy: public
-browser: false
-
-args:
-  tag:
-    type: string
-    required: true
-    description: The tag to search for (e.g. programming)
-  limit:
-    type: int
-    default: 10
-    description: Max number of stories
-
-pipeline:
-  - fetch:
-      url: https://api.rss2json.com/v1/api.json?rss_url=https://medium.com/feed/tag/${{ args.tag }}
-  
-  - select: items
-  
-  - map:
-      title: "${{ item.title }}"
-      author: "${{ item.author }}"
-      date: "${{ item.pubDate }}"
-      url: "${{ item.link }}"
-
-  - limit: ${{ args.limit }}
-
-columns: [title, author, date, url]
diff --git a/src/clis/medium/user.ts b/src/clis/medium/user.ts
new file mode 100644
index 00000000..ae59874c
--- /dev/null
+++ b/src/clis/medium/user.ts
@@ -0,0 +1,16 @@
+import { cli, Strategy } from '../../registry.js';
+import { buildMediumUserUrl, loadMediumPosts } from './shared.js';
+
+cli({
+  site: 'medium',
+  name: 'user',
+  description: '获取 Medium 用户的文章列表',
+  domain: 'medium.com',
+  strategy: Strategy.COOKIE,
+  args: [
+    { name: 'username', required: true, help: 'Medium 用户名（如 @username 或 username）' },
+    { name: 'limit', type: 'int', default: 20, help: '返回的文章数量' },
+  ],
+  columns: ['rank', 'title', 'date', 'readTime', 'claps', 'url'],
+  func: async (page, args) => loadMediumPosts(page, buildMediumUserUrl(args.username), Number(args.limit) || 20), // a non-numeric or zero limit falls back to 20
+});
diff --git a/src/clis/medium/user.yaml b/src/clis/medium/user.yaml
deleted file mode 100644
index 4a2aeed1..00000000
--- a/src/clis/medium/user.yaml
+++ /dev/null
@@ -1,31 +0,0 @@
-site: medium
-name: user
-description: Get recent articles by a Medium user
-domain: medium.com
-strategy: public
-browser: false
-
-args:
-  username:
-    type: string
-    required: true
-    description: The medium username (without the @ symbol)
-  limit:
-    type: int
-    default: 10
-    description: Max number of stories
-
-pipeline:
-  - fetch:
-      url: https://api.rss2json.com/v1/api.json?rss_url=https://medium.com/feed/@${{ args.username }}
-  
-  - select: items
-  
-  - map:
-      title: "${{ item.title }}"
-      date: "${{ item.pubDate }}"
-      url: "${{ item.link }}"
-
-  - limit: ${{ args.limit }}
-
-columns: [title, date, url]
diff --git a/src/clis/sinablog/article.ts b/src/clis/sinablog/article.ts
new file mode 100644
index 00000000..d3260cc1
--- /dev/null
+++ b/src/clis/sinablog/article.ts
@@ -0,0 +1,15 @@
+import { cli, Strategy } from '../../registry.js';
+import { loadSinaBlogArticle } from './shared.js';
+
+cli({
+  site: 'sinablog',
+  name: 'article',
+  description: '获取新浪博客单篇文章详情',
+  domain: 'blog.sina.com.cn',
+  strategy: Strategy.COOKIE,
+  args: [
+    { name: 'url', required: true, help: '文章URL（如 https://blog.sina.com.cn/s/blog_xxx.html）' },
+  ],
+  columns: ['title', 'author', 'date', 'category', 'readCount', 'commentCount'],
+  func: async (page, args) => loadSinaBlogArticle(page, args.url), // the url argument is handed to page.goto unchanged
+});
diff --git a/src/clis/sinablog/hot.ts b/src/clis/sinablog/hot.ts
new file mode 100644
index 00000000..4648ce3f
--- /dev/null
+++ b/src/clis/sinablog/hot.ts
@@ -0,0 +1,15 @@
+import { cli, Strategy } from '../../registry.js';
+import { loadSinaBlogHot } from './shared.js';
+
+cli({
+  site: 'sinablog',
+  name: 'hot',
+  description: '获取新浪博客热门文章/推荐',
+  domain: 'blog.sina.com.cn',
+  strategy: Strategy.COOKIE,
+  args: [
+    { name: 'limit', type: 'int', default: 20, help: '返回的文章数量' },
+  ],
+  columns: ['rank', 'title', 'author', 'date', 'readCount', 'url'],
+  func: async (page, args) => loadSinaBlogHot(page, Number(args.limit) || 20), // a non-numeric or zero limit falls back to 20
+});
diff --git a/src/clis/sinablog/search.ts b/src/clis/sinablog/search.ts
new file mode 100644
index 00000000..59f05ae3
--- /dev/null
+++ b/src/clis/sinablog/search.ts
@@ -0,0 +1,56 @@
+import { cli, Strategy } from '../../registry.js';
+
+// Collapse whitespace runs to single spaces and trim; any non-string value becomes ''.
+const normalize = (value: unknown): string =>
+  typeof value === 'string' ? value.replace(/\s+/g, ' ').trim() : '';
+
+// Remove every <...> tag-like span; HTML entities such as &amp; are left as they are.
+const stripHtml = (value: string): string =>
+  value.replace(/<[^>]+>/g, '');
+
+/**
+ * Query Sina's public search API and return up to `limit` blog-article hits.
+ * Always requests at least 10 rows, then keeps only blog.sina.com.cn/s/blog_ links.
+ * Throws an Error carrying the HTTP status when the response is not ok.
+ */
+async function searchSinaBlog(keyword: string, limit: number): Promise<any[]> {
+  // Build the query string one parameter at a time (q, tp, sort, page, size, from).
+  const endpoint = new URL('https://search.sina.com.cn/api/search');
+  const query: Array<[string, string]> = [
+    ['q', keyword], ['tp', 'mix'], ['sort', '0'], ['page', '1'],
+    ['size', String(Math.max(limit, 10))], ['from', 'search_result'],
+  ];
+  for (const [key, value] of query) endpoint.searchParams.set(key, value);
+
+  const headers = { 'User-Agent': 'Mozilla/5.0', Accept: 'application/json' };
+  const response = await fetch(endpoint, { headers });
+  if (!response.ok) throw new Error(`Sina blog search failed: HTTP ${response.status}`);
+
+  const payload = await response.json() as { data?: { list?: any[] } };
+  const rows = Array.isArray(payload?.data?.list) ? payload.data.list : [];
+  // Filter first, then cap, so `rank` numbers only the rows that survive.
+  const blogRows = rows.filter((row) => normalize(row?.url).includes('blog.sina.com.cn/s/blog_'));
+  return blogRows.slice(0, limit).map((row, position) => ({
+    rank: position + 1,
+    title: normalize(stripHtml(row?.title || '')),
+    author: normalize(row?.media_show || row?.author),
+    date: normalize(row?.time || row?.dataTime),
+    description: normalize(row?.intro || row?.searchSummary).slice(0, 150),
+    url: normalize(row?.url),
+  }));
+}
+
+cli({
+  site: 'sinablog',
+  name: 'search',
+  description: '搜索新浪博客文章（通过新浪搜索）',
+  domain: 'blog.sina.com.cn',
+  strategy: Strategy.PUBLIC,
+  browser: false,
+  args: [
+    { name: 'keyword', required: true, help: '搜索关键词' },
+    { name: 'limit', type: 'int', default: 20, help: '返回的文章数量' },
+  ],
+  columns: ['rank', 'title', 'author', 'date', 'description', 'url'],
+  func: async (_page, args) => searchSinaBlog(args.keyword, Math.max(1, Math.min(Number(args.limit) || 20, 50))), // limit bounded to 1..50; the page is unused because the search goes through fetch
+});
diff --git a/src/clis/sinablog/shared.ts b/src/clis/sinablog/shared.ts
new file mode 100644
index 00000000..3e2b4f54
--- /dev/null
+++ b/src/clis/sinablog/shared.ts
@@ -0,0 +1,198 @@
+import type { IPage } from '../../types.js';
+
+function clampLimit(limit: number): number { // falsy (0/NaN) falls back to 20, then clamp into [1, 50]
+  return Math.min(50, Math.max(1, limit || 20));
+}
+
+export function buildSinaBlogSearchUrl(keyword: string): string {
+  return `https://search.sina.com.cn/search?q=${encodeURIComponent(keyword)}&tp=mix`;
+}
+
+export function buildSinaBlogUserUrl(uid: string): string {
+  return `https://blog.sina.com.cn/s/articlelist_${encodeURIComponent(uid)}_0_1.html`;
+}
+
+export async function loadSinaBlogArticle(page: IPage, url: string): Promise<any> {
+  await page.goto(url);
+  await page.wait(3);
+  return page.evaluate(`
+    (async () => {
+      await new Promise((resolve) => setTimeout(resolve, 1500));
+      const normalize = (value) => (value || '').replace(/\\s+/g, ' ').trim();
+      const title = normalize(document.querySelector('.articalTitle h2, .title h2, h1, h2.titName')?.textContent);
+      const titleParts = normalize(document.title).split('_').map((part) => normalize(part)).filter(Boolean);
+      const author = titleParts[1] || title.split(/[：:]/)[0] || '';
+      const timeText = normalize(document.querySelector('.time, .articalInfo .time')?.textContent).replace(/[()]/g, '');
+      const date = timeText || normalize(document.body.innerText.match(/\\b\\d{4}-\\d{2}-\\d{2}(?:\\s+\\d{2}:\\d{2}:\\d{2})?\\b/)?.[0]);
+      const category = normalize(document.querySelector('.articalTag .blog_class a, .blog_class a')?.textContent);
+      const tags = Array.from(document.querySelectorAll('.blog_tag h3, .blog_tag a, .tag a, .artical_tag a'))
+        .map((node) => normalize(node.textContent))
+        .filter(Boolean);
+      const content = normalize(document.querySelector('.articalContent, .blog_content, .content, #sina_keyword_ad_area2')?.textContent).slice(0, 500);
+      const images = Array.from(document.querySelectorAll('.articalContent img, .blog_content img, .content img'))
+        .map((img) => img.getAttribute('src') || img.getAttribute('real_src') || '')
+        .filter((src) => src && !src.includes('icon'))
+        .slice(0, 5);
+      return {
+        title,
+        author,
+        date,
+        category,
+        tags: tags.join(', '),
+        readCount: '',
+        commentCount: '',
+        content: content + (content.length >= 500 ? '...' : ''),
+        images: images.join(', '),
+        url: ${JSON.stringify(url)},
+      };
+    })()
+  `);
+}
+
+export async function loadSinaBlogHot(page: IPage, limit: number): Promise<any[]> {
+  const safeLimit = clampLimit(limit);
+  await page.goto('https://blog.sina.com.cn/');
+  await page.wait(3);
+  const data = await page.evaluate(`
+    (async () => {
+      await new Promise((resolve) => setTimeout(resolve, 1500));
+      const normalize = (value) => (value || '').replace(/\\s+/g, ' ').trim();
+      const limit = ${safeLimit};
+      const abs = (href) => {
+        if (!href) return '';
+        if (href.startsWith('//')) return 'https:' + href;
+        if (href.startsWith('http')) return href;
+        return 'https://blog.sina.com.cn' + (href.startsWith('/') ? '' : '/') + href;
+      };
+      const parseArticle = (doc, fallback) => {
+        const title = normalize(doc.querySelector('.articalTitle h2, .title h2, h1, h2.titName')?.textContent) || fallback.title;
+        const titleParts = normalize(doc.title).split('_').map((part) => normalize(part)).filter(Boolean);
+        const timeText = normalize(doc.querySelector('.time, .articalInfo .time')?.textContent).replace(/[()]/g, '');
+        const articleId = fallback.url.match(/blog_([a-zA-Z0-9]+)\\.html/)?.[1] || '';
+        return {
+          articleId,
+          title,
+          author: titleParts[1] || title.split(/[：:]/)[0] || '',
+          date: timeText || '',
+          readCount: '',
+          description: normalize(doc.querySelector('.articalContent, .blog_content, .content, #sina_keyword_ad_area2')?.textContent).slice(0, 150),
+        };
+      };
+
+      const seeds = [];
+      const seen = new Set();
+      for (const link of Array.from(document.querySelectorAll('.day-hot-rank .art-list a[href*="/s/blog_"], .hot-rank .art-list a[href*="/s/blog_"]'))) {
+        const title = normalize(link.textContent);
+        const url = abs(link.getAttribute('href') || '');
+        if (!title || !url || seen.has(url)) continue;
+        seen.add(url);
+        seeds.push({ rank: seeds.length + 1, title, url });
+        if (seeds.length >= limit) break;
+      }
+
+      const results = [];
+      for (const item of seeds) {
+        let merged = {
+          rank: item.rank,
+          articleId: item.url.match(/blog_([a-zA-Z0-9]+)\\.html/)?.[1] || '',
+          title: item.title,
+          author: '',
+          date: '',
+          readCount: '',
+          description: '',
+          url: item.url,
+        };
+        try {
+          const resp = await fetch(item.url, { credentials: 'include' });
+          if (resp.ok) {
+            const html = await resp.text();
+            const doc = new DOMParser().parseFromString(html, 'text/html');
+            merged = Object.assign(merged, parseArticle(doc, item));
+          }
+        } catch {}
+        results.push(merged);
+      }
+      return results;
+    })()
+  `);
+
+  return Array.isArray(data) ? data : [];
+}
+
+export async function loadSinaBlogSearch(page: IPage, keyword: string, limit: number): Promise<any[]> {
+  const safeLimit = clampLimit(limit);
+  await page.goto(buildSinaBlogSearchUrl(keyword));
+  await page.wait(5);
+  const data = await page.evaluate(`
+    (async () => {
+      const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
+      for (let i = 0; i < 20; i += 1) {
+        if (document.querySelector('.result-item')) break;
+        await sleep(500);
+      }
+      const normalize = (value) => (value || '').replace(/\\s+/g, ' ').trim();
+      const limit = ${safeLimit};
+      const items = Array.from(document.querySelectorAll('.result-item'));
+      const results = [];
+      for (const item of items) {
+        const link = item.querySelector('.result-title a[href*="blog.sina.com.cn/s/blog_"]');
+        const title = normalize(link?.textContent);
+        const url = link?.getAttribute('href') || '';
+        if (!title || !url) continue;
+        results.push({
+          rank: results.length + 1,
+          title,
+          author: normalize(item.querySelector('.result-meta .source')?.textContent),
+          date: normalize(item.querySelector('.result-meta .time')?.textContent),
+          description: normalize(item.querySelector('.result-intro')?.textContent).slice(0, 150),
+          url,
+        });
+        if (results.length >= limit) break;
+      }
+      return results;
+    })()
+  `);
+
+  return Array.isArray(data) ? data : [];
+}
+
+export async function loadSinaBlogUser(page: IPage, uid: string, limit: number): Promise<any[]> {
+  const safeLimit = clampLimit(limit);
+  await page.goto(buildSinaBlogUserUrl(uid));
+  await page.wait(3);
+  const data = await page.evaluate(`
+    (async () => {
+      await new Promise((resolve) => setTimeout(resolve, 1000));
+      const normalize = (value) => (value || '').replace(/\\s+/g, ' ').trim();
+      const limit = ${safeLimit};
+      const author = normalize(document.title).split('_').map((part) => normalize(part)).filter(Boolean)[1] || '';
+      const abs = (href) => {
+        if (!href) return '';
+        if (href.startsWith('//')) return 'https:' + href;
+        if (href.startsWith('http')) return href;
+        return 'https://blog.sina.com.cn' + (href.startsWith('/') ? '' : '/') + href;
+      };
+      const results = [];
+      for (const item of Array.from(document.querySelectorAll('.articleList .articleCell'))) {
+        const link = item.querySelector('.atc_title a[href*="/s/blog_"]');
+        const title = normalize(link?.textContent);
+        const url = abs(link?.getAttribute('href') || '');
+        if (!title || !url) continue;
+        results.push({
+          rank: results.length + 1,
+          articleId: url.match(/blog_([a-zA-Z0-9]+)\\.html/)?.[1] || '',
+          title,
+          author,
+          date: normalize(item.querySelector('.atc_tm')?.textContent),
+          readCount: '',
+          description: '',
+          url,
+        });
+        if (results.length >= limit) break;
+      }
+      return results;
+    })()
+  `);
+
+  return Array.isArray(data) ? data : [];
+}
diff --git a/src/clis/sinablog/user.ts b/src/clis/sinablog/user.ts
new file mode 100644
index 00000000..6096217b
--- /dev/null
+++ b/src/clis/sinablog/user.ts
@@ -0,0 +1,16 @@
+import { cli, Strategy } from '../../registry.js';
+import { loadSinaBlogUser } from './shared.js';
+
+cli({
+  site: 'sinablog',
+  name: 'user',
+  description: '获取新浪博客用户的文章列表',
+  domain: 'blog.sina.com.cn',
+  strategy: Strategy.COOKIE,
+  args: [
+    { name: 'uid', required: true, help: '新浪博客用户ID（如 1234567890）' },
+    { name: 'limit', type: 'int', default: 20, help: '返回的文章数量' },
+  ],
+  columns: ['rank', 'title', 'author', 'date', 'readCount', 'url'],
+  func: async (page, args) => loadSinaBlogUser(page, args.uid, Number(args.limit) || 20),
+});
diff --git a/src/clis/substack/feed.ts b/src/clis/substack/feed.ts
new file mode 100644
index 00000000..5f861e49
--- /dev/null
+++ b/src/clis/substack/feed.ts
@@ -0,0 +1,16 @@
+import { cli, Strategy } from '../../registry.js';
+import { buildSubstackBrowseUrl, loadSubstackFeed } from './shared.js';
+
+cli({
+  site: 'substack',
+  name: 'feed',
+  description: 'Substack 热门文章 Feed',
+  domain: 'substack.com',
+  strategy: Strategy.COOKIE,
+  args: [
+    { name: 'category', default: 'all', help: '文章分类: all, tech, business, culture, politics, science, health' },
+    { name: 'limit', type: 'int', default: 20, help: '返回的文章数量' },
+  ],
+  columns: ['rank', 'title', 'author', 'date', 'readTime', 'url'],
+  func: async (page, args) => loadSubstackFeed(page, buildSubstackBrowseUrl(args.category), Number(args.limit) || 20),
+});
diff --git a/src/clis/substack/publication.ts b/src/clis/substack/publication.ts
new file mode 100644
index 00000000..5caa4b39
--- /dev/null
+++ b/src/clis/substack/publication.ts
@@ -0,0 +1,16 @@
+import { cli, Strategy } from '../../registry.js';
+import { loadSubstackArchive } from './shared.js';
+
+cli({
+  site: 'substack',
+  name: 'publication',
+  description: '获取特定 Substack Newsletter 的最新文章',
+  domain: 'substack.com',
+  strategy: Strategy.COOKIE,
+  args: [
+    { name: 'url', required: true, help: 'Newsletter URL（如 https://example.substack.com）' },
+    { name: 'limit', type: 'int', default: 20, help: '返回的文章数量' },
+  ],
+  columns: ['rank', 'title', 'date', 'description', 'url'],
+  func: async (page, args) => loadSubstackArchive(page, args.url.trim().replace(/\/+$/, ''), Number(args.limit) || 20), // drop whitespace and ALL trailing slashes before '/archive' is appended
+});
diff --git a/src/clis/substack/search.ts b/src/clis/substack/search.ts
new file mode 100644
index 00000000..c447d08e
--- /dev/null
+++ b/src/clis/substack/search.ts
@@ -0,0 +1,91 @@
+import { cli, Strategy } from '../../registry.js';
+
+type SubstackPostResult = {
+  title: string;
+  author: string;
+  date: string;
+  description: string;
+  url: string;
+};
+
+function headers(): HeadersInit {
+  return {
+    'User-Agent': 'Mozilla/5.0',
+    Accept: 'application/json',
+  };
+}
+
+function trim(value: unknown): string { // non-strings become ''; whitespace runs collapse to one space
+  return typeof value !== 'string' ? '' : value.replace(/\s+/g, ' ').trim();
+}
+
+function publicationBaseUrl(publication: any): string { // custom domain wins over the *.substack.com subdomain
+  const subdomainHost = publication?.subdomain ? `${publication.subdomain}.substack.com` : '';
+  const host = publication?.custom_domain || subdomainHost;
+  return host ? `https://${host}` : '';
+}
+
+async function searchPosts(keyword: string, limit: number): Promise<SubstackPostResult[]> {
+  const url = new URL('https://substack.com/api/v1/post/search');
+  url.searchParams.set('query', keyword);
+  url.searchParams.set('page', '0');
+  url.searchParams.set('includePlatformResults', 'true');
+
+  const resp = await fetch(url, { headers: headers() });
+  if (!resp.ok) throw new Error(`Substack post search failed: HTTP ${resp.status}`);
+
+  const data = await resp.json() as { results?: any[] };
+  const results = Array.isArray(data?.results) ? data.results : [];
+  return results.slice(0, limit).map((item, index) => ({
+    rank: index + 1,
+    title: trim(item?.title),
+    author: trim(item?.publishedBylines?.[0]?.name),
+    date: trim(item?.post_date).split('T')[0] || trim(item?.post_date),
+    description: trim(item?.description || item?.subtitle || item?.truncated_body_text).slice(0, 150),
+    url: trim(item?.canonical_url),
+  }));
+}
+
+async function searchPublications(keyword: string, limit: number): Promise<SubstackPostResult[]> {
+  const url = new URL('https://substack.com/api/v1/profile/search');
+  url.searchParams.set('query', keyword);
+  url.searchParams.set('page', '0');
+
+  const resp = await fetch(url, { headers: headers() });
+  if (!resp.ok) throw new Error(`Substack publication search failed: HTTP ${resp.status}`);
+
+  const data = await resp.json() as { results?: any[] };
+  const results = Array.isArray(data?.results) ? data.results : [];
+  return results.slice(0, limit).map((item, index) => {
+    const publication = item?.primaryPublication || item?.publicationUsers?.[0]?.publication || {};
+    return {
+      rank: index + 1,
+      title: trim(publication?.name || item?.name),
+      author: trim(item?.name),
+      date: '',
+      description: trim(publication?.hero_text || item?.bio).slice(0, 150),
+      url: publicationBaseUrl(publication),
+    };
+  });
+}
+
+cli({
+  site: 'substack',
+  name: 'search',
+  description: '搜索 Substack 文章和 Newsletter',
+  domain: 'substack.com',
+  strategy: Strategy.PUBLIC,
+  browser: false,
+  args: [
+    { name: 'keyword', required: true, help: '搜索关键词' },
+    { name: 'type', default: 'posts', choices: ['posts', 'publications'], help: '搜索类型（posts=文章, publications=Newsletter）' },
+    { name: 'limit', type: 'int', default: 20, help: '返回结果数量' },
+  ],
+  columns: ['rank', 'title', 'author', 'date', 'description', 'url'],
+  func: async (_page, args) => {
+    const limit = Math.max(1, Math.min(Number(args.limit) || 20, 50));
+    return args.type === 'publications'
+      ? searchPublications(args.keyword, limit)
+      : searchPosts(args.keyword, limit);
+  },
+});
diff --git a/src/clis/substack/shared.ts b/src/clis/substack/shared.ts
new file mode 100644
index 00000000..08673c9e
--- /dev/null
+++ b/src/clis/substack/shared.ts
@@ -0,0 +1,132 @@
+import type { IPage } from '../../types.js';
+
+export function buildSubstackBrowseUrl(category?: string): string {
+  const key = (category ?? '').trim(); // raw CLI input: tolerate stray whitespace
+  if (!key || key === 'all') return 'https://substack.com/';
+  return `https://substack.com/browse/${encodeURIComponent(key === 'tech' ? 'technology' : key)}`; // 'tech' is an alias; escape '/', '?', '#'
+}
+
+export async function loadSubstackFeed(page: IPage, url: string, limit: number): Promise<any[]> { // scrape post links from a Substack listing page
+  if (!page) throw new Error('Requires browser session');
+  await page.goto(url);
+  await page.wait(5);
+  const data = await page.evaluate(`
+    (async () => {
+      await new Promise((resolve) => setTimeout(resolve, 3000));
+      const limit = ${Math.max(1, Math.min(Number.isNaN(limit) ? 20 : limit, 50))};
+      const normalize = (value) => (value || '').replace(/\\s+/g, ' ').trim();
+      const posts = [];
+      const seen = new Set();
+
+      const allLinks = Array.from(document.querySelectorAll('a')).filter((link) => {
+        const href = link.getAttribute('href') || '';
+        return href.includes('/home/post/') || href.includes('/p/');
+      });
+
+      for (const linkEl of allLinks) {
+        let postUrl = linkEl.getAttribute('href') || '';
+        if (!postUrl) continue;
+        if (!postUrl.startsWith('http')) { try { postUrl = new URL(postUrl, 'https://substack.com').href; } catch { continue; } }
+        if (seen.has(postUrl)) continue;
+
+        const lines = (linkEl.innerText || '')
+          .split('\\n')
+          .map((line) => normalize(line))
+          .filter(Boolean);
+
+        const readMeta = lines.find((line) => /\\b(read|watch|listen)\\b/i.test(line)) || '';
+        if (!readMeta) continue;
+
+        const date = lines.find((line) => /^[A-Z]{3}\\s+\\d{1,2}$/i.test(line)) || '';
+        const contentLines = lines.filter((line) =>
+          line &&
+          line !== date &&
+          line !== readMeta &&
+          line.toLowerCase() !== 'save' &&
+          line.toLowerCase() !== 'more' &&
+          !/^(sign in|create account|get app)$/i.test(line),
+        );
+
+        const metaParts = readMeta.split('∙').map((part) => normalize(part));
+        const author = metaParts[0] || '';
+        const readTime = metaParts.slice(1).join(' ∙ ') || readMeta;
+        const title = contentLines.length >= 2 ? contentLines[1] : (contentLines[0] || '');
+        const description = contentLines.length >= 3 ? contentLines.slice(2).join(' ') : '';
+        if (!title) continue;
+
+        seen.add(postUrl);
+        posts.push({
+          rank: posts.length + 1,
+          title,
+          author,
+          date,
+          readTime,
+          description: description.slice(0, 150),
+          url: postUrl,
+        });
+
+        if (posts.length >= limit) break;
+      }
+
+      return posts;
+    })()
+  `);
+  // Anything other than an array coming back from evaluate() is treated as "no posts".
+  return Array.isArray(data) ? data : [];
+}
+
+export async function loadSubstackArchive(page: IPage, baseUrl: string, limit: number): Promise<any[]> { // scrape post links from `${baseUrl}/archive`
+  if (!page) throw new Error('Requires browser session');
+  await page.goto(`${baseUrl}/archive`);
+  await page.wait(5);
+  const data = await page.evaluate(`
+    (async () => {
+      await new Promise((resolve) => setTimeout(resolve, 3000));
+      const normalize = (value) => (value || '').replace(/\\s+/g, ' ').trim();
+      const limit = ${Math.max(1, Math.min(Number.isNaN(limit) ? 20 : limit, 50))};
+      const grouped = new Map();
+
+      for (const link of Array.from(document.querySelectorAll('a[href*="/p/"]'))) {
+        const rawHref = link.getAttribute('href') || '';
+        if (!rawHref || rawHref === '/p/upgrade') continue;
+        let url = rawHref;
+        if (!url.startsWith('http')) { try { url = new URL(url, document.baseURI).href; } catch { continue; } }
+        const text = normalize(link.textContent);
+        if (!text) continue;
+        if (/^(subscribe|paid|home|about|latest|top|discussions)$/i.test(text)) continue;
+        if (/^[\\d,]+$/.test(text)) continue;
+
+        const entry = grouped.get(url) || { texts: new Set(), date: '' };
+        entry.texts.add(text);
+
+        const container = link.closest('article, section, div') || link.parentElement || link;
+        const containerText = normalize(container.textContent);
+        if (!entry.date) {
+          entry.date = containerText.match(/\\b(?:[A-Z]{3}\\s+\\d{1,2}|[A-Z][a-z]{2}\\s+\\d{1,2})\\b/)?.[0] || '';
+        }
+
+        grouped.set(url, entry);
+      }
+
+      const posts = [];
+      for (const [url, entry] of Array.from(grouped.entries())) {
+        const texts = Array.from(entry.texts).map((text) => normalize(text)).filter((text) => text.length > 3).sort((a, b) => a.length - b.length);
+        const title = texts[0] || '';
+        const description = texts.find((text) => text !== title) || '';
+        if (!title) continue;
+        posts.push({
+          rank: posts.length + 1,
+          title,
+          date: entry.date,
+          description: description.slice(0, 150),
+          url,
+        });
+        if (posts.length >= limit) break;
+      }
+
+      return posts;
+    })()
+  `);
+  // Anything other than an array coming back from evaluate() is treated as "no posts".
+  return Array.isArray(data) ? data : [];
+}
diff --git a/src/pipeline/executor.test.ts b/src/pipeline/executor.test.ts
index 059def6e..19396111 100644
--- a/src/pipeline/executor.test.ts
+++ b/src/pipeline/executor.test.ts
@@ -16,8 +16,7 @@ function createMockPage(overrides: Partial<IPage> = {}): IPage {
     click: vi.fn(),
     typeText: vi.fn(),
     pressKey: vi.fn(),
-    scrollTo: vi.fn().mockResolvedValue(undefined),
-    getFormState: vi.fn().mockResolvedValue({ forms: [], orphanFields: [] }),
+    getFormState: vi.fn().mockResolvedValue({}),
     wait: vi.fn(),
     tabs: vi.fn().mockResolvedValue([]),
     closeTab: vi.fn(),
@@ -26,6 +25,7 @@ function createMockPage(overrides: Partial<IPage> = {}): IPage {
     networkRequests: vi.fn().mockResolvedValue([]),
     consoleMessages: vi.fn().mockResolvedValue(''),
     scroll: vi.fn(),
+    scrollTo: vi.fn(),
     autoScroll: vi.fn(),
     installInterceptor: vi.fn(),
     getInterceptedRequests: vi.fn().mockResolvedValue([]),
@@ -81,6 +81,32 @@ describe('executePipeline', () => {
     ]);
   });
 
+  it('runs inline select inside map step', async () => {
+    const page = createMockPage({
+      evaluate: vi.fn().mockResolvedValue({
+        posts: [
+          { title: 'First', rank: 1 },
+          { title: 'Second', rank: 2 },
+        ],
+      }),
+    });
+    const result = await executePipeline(page, [
+      { evaluate: 'test' },
+      {
+        map: {
+          select: 'posts',
+          title: '${{ item.title }}',
+          rank: '${{ item.rank }}',
+        },
+      },
+    ]);
+
+    expect(result).toEqual([
+      { title: 'First', rank: 1 },
+      { title: 'Second', rank: 2 },
+    ]);
+  });
+
   it('executes limit step', async () => {
     const page = createMockPage({
       evaluate: vi.fn().mockResolvedValue([1, 2, 3, 4, 5]),
diff --git a/src/pipeline/steps/transform.ts b/src/pipeline/steps/transform.ts
index cc909009..fa45dafe 100644
--- a/src/pipeline/steps/transform.ts
+++ b/src/pipeline/steps/transform.ts
@@ -20,13 +20,25 @@ export async function stepSelect(_page: any, params: any, data: any, args: Recor
 
 export async function stepMap(_page: any, params: any, data: any, args: Record<string, any>): Promise<any> {
   if (!data || typeof data !== 'object') return data;
-  let items: any[] = Array.isArray(data) ? data : [data];
-  if (!Array.isArray(data) && typeof data === 'object' && 'data' in data) items = data.data;
+  let source = data;
+
+  // Support inline select: { map: { select: 'path', key: '${{ item.x }}' } }
+  if (params && typeof params === 'object' && 'select' in params) {
+    source = await stepSelect(null, (params as any).select, data, args);
+  }
+
+  if (!source || typeof source !== 'object') return source;
+
+  let items: any[] = Array.isArray(source) ? source : [source];
+  if (!Array.isArray(source) && typeof source === 'object' && 'data' in source) items = source.data;
   const result: any[] = [];
   for (let i = 0; i < items.length; i++) {
     const item = items[i];
     const row: Record<string, any> = {};
-    for (const [key, template] of Object.entries(params)) row[key] = render(template, { args, data, item, index: i });
+    for (const [key, template] of Object.entries(params)) {
+      if (key === 'select') continue;
+      row[key] = render(template, { args, data: source, item, index: i });
+    }
     result.push(row);
   }
   return result;
diff --git a/src/pipeline/template.test.ts b/src/pipeline/template.test.ts
index ef02d8b2..266b4e99 100644
--- a/src/pipeline/template.test.ts
+++ b/src/pipeline/template.test.ts
@@ -57,6 +57,15 @@ describe('evalExpr', () => {
   it('resolves simple path', () => {
     expect(evalExpr('item.title', { item: { title: 'Test' } })).toBe('Test');
   });
+  it('evaluates JS helper expressions', () => {
+    expect(evalExpr('encodeURIComponent(args.keyword)', { args: { keyword: 'hello world' } })).toBe('hello%20world');
+  });
+  it('evaluates ternary expressions', () => {
+    expect(evalExpr("args.kind === 'tech' ? 'technology' : args.kind", { args: { kind: 'tech' } })).toBe('technology');
+  });
+  it('evaluates method calls on values', () => {
+    expect(evalExpr("args.username.startsWith('@') ? args.username : '@' + args.username", { args: { username: 'alice' } })).toBe('@alice');
+  });
   it('applies join filter', () => {
     expect(evalExpr('item.tags | join(,)', { item: { tags: ['a', 'b', 'c'] } })).toBe('a,b,c');
   });
@@ -104,6 +113,15 @@ describe('render', () => {
   it('renders URL template', () => {
     expect(render('https://api.example.com/search?q=${{ args.keyword }}', { args: { keyword: 'test' } })).toBe('https://api.example.com/search?q=test');
   });
+  it('renders inline helper expressions', () => {
+    expect(render('https://example.com/search?q=${{ encodeURIComponent(args.keyword) }}', { args: { keyword: 'hello world' } })).toBe('https://example.com/search?q=hello%20world');
+  });
+  it('renders full multiline expressions', () => {
+    expect(render("${{\n  args.topic ? `https://medium.com/tag/${args.topic}` : 'https://medium.com/tag/technology'\n}}", { args: { topic: 'ai' } })).toBe('https://medium.com/tag/ai');
+  });
+  it('renders block expressions with surrounding whitespace', () => {
+    expect(render("\n  ${{ args.kind === 'tech' ? 'technology' : args.kind }}\n", { args: { kind: 'tech' } })).toBe('technology');
+  });
 });
 
 describe('normalizeEvaluateSource', () => {
diff --git a/src/pipeline/template.ts b/src/pipeline/template.ts
index 5081e497..da1ef9a6 100644
--- a/src/pipeline/template.ts
+++ b/src/pipeline/template.ts
@@ -11,12 +11,13 @@ export interface RenderContext {
 
 export function render(template: any, ctx: RenderContext): any {
   if (typeof template !== 'string') return template;
+  const trimmed = template.trim();
   // Full expression: entire string is a single ${{ ... }}
   // Use [^}] to prevent matching across }} boundaries (e.g. "${{ a }}-${{ b }}")
-  const fullMatch = template.match(/^\$\{\{\s*([^}]*(?:\}[^}][^}]*)*)\s*\}\}$/);
-  if (fullMatch && !template.includes('}}-') && !template.includes('}}${{')) return evalExpr(fullMatch[1].trim(), ctx);
+  const fullMatch = trimmed.match(/^\$\{\{\s*([^}]*(?:\}[^}][^}]*)*)\s*\}\}$/);
+  if (fullMatch && !trimmed.includes('}}-') && !trimmed.includes('}}${{')) return evalExpr(fullMatch[1].trim(), ctx);
   // Check if the entire string is a single expression (no other text around it)
-  const singleExpr = template.match(/^\$\{\{\s*([\s\S]*?)\s*\}\}$/);
+  const singleExpr = trimmed.match(/^\$\{\{\s*([\s\S]*?)\s*\}\}$/);
   if (singleExpr) {
     // Verify it's truly a single expression (no other ${{ inside)
     const inner = singleExpr[1];
@@ -68,7 +69,10 @@ export function evalExpr(expr: string, ctx: RenderContext): any {
     return right.replace(/^['"]|['"]$/g, '');
   }
 
-  return resolvePath(expr, { args, item, data, index });
+  const resolved = resolvePath(expr, { args, item, data, index });
+  if (resolved !== null && resolved !== undefined) return resolved;
+
+  return evalJsExpr(expr, { args, item, data, index });
 }
 
 /**
@@ -145,6 +149,10 @@ function applyFilter(filterExpr: string, value: any): any {
       const parts = value.split(/[/\\]/);
       return parts[parts.length - 1] || value;
     }
+    case 'urlencode':
+      return typeof value === 'string' ? encodeURIComponent(value) : value;
+    case 'urldecode':
+      return typeof value === 'string' ? decodeURIComponent(value) : value;
     default:
       return value;
   }
@@ -171,6 +179,66 @@ export function resolvePath(pathStr: string, ctx: RenderContext): any {
   return obj;
 }
 
+/**
+ * Evaluate an arbitrary JS expression as a last-resort fallback.
+ *
+ * ⚠️  SECURITY NOTE: Uses `new Function()`, which runs `expr` in the global
+ * scope of this Node.js process with NO sandbox. The parameters below only
+ * bind/shadow a few convenient names; they are not an allow-list, so globals
+ * such as `globalThis` and `process` remain reachable from `expr`. This is
+ * tolerable only because YAML adapters come from trusted repo contributors.
+ * If opencli ever loads untrusted third-party adapters, this MUST be replaced
+ * with a proper sandboxed evaluator. Returns undefined if expr is too long or throws.
+ */
+function evalJsExpr(expr: string, ctx: RenderContext): any {
+  // Guard against absurdly long expressions that could indicate injection.
+  if (expr.length > 2000) return undefined;
+
+  const args = ctx.args ?? {};
+  const item = ctx.item ?? {};
+  const data = ctx.data;
+  const index = ctx.index ?? 0;
+
+  try {
+    const fn = new Function(
+      'args',
+      'item',
+      'data',
+      'index',
+      'encodeURIComponent',
+      'decodeURIComponent',
+      'JSON',
+      'Math',
+      'Number',
+      'String',
+      'Boolean',
+      'Array',
+      'Object',
+      'Date',
+      `"use strict"; return (${expr});`,
+    );
+
+    return fn(
+      args,
+      item,
+      data,
+      index,
+      encodeURIComponent,
+      decodeURIComponent,
+      JSON,
+      Math,
+      Number,
+      String,
+      Boolean,
+      Array,
+      Object,
+      Date,
+    );
+  } catch {
+    return undefined;
+  }
+}
+
 /**
  * Normalize JavaScript source for browser evaluate() calls.
  */
diff --git a/src/pipeline/transform.test.ts b/src/pipeline/transform.test.ts
index b6780011..ff1943c1 100644
--- a/src/pipeline/transform.test.ts
+++ b/src/pipeline/transform.test.ts
@@ -58,6 +58,19 @@ describe('stepMap', () => {
   it('returns null/undefined as-is', async () => {
     expect(await stepMap(null, { x: '${{ item.x }}' }, null, {})).toBeNull();
   });
+
+  it('supports inline select before mapping', async () => {
+    const result = await stepMap(null, {
+      select: 'posts',
+      title: '${{ item.title }}',
+      rank: '${{ index + 1 }}',
+    }, { posts: [{ title: 'One' }, { title: 'Two' }] }, {});
+
+    expect(result).toEqual([
+      { title: 'One', rank: 1 },
+      { title: 'Two', rank: 2 },
+    ]);
+  });
 });
 
 describe('stepFilter', () => {