Decodo · paulius-krutkis-dcd · Jun 10, 2026
diff --git a/apps/backend/src/features/decodo/decodo.service.spec.ts b/apps/backend/src/features/decodo/decodo.service.spec.ts
@@ -148,6 +148,35 @@ describe('DecodoService', () => {
       ).rejects.toThrow(ServiceUnavailableException);
     });
 
+    it('throws ServiceUnavailableException with Decodo message when status is failed', async () => {
+      // No `results` array here: the failure is reported via top-level status/message fields.
+      const failure = { status: 'failed', status_code: 613, message: 'Target returned an error' };
+      fetchSpy.mockResolvedValue({
+        ok: true,
+        json: () => Promise.resolve(failure),
+      } as unknown as Response);
+
+      const scrape = service.scrape({
+        target: 'universal',
+        url: 'https://reddit.com/search.json?q=test',
+      });
+      await expect(scrape).rejects.toThrow(ServiceUnavailableException);
+      await expect(scrape).rejects.toThrow('Decodo scrape failed: Target returned an error');
+    });
+
+    it('includes headless in request body when provided', async () => {
+      fetchSpy.mockResolvedValue(makeDecodoFetch('{"data":{"children":[]}}', 200));
+
+      await service.scrape({
+        target: 'universal',
+        url: 'https://www.reddit.com/search.json?q=test',
+        headless: 'html',
+      });
+
+      const body = JSON.parse((fetchSpy.mock.calls[0][1] as RequestInit).body as string);
+      expect(body.headless).toBe('html');
+    });
+
     it('returns { status, content, url, target } from results[0]', async () => {
       const content = '{"data":{"children":[]}}';
       fetchSpy.mockResolvedValue(makeDecodoFetch(content, 200));
@@ -169,6 +198,15 @@ describe('DecodoService', () => {
   // ---------------------------------------------------------------------------
 
   describe('searchReddit()', () => {
+    it('sends headless: html for Reddit JSON search URLs', async () => {
+      fetchSpy.mockResolvedValue(makeDecodoFetch(makePostListingJson([{ id: 'p1' }]), 200));
+
+      await service.searchReddit({ query: 'test', timeRange: 'week' });
+
+      const body = JSON.parse((fetchSpy.mock.calls[0][1] as RequestInit).body as string);
+      expect(body.headless).toBe('html');
+    });
+
     it('builds URL with encoded query and correct t= timeRange param', async () => {
       fetchSpy.mockResolvedValue(makeDecodoFetch(makePostListingJson([{ id: 'p1' }]), 200));
 
@@ -346,15 +384,15 @@ describe('DecodoService', () => {
       expect(posts.map((p) => p.id)).toEqual(['r1', 'r2', 'r3']);
     });
 
-    it('builds URL with subreddit hot feed, uses reddit_subreddit target', async () => {
+    it('builds hot.json URL and uses universal target with headless html', async () => {
       fetchSpy.mockResolvedValue(makeDecodoFetch(makePostListingJson([{ id: 'r1' }]), 200));
 
       await service.scrapeSubreddit({ subreddit: 'javascript' });
 
       const body = JSON.parse((fetchSpy.mock.calls[0][1] as RequestInit).body as string);
-      expect(body.target).toBe('reddit_subreddit');
-      expect(body.url).toContain('/r/javascript');
-      expect(body.url).toContain('sort=hot');
+      expect(body.target).toBe('universal');
+      expect(body.headless).toBe('html');
+      expect(body.url).toContain('/r/javascript/hot.json');
     });
 
     it('strips `r/` prefix from subreddit param so URL is not /r/r/foo', async () => {
@@ -363,9 +401,19 @@ describe('DecodoService', () => {
       await service.scrapeSubreddit({ subreddit: 'r/programming' });
 
       const body = JSON.parse((fetchSpy.mock.calls[0][1] as RequestInit).body as string);
-      expect(body.url).toContain('/r/programming.json');
+      expect(body.url).toContain('/r/programming/hot.json');
       expect(body.url).not.toContain('/r/r/');
     });
+
+    it('throws ServiceUnavailableException when Reddit returns an HTML block page', async () => {
+      fetchSpy.mockResolvedValue(
+        makeDecodoFetch('<!DOCTYPE html><html><body>Access denied</body></html>', 200),
+      );
+
+      await expect(service.scrapeSubreddit({ subreddit: 'programming' })).rejects.toThrow(
+        ServiceUnavailableException,
+      );
+    });
   });
 
   // ---------------------------------------------------------------------------
@@ -404,6 +452,7 @@ describe('DecodoService', () => {
 
       const body = JSON.parse((fetchSpy.mock.calls[0][1] as RequestInit).body as string);
       expect(body.target).toBe('universal');
+      expect(body.headless).toBe('html');
     });
 
     it('returns { id: postId, comments: [] } when status is 404', async () => {

diff --git a/apps/backend/src/features/decodo/decodo.service.ts b/apps/backend/src/features/decodo/decodo.service.ts
@@ -56,6 +56,7 @@ export class DecodoService {
         target: request.target,
         url: request.url,
         locale: request.locale ?? 'en',
+        ...(request.headless ? { headless: request.headless } : {}),
       }),
       signal,
     });
@@ -76,6 +77,19 @@ export class DecodoService {
     const first = results?.[0];
 
     if (!first) {
+      const decodoStatus = raw['status'];
+      const decodoStatusCode = raw['status_code'];
+      const decodoMessage = raw['message'];
+
+      if (decodoStatus === 'failed') {
+        const detail =
+          typeof decodoMessage === 'string'
+            ? decodoMessage
+            : `status code ${String(decodoStatusCode ?? 'unknown')}`;
+        this.logger.warn(`[Decodo] Scrape failed: ${detail}`);
+        throw new ServiceUnavailableException(`Decodo scrape failed: ${detail}`);
+      }
+
       this.logger.warn(
         `[Decodo] Unexpected response shape (keys: ${Object.keys(raw).join(', ')}): ` +
           JSON.stringify(raw).slice(0, 300),
@@ -120,8 +134,8 @@ export class DecodoService {
     const encodedQuery = encodeURIComponent(finalQuery);
     const url = `https://www.reddit.com/search.json?q=${encodedQuery}&sort=relevance&t=${timeRange}&limit=${limit}`;
 
-    const result = await this.scrape({ target: 'universal', url }, signal);
-    return this.parsePostListing(result.content as string | object, 'universal');
+    const result = await this.scrape({ target: 'universal', url, headless: 'html' }, signal);
+    return this.parsePostListing(result.content as string | object, 'universal', result.status);
   }
 
   // ---------------------------------------------------------------------------
@@ -134,10 +148,10 @@ export class DecodoService {
   ): Promise<RedditPost[]> {
     const { limit = 25 } = params;
     const subreddit = normalizeSubreddit(params.subreddit);
-    const url = `https://www.reddit.com/r/${subreddit}.json?sort=hot&limit=${limit}`;
+    const url = `https://www.reddit.com/r/${subreddit}/hot.json?limit=${limit}`;
 
-    const result = await this.scrape({ target: 'reddit_subreddit', url }, signal);
-    return this.parsePostListing(result.content as string | object, 'reddit_subreddit');
+    const result = await this.scrape({ target: 'universal', url, headless: 'html' }, signal);
+    return this.parsePostListing(result.content as string | object, 'universal', result.status);
   }
 
   // ---------------------------------------------------------------------------
@@ -154,7 +168,7 @@ export class DecodoService {
 
     // Use universal target: reddit_post returns 404 for .json URLs;
     // universal fetches the raw JSON string which our parser already handles correctly.
-    const result = await this.scrape({ target: 'universal', url }, signal);
+    const result = await this.scrape({ target: 'universal', url, headless: 'html' }, signal);
 
     if (result.status !== 200) {
       this.logger.warn(`[scrapePost] Skipping post ${postId} — status ${result.status}`);
@@ -182,13 +196,40 @@ export class DecodoService {
 
   private parseContent<T>(content: string | object): T {
     if (typeof content === 'string') {
-      return JSON.parse(content) as T;
+      const trimmed = content.trim();
+      if (this.looksLikeBlockedPage(trimmed)) {
+        throw new ServiceUnavailableException(
+          'Reddit returned a block page instead of JSON. Check your Decodo API key and quota.',
+        );
+      }
+      return JSON.parse(trimmed) as T;
     }
     return content as T;
   }
 
-  private parsePostListing(content: string | object, _target: DecodoTarget): RedditPost[] {
+  /**
+   * True when `content` looks like an HTML/anti-bot page rather than JSON (first 200 chars only).
+   */
+  private looksLikeBlockedPage(content: string): boolean {
+    // parseContent passes trimmed text, so real JSON starts with `{` or `[`. Exempt it: post text
+    // near the top of a listing (e.g. "access denied", "<body") must not read as a block page.
+    if (!content || content.startsWith('{') || content.startsWith('[')) return false;
+    const head = content.slice(0, 200).toLowerCase();
+    if (head.startsWith('<!doctype') || head.startsWith('<html')) return true;
+    return ['<body', "you've been blocked", 'access denied'].some((m) => head.includes(m));
+  }
+
+  private parsePostListing(
+    content: string | object,
+    _target: DecodoTarget,
+    status = 200,
+  ): RedditPost[] {
     try {
+      if (status !== 200) {
+        this.logger.warn(`[Parser] Skipping listing — HTTP status ${status}`);
+        return [];
+      }
+
       const json = this.parseContent<{
         data?: {
           children?: Array<{ data: Record<string, unknown> }>;
@@ -199,6 +240,7 @@ export class DecodoService {
       this.logger.log(`[Parser] parsePostListing found ${children.length} children`);
       return children.map((child) => this.mapPost(child.data));
     } catch (err) {
+      if (err instanceof ServiceUnavailableException) throw err;
       this.logger.warn(`Failed to parse post listing: ${String(err)}`);
       return [];
     }

diff --git a/apps/backend/src/features/decodo/decodo.types.ts b/apps/backend/src/features/decodo/decodo.types.ts
@@ -4,6 +4,7 @@ export interface DecodoScrapeRequest {
   target: DecodoTarget;
   url: string;
   locale?: string;
+  headless?: 'html';
 }
 
 export interface DecodoScrapeResponse {

diff --git a/apps/backend/src/features/llm/llm.constants.ts b/apps/backend/src/features/llm/llm.constants.ts
@@ -1,6 +1,6 @@
 export const LLM_DEFAULTS = {
   claude: {
-    model: 'claude-sonnet-4-20250514',
+    model: 'claude-sonnet-4-6',
   },
   openai: {
     model: 'gpt-4o',
@@ -22,7 +22,7 @@ Analyze the prompt and return a JSON object with exactly this shape:
 
 Rules:
 - subreddits: 2–5 subreddits where this specific topic is actually discussed. Use bare names WITHOUT the "r/" prefix (e.g. "lithuania", not "r/lithuania"). Prefer niche, topic-specific communities over large generic ones (e.g. for Lithuanian drama prefer "lithuania", "europe", "worldcinema" over "drama" or "television"). NEVER pick "drama" — that subreddit is for internet gossip, not theatrical/film drama.
-- queries: 2–5 search queries. The FIRST query MUST be the user's exact prompt verbatim (or with minimal rephrasing if needed for clarity). Remaining queries may explore related angles. Wrap multi-word phrases in double quotes for exact matching (e.g. "Lithuanian drama").
+- queries: 2–5 Reddit search queries. The FIRST query MUST be the core topic or product name from the prompt (e.g. "Firecrawl", "AI coding tools") — NOT the full natural-language prompt. Remaining queries may explore related angles (reviews, alternatives, complaints). Wrap product names and multi-word phrases in double quotes for exact matching.
 - timeRange: one of "day", "week", "month", "year" — pick based on the topic's recency needs. Use "year" or "month" for niche cultural topics where recent results may be sparse.
 - rationale: 1–2 sentences explaining your choices
 - Return ONLY valid JSON, no markdown, no extra text`;

diff --git a/apps/backend/src/features/llm/llm.service.spec.ts b/apps/backend/src/features/llm/llm.service.spec.ts
@@ -131,7 +131,7 @@ describe('LlmService', () => {
 
         await service.complete({ ...baseRequest, provider: 'claude' });
 
-        expect(mockAnthropicMessagesCreate.mock.calls[0][0].model).toBe('claude-sonnet-4-20250514');
+        expect(mockAnthropicMessagesCreate.mock.calls[0][0].model).toBe('claude-sonnet-4-6');
       });
 
       it('throws BadRequestException when anthropicApiKey is missing', async () => {
@@ -141,6 +141,18 @@ describe('LlmService', () => {
           BadRequestException,
         );
       });
+
+      it('throws HttpException 429 when Anthropic returns rate_limit_error', async () => {
+        mockAnthropicMessagesCreate.mockRejectedValueOnce(
+          Object.assign(new Error('429 rate_limit_error'), { status: 429 }),
+        );
+
+        await expect(
+          service.complete({ ...baseRequest, provider: 'claude' }),
+        ).rejects.toMatchObject({
+          status: 429,
+        });
+      });
     });
 
     describe('OpenAI', () => {

diff --git a/apps/backend/src/features/llm/strategies/claude.strategy.ts b/apps/backend/src/features/llm/strategies/claude.strategy.ts
@@ -1,4 +1,4 @@
-import { BadRequestException, Logger } from '@nestjs/common';
+import { BadRequestException, HttpException, Logger } from '@nestjs/common';
 import Anthropic from '@anthropic-ai/sdk';
 import { LLM_DEFAULTS } from '../llm.constants';
 import type { LlmResponse } from '../llm.types';
@@ -16,20 +16,49 @@ export class ClaudeStrategy implements LlmStrategy {
     const client = new Anthropic({ apiKey: config.anthropicApiKey });
     this.logger.log(`Calling Claude model: ${model}`);
 
-    const response = await client.messages.create(
-      {
-        model,
-        max_tokens: 4096,
-        messages: request.messages.map((m) => ({
-          role: m.role,
-          content: m.content,
-        })),
-      },
-      { signal },
-    );
-
-    const content = response.content[0].type === 'text' ? response.content[0].text : '';
-
-    return { content, provider: 'claude', model };
+    try {
+      const response = await client.messages.create(
+        {
+          model,
+          max_tokens: 4096,
+          messages: request.messages.map((m) => ({
+            role: m.role,
+            content: m.content,
+          })),
+        },
+        { signal },
+      );
+
+      const content = response.content[0].type === 'text' ? response.content[0].text : '';
+
+      return { content, provider: 'claude', model };
+    } catch (err) {
+      throw ClaudeStrategy.toHttpException(err);
+    }
+  }
+
+  /**
+   * Maps an Anthropic SDK failure to a Nest HTTP exception with a user-facing message.
+   * Anything without a numeric `status` (including a thrown null/undefined) is re-thrown as-is.
+   */
+  private static toHttpException(err: unknown): HttpException | BadRequestException {
+    const status = (err as { status?: unknown } | null | undefined)?.status;
+    if (typeof status !== 'number') throw err;
+
+    if (status === 429) {
+      return new HttpException(
+        'Anthropic rate limit reached. Wait a minute and try again, or switch LLM provider in Settings.',
+        429,
+      );
+    }
+    if (status === 401) {
+      return new BadRequestException('ANTHROPIC_API_KEY is invalid or expired');
+    }
+    if (status === 404) {
+      return new BadRequestException('Anthropic model not found. Update the model in Settings.');
+    }
+
+    const message = err instanceof Error ? err.message : String(err);
+    return new HttpException(`Anthropic API error: ${message}`, status);
+  }
 }