Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 54 additions & 5 deletions apps/backend/src/features/decodo/decodo.service.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,35 @@ describe('DecodoService', () => {
).rejects.toThrow(ServiceUnavailableException);
});

it('throws ServiceUnavailableException with Decodo message when status is failed', async () => {
  // The mocked response is `ok: true`, but its JSON body reports `status: 'failed'`
  // and carries no `results` entry.
  fetchSpy.mockResolvedValue({
    ok: true,
    json: () =>
      Promise.resolve({
        status: 'failed',
        status_code: 613,
        message: 'Target returned an error',
      }),
  } as unknown as Response);

  const scrape = service.scrape({
    target: 'universal',
    url: 'https://reddit.com/search.json?q=test',
  });

  // Assert the exception class as well as the text: a string matcher alone would
  // also pass for any other error type that happens to carry the same message.
  await expect(scrape).rejects.toThrow(ServiceUnavailableException);
  await expect(scrape).rejects.toThrow('Decodo scrape failed: Target returned an error');
});

it('includes headless in request body when provided', async () => {
  const upstream = makeDecodoFetch('{"data":{"children":[]}}', 200);
  fetchSpy.mockResolvedValue(upstream);

  await service.scrape({
    target: 'universal',
    url: 'https://www.reddit.com/search.json?q=test',
    headless: 'html',
  });

  // The second argument of the first fetch call is the RequestInit holding the JSON body.
  const init = fetchSpy.mock.calls[0][1] as RequestInit;
  const sentBody = JSON.parse(init.body as string);
  expect(sentBody.headless).toBe('html');
});

it('returns { status, content, url, target } from results[0]', async () => {
const content = '{"data":{"children":[]}}';
fetchSpy.mockResolvedValue(makeDecodoFetch(content, 200));
Expand All @@ -169,6 +198,15 @@ describe('DecodoService', () => {
// ---------------------------------------------------------------------------

describe('searchReddit()', () => {
it('sends headless: html for Reddit JSON search URLs', async () => {
  const listing = makePostListingJson([{ id: 'p1' }]);
  fetchSpy.mockResolvedValue(makeDecodoFetch(listing, 200));

  await service.searchReddit({ query: 'test', timeRange: 'week' });

  // Inspect the JSON payload handed to the first fetch call.
  const init = fetchSpy.mock.calls[0][1] as RequestInit;
  const sentBody = JSON.parse(init.body as string);
  expect(sentBody.headless).toBe('html');
});

it('builds URL with encoded query and correct t= timeRange param', async () => {
fetchSpy.mockResolvedValue(makeDecodoFetch(makePostListingJson([{ id: 'p1' }]), 200));

Expand Down Expand Up @@ -346,15 +384,15 @@ describe('DecodoService', () => {
expect(posts.map((p) => p.id)).toEqual(['r1', 'r2', 'r3']);
});

it('builds URL with subreddit hot feed, uses reddit_subreddit target', async () => {
it('builds hot.json URL and uses universal target with headless html', async () => {
fetchSpy.mockResolvedValue(makeDecodoFetch(makePostListingJson([{ id: 'r1' }]), 200));

await service.scrapeSubreddit({ subreddit: 'javascript' });

const body = JSON.parse((fetchSpy.mock.calls[0][1] as RequestInit).body as string);
expect(body.target).toBe('reddit_subreddit');
expect(body.url).toContain('/r/javascript');
expect(body.url).toContain('sort=hot');
expect(body.target).toBe('universal');
expect(body.headless).toBe('html');
expect(body.url).toContain('/r/javascript/hot.json');
});

it('strips `r/` prefix from subreddit param so URL is not /r/r/foo', async () => {
Expand All @@ -363,9 +401,19 @@ describe('DecodoService', () => {
await service.scrapeSubreddit({ subreddit: 'r/programming' });

const body = JSON.parse((fetchSpy.mock.calls[0][1] as RequestInit).body as string);
expect(body.url).toContain('/r/programming.json');
expect(body.url).toContain('/r/programming/hot.json');
expect(body.url).not.toContain('/r/r/');
});

it('throws ServiceUnavailableException when Reddit returns an HTML block page', async () => {
  // The scraped content is an HTML document instead of a JSON listing.
  const blockPage = '<!DOCTYPE html><html><body>Access denied</body></html>';
  fetchSpy.mockResolvedValue(makeDecodoFetch(blockPage, 200));

  const attempt = service.scrapeSubreddit({ subreddit: 'programming' });

  await expect(attempt).rejects.toThrow(ServiceUnavailableException);
});
});

// ---------------------------------------------------------------------------
Expand Down Expand Up @@ -404,6 +452,7 @@ describe('DecodoService', () => {

const body = JSON.parse((fetchSpy.mock.calls[0][1] as RequestInit).body as string);
expect(body.target).toBe('universal');
expect(body.headless).toBe('html');
});

it('returns { id: postId, comments: [] } when status is 404', async () => {
Expand Down
58 changes: 50 additions & 8 deletions apps/backend/src/features/decodo/decodo.service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ export class DecodoService {
target: request.target,
url: request.url,
locale: request.locale ?? 'en',
...(request.headless ? { headless: request.headless } : {}),
}),
signal,
});
Expand All @@ -76,6 +77,19 @@ export class DecodoService {
const first = results?.[0];

if (!first) {
const decodoStatus = raw['status'];
const decodoStatusCode = raw['status_code'];
const decodoMessage = raw['message'];

if (decodoStatus === 'failed') {
const detail =
typeof decodoMessage === 'string'
? decodoMessage
: `status code ${String(decodoStatusCode ?? 'unknown')}`;
this.logger.warn(`[Decodo] Scrape failed: ${detail}`);
throw new ServiceUnavailableException(`Decodo scrape failed: ${detail}`);
}

this.logger.warn(
`[Decodo] Unexpected response shape (keys: ${Object.keys(raw).join(', ')}): ` +
JSON.stringify(raw).slice(0, 300),
Expand Down Expand Up @@ -120,8 +134,8 @@ export class DecodoService {
const encodedQuery = encodeURIComponent(finalQuery);
const url = `https://www.reddit.com/search.json?q=${encodedQuery}&sort=relevance&t=${timeRange}&limit=${limit}`;

const result = await this.scrape({ target: 'universal', url }, signal);
return this.parsePostListing(result.content as string | object, 'universal');
const result = await this.scrape({ target: 'universal', url, headless: 'html' }, signal);
return this.parsePostListing(result.content as string | object, 'universal', result.status);
}

// ---------------------------------------------------------------------------
Expand All @@ -134,10 +148,10 @@ export class DecodoService {
): Promise<RedditPost[]> {
const { limit = 25 } = params;
const subreddit = normalizeSubreddit(params.subreddit);
const url = `https://www.reddit.com/r/${subreddit}.json?sort=hot&limit=${limit}`;
const url = `https://www.reddit.com/r/${subreddit}/hot.json?limit=${limit}`;

const result = await this.scrape({ target: 'reddit_subreddit', url }, signal);
return this.parsePostListing(result.content as string | object, 'reddit_subreddit');
const result = await this.scrape({ target: 'universal', url, headless: 'html' }, signal);
return this.parsePostListing(result.content as string | object, 'universal', result.status);
}

// ---------------------------------------------------------------------------
Expand All @@ -154,7 +168,7 @@ export class DecodoService {

// Use universal target: reddit_post returns 404 for .json URLs;
// universal fetches the raw JSON string which our parser already handles correctly.
const result = await this.scrape({ target: 'universal', url }, signal);
const result = await this.scrape({ target: 'universal', url, headless: 'html' }, signal);

if (result.status !== 200) {
this.logger.warn(`[scrapePost] Skipping post ${postId} — status ${result.status}`);
Expand Down Expand Up @@ -182,13 +196,40 @@ export class DecodoService {

private parseContent<T>(content: string | object): T {
if (typeof content === 'string') {
return JSON.parse(content) as T;
const trimmed = content.trim();
if (this.looksLikeBlockedPage(trimmed)) {
throw new ServiceUnavailableException(
'Reddit returned a block page instead of JSON. Check your Decodo API key and quota.',
);
}
return JSON.parse(trimmed) as T;
}
return content as T;
}

private parsePostListing(content: string | object, _target: DecodoTarget): RedditPost[] {
/**
 * Heuristically decides whether a response body is an HTML block/error page
 * rather than a JSON payload.
 *
 * Only the first 200 characters (after leading whitespace) are inspected,
 * case-insensitively.
 *
 * @param content - Raw response body text.
 * @returns `true` when the body starts like an HTML document, contains `<body`,
 *   or contains a known block phrase; `false` for empty input and for bodies
 *   that start like a JSON object or array.
 */
private looksLikeBlockedPage(content: string): boolean {
  if (!content) return false;

  // Ignore leading whitespace / BOM so the `startsWith` checks work on untrimmed input.
  const head = content.trimStart().slice(0, 200).toLowerCase();
  if (!head) return false;

  // A body that opens like a JSON object or array is a payload, not a block page,
  // even when user-generated text near the top contains "<body" or "access denied".
  if (head.startsWith('{') || head.startsWith('[')) return false;

  return (
    head.startsWith('<!doctype') ||
    head.startsWith('<html') ||
    head.includes('<body') ||
    head.includes("you've been blocked") ||
    head.includes('access denied')
  );
}

private parsePostListing(
content: string | object,
_target: DecodoTarget,
status = 200,
): RedditPost[] {
try {
if (status !== 200) {
this.logger.warn(`[Parser] Skipping listing — HTTP status ${status}`);
return [];
}

const json = this.parseContent<{
data?: {
children?: Array<{ data: Record<string, unknown> }>;
Expand All @@ -199,6 +240,7 @@ export class DecodoService {
this.logger.log(`[Parser] parsePostListing found ${children.length} children`);
return children.map((child) => this.mapPost(child.data));
} catch (err) {
if (err instanceof ServiceUnavailableException) throw err;
this.logger.warn(`Failed to parse post listing: ${String(err)}`);
return [];
}
Expand Down
1 change: 1 addition & 0 deletions apps/backend/src/features/decodo/decodo.types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ export interface DecodoScrapeRequest {
target: DecodoTarget;
url: string;
locale?: string;
headless?: 'html';
}

export interface DecodoScrapeResponse {
Expand Down
4 changes: 2 additions & 2 deletions apps/backend/src/features/llm/llm.constants.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
export const LLM_DEFAULTS = {
claude: {
model: 'claude-sonnet-4-20250514',
model: 'claude-sonnet-4-6',
},
openai: {
model: 'gpt-4o',
Expand All @@ -22,7 +22,7 @@ Analyze the prompt and return a JSON object with exactly this shape:

Rules:
- subreddits: 2–5 subreddits where this specific topic is actually discussed. Use bare names WITHOUT the "r/" prefix (e.g. "lithuania", not "r/lithuania"). Prefer niche, topic-specific communities over large generic ones (e.g. for Lithuanian drama prefer "lithuania", "europe", "worldcinema" over "drama" or "television"). NEVER pick "drama" — that subreddit is for internet gossip, not theatrical/film drama.
- queries: 2–5 search queries. The FIRST query MUST be the user's exact prompt verbatim (or with minimal rephrasing if needed for clarity). Remaining queries may explore related angles. Wrap multi-word phrases in double quotes for exact matching (e.g. "Lithuanian drama").
- queries: 2–5 Reddit search queries. The FIRST query MUST be the core topic or product name from the prompt (e.g. "Firecrawl", "AI coding tools") — NOT the full natural-language prompt. Remaining queries may explore related angles (reviews, alternatives, complaints). Wrap product names and multi-word phrases in double quotes for exact matching.
- timeRange: one of "day", "week", "month", "year" — pick based on the topic's recency needs. Use "year" or "month" for niche cultural topics where recent results may be sparse.
- rationale: 1–2 sentences explaining your choices
- Return ONLY valid JSON, no markdown, no extra text`;
Expand Down
14 changes: 13 additions & 1 deletion apps/backend/src/features/llm/llm.service.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ describe('LlmService', () => {

await service.complete({ ...baseRequest, provider: 'claude' });

expect(mockAnthropicMessagesCreate.mock.calls[0][0].model).toBe('claude-sonnet-4-20250514');
expect(mockAnthropicMessagesCreate.mock.calls[0][0].model).toBe('claude-sonnet-4-6');
});

it('throws BadRequestException when anthropicApiKey is missing', async () => {
Expand All @@ -141,6 +141,18 @@ describe('LlmService', () => {
BadRequestException,
);
});

it('throws HttpException 429 when Anthropic returns rate_limit_error', async () => {
  mockAnthropicMessagesCreate.mockRejectedValueOnce(
    Object.assign(new Error('429 rate_limit_error'), { status: 429 }),
  );

  // The raw SDK error above already has `status: 429`, so checking the status alone
  // would pass even if the error were never mapped. Also require the mapped message,
  // which the raw error ('429 rate_limit_error') does not contain.
  await expect(
    service.complete({ ...baseRequest, provider: 'claude' }),
  ).rejects.toMatchObject({
    status: 429,
    message: expect.stringContaining('Anthropic rate limit reached'),
  });
});
});

describe('OpenAI', () => {
Expand Down
61 changes: 45 additions & 16 deletions apps/backend/src/features/llm/strategies/claude.strategy.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { BadRequestException, Logger } from '@nestjs/common';
import { BadRequestException, HttpException, Logger } from '@nestjs/common';
import Anthropic from '@anthropic-ai/sdk';
import { LLM_DEFAULTS } from '../llm.constants';
import type { LlmResponse } from '../llm.types';
Expand All @@ -16,20 +16,49 @@ export class ClaudeStrategy implements LlmStrategy {
const client = new Anthropic({ apiKey: config.anthropicApiKey });
this.logger.log(`Calling Claude model: ${model}`);

const response = await client.messages.create(
{
model,
max_tokens: 4096,
messages: request.messages.map((m) => ({
role: m.role,
content: m.content,
})),
},
{ signal },
);

const content = response.content[0].type === 'text' ? response.content[0].text : '';

return { content, provider: 'claude', model };
try {
const response = await client.messages.create(
{
model,
max_tokens: 4096,
messages: request.messages.map((m) => ({
role: m.role,
content: m.content,
})),
},
{ signal },
);

const content = response.content[0].type === 'text' ? response.content[0].text : '';

return { content, provider: 'claude', model };
} catch (err) {
throw ClaudeStrategy.toHttpException(err);
}
}

/**
 * Translates an error caught around the Anthropic SDK call into a NestJS HTTP exception.
 *
 * Mapping, based on the error's `status` property:
 * - 429 → `HttpException` with status 429 and a retry hint
 * - 401 → `BadRequestException` (invalid or expired API key)
 * - 404 → `BadRequestException` (model not found)
 * - any other numeric status → `HttpException` with that status and the original message
 * - a defined but non-numeric status → `HttpException` with status 502
 *
 * @param err - The value caught from the SDK call; may be anything, including `null`.
 * @returns The exception the caller should throw.
 * @throws The original `err`, unchanged, when it carries no `status`.
 */
private static toHttpException(err: unknown): HttpException | BadRequestException {
  // Guard the property read: a rejection value of `null`/`undefined` must be rethrown
  // as-is instead of being masked by a TypeError from `err.status`.
  const status: unknown =
    typeof err === 'object' && err !== null ? (err as { status?: unknown }).status : undefined;
  if (status === undefined) {
    throw err;
  }

  if (status === 429) {
    return new HttpException(
      'Anthropic rate limit reached. Wait a minute and try again, or switch LLM provider in Settings.',
      429,
    );
  }

  if (status === 401) {
    return new BadRequestException('ANTHROPIC_API_KEY is invalid or expired');
  }

  if (status === 404) {
    return new BadRequestException('Anthropic model not found. Update the model in Settings.');
  }

  const message = err instanceof Error ? err.message : String(err);
  // Only a numeric status is forwarded as the HTTP code; anything else falls back to 502.
  const httpStatus = typeof status === 'number' ? status : 502;
  return new HttpException(`Anthropic API error: ${message}`, httpStatus);
}
}
Loading