Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .rubocop.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ Metrics/BlockLength:
Metrics/ClassLength:
Max: 200

Metrics/ModuleLength:
Max: 120

Metrics/MethodLength:
Max: 30

Expand Down
15 changes: 15 additions & 0 deletions lib/scopes_extractor/config.rb
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,21 @@ def timeout
http[:timeout] || 30
end

# Maximum number of retries on retryable HTTP responses (429/5xx).
def http_retry_max_attempts
http[:retry_max_attempts] || 5
end

# Base delay (seconds) for exponential backoff: base * 2**attempt.
def http_retry_base_delay
http[:retry_base_delay] || 3
end

# Cap (seconds) on a single retry wait. Covers HackerOne's ~60s window.
def http_retry_max_delay
http[:retry_max_delay] || 70
end

def api
load[:api] || {}
end
Expand Down
63 changes: 58 additions & 5 deletions lib/scopes_extractor/http.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@

module ScopesExtractor
module HTTP
# HTTP status codes that are worth retrying with backoff.
# 429 = rate limited, 5xx = transient server-side errors.
RETRYABLE_CODES = [429, 500, 502, 503, 504].freeze

class << self
attr_reader :hydra, :cookie_file

Expand Down Expand Up @@ -35,15 +39,64 @@ def post(url, body:, headers: {}, timeout: nil)
request(:post, url, body: body, headers: headers, timeout: timeout)
end

def retryable?(response)
RETRYABLE_CODES.include?(response.code)
end

# Computes how long to wait before the next retry.
# Honours a server-provided Retry-After header when present, otherwise
# falls back to capped exponential backoff with positive jitter.
# The jitter desynchronizes clients sharing the same token/IP, which is
# the main cause of repeated 429s when several tools run concurrently.
# @return [Float] delay in seconds
def retry_delay(response, attempt)
retry_after = parse_retry_after(response)
return retry_after if retry_after

base = Config.http_retry_base_delay.to_f
cap = Config.http_retry_max_delay.to_f

backoff = [base * (2**attempt), cap].min
[backoff + (rand * base), cap].min
end

# Parses the Retry-After header (delay in seconds form only).
# Capped to http_retry_max_delay to keep waits bounded.
# @return [Float, nil] seconds to wait, or nil if absent/unparseable
def parse_retry_after(response)
headers = response.headers || {}
value = headers['Retry-After'] || headers['retry-after']
return nil unless value.to_s.strip.match?(/\A\d+\z/)

[value.to_i, Config.http_retry_max_delay].min.to_f
end

# Extracted for testability (stubbed in specs to avoid real sleeping).
def wait(seconds)
sleep(seconds)
end

private

def request(method, url, body: nil, headers: {}, timeout: nil)
options = build_options(body, headers, timeout)

response = Typhoeus::Request.new(url, options.merge(method: method)).run

log_request(method, url, response)
response
max_attempts = Config.http_retry_max_attempts
attempt = 0

loop do
response = Typhoeus::Request.new(url, options.merge(method: method)).run
log_request(method, url, response)

return response unless retryable?(response) && attempt < max_attempts

delay = retry_delay(response, attempt)
ScopesExtractor.logger.warn(
"#{method.to_s.upcase} #{url} → #{response.code}, retrying in #{delay.round(1)}s " \
"(attempt #{attempt + 1}/#{max_attempts})"
)
wait(delay)
attempt += 1
end
end

def build_options(body, headers, timeout)
Expand Down
131 changes: 131 additions & 0 deletions spec/scopes_extractor/http_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
# frozen_string_literal: true

require 'spec_helper'

RSpec.describe ScopesExtractor::HTTP do
def fake_response(code, headers: {}, body: '{}')
instance_double(
Typhoeus::Response,
code: code,
headers: headers,
body: body,
total_time: 0.1,
success?: (200..299).cover?(code)
)
end

before do
# Avoid touching the real cookie jar and never sleep during tests.
allow(described_class).to receive_messages(build_options: {}, wait: nil)
end

describe '#get' do
it 'returns the response without retrying on success' do
request = instance_double(Typhoeus::Request, run: fake_response(200))
allow(Typhoeus::Request).to receive(:new).and_return(request)

response = described_class.get('https://example.com')

expect(response.code).to eq(200)
expect(described_class).not_to have_received(:wait)
expect(request).to have_received(:run).once
end

it 'retries on 429 and returns the first successful response' do
request = instance_double(Typhoeus::Request)
allow(request).to receive(:run).and_return(
fake_response(429), fake_response(429), fake_response(200)
)
allow(Typhoeus::Request).to receive(:new).and_return(request)

response = described_class.get('https://example.com')

expect(response.code).to eq(200)
expect(request).to have_received(:run).exactly(3).times
expect(described_class).to have_received(:wait).twice
end

it 'retries on transient 5xx responses' do
request = instance_double(Typhoeus::Request)
allow(request).to receive(:run).and_return(fake_response(503), fake_response(200))
allow(Typhoeus::Request).to receive(:new).and_return(request)

expect(described_class.get('https://example.com').code).to eq(200)
expect(described_class).to have_received(:wait).once
end

it 'gives up after the configured max attempts and returns the last response' do
allow(ScopesExtractor::Config).to receive(:http_retry_max_attempts).and_return(2)
request = instance_double(Typhoeus::Request, run: fake_response(429))
allow(Typhoeus::Request).to receive(:new).and_return(request)

response = described_class.get('https://example.com')

expect(response.code).to eq(429)
# 1 initial attempt + 2 retries
expect(request).to have_received(:run).exactly(3).times
expect(described_class).to have_received(:wait).twice
end

it 'does not retry on a non-retryable client error' do
request = instance_double(Typhoeus::Request, run: fake_response(404))
allow(Typhoeus::Request).to receive(:new).and_return(request)

expect(described_class.get('https://example.com').code).to eq(404)
expect(described_class).not_to have_received(:wait)
end
end

describe '#retryable?' do
it 'is true for 429 and 5xx codes' do
[429, 500, 502, 503, 504].each do |code|
expect(described_class.retryable?(fake_response(code))).to be(true)
end
end

it 'is false for success and other client errors' do
[200, 301, 400, 401, 404].each do |code|
expect(described_class.retryable?(fake_response(code))).to be(false)
end
end
end

describe '#retry_delay' do
before do
allow(ScopesExtractor::Config).to receive_messages(
http_retry_base_delay: 3, http_retry_max_delay: 70
)
end

it 'honours a numeric Retry-After header over the backoff' do
response = fake_response(429, headers: { 'Retry-After' => '42' })
expect(described_class.retry_delay(response, 0)).to eq(42.0)
end

it 'grows exponentially and stays within [base, cap] bounds' do
delays = (0..5).map { |attempt| described_class.retry_delay(fake_response(429), attempt) }

# base=3 → unjittered curve 3,6,12,24,48,70 ; jitter adds 0..base on top, capped.
expect(delays[0]).to be_between(3, 6)
expect(delays[1]).to be_between(6, 9)
expect(delays.last).to be <= 70
expect(delays.last).to be_within(3).of(70)
end
end

describe '#parse_retry_after' do
it 'parses a numeric seconds value (case-insensitive header)' do
expect(described_class.parse_retry_after(fake_response(429, headers: { 'retry-after' => '30' }))).to eq(30.0)
end

it 'caps the value at http_retry_max_delay' do
allow(ScopesExtractor::Config).to receive(:http_retry_max_delay).and_return(70)
expect(described_class.parse_retry_after(fake_response(429, headers: { 'Retry-After' => '600' }))).to eq(70.0)
end

it 'returns nil when the header is absent or non-numeric' do
expect(described_class.parse_retry_after(fake_response(429))).to be_nil
expect(described_class.parse_retry_after(fake_response(429, headers: { 'Retry-After' => 'soon' }))).to be_nil
end
end
end
Loading