From 82942880ebb3a2a7b004723e6dc7cc767fb03334 Mon Sep 17 00:00:00 2001 From: Joshua MARTINELLE Date: Tue, 23 Jun 2026 10:17:44 +0200 Subject: [PATCH] feat(http): add retry with backoff and jitter for rate-limited requests --- .rubocop.yml | 3 + lib/scopes_extractor/config.rb | 15 ++++ lib/scopes_extractor/http.rb | 63 ++++++++++++-- spec/scopes_extractor/http_spec.rb | 131 +++++++++++++++++++++++++++++ 4 files changed, 207 insertions(+), 5 deletions(-) create mode 100644 spec/scopes_extractor/http_spec.rb diff --git a/.rubocop.yml b/.rubocop.yml index f578a6e..935dfd3 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -25,6 +25,9 @@ Metrics/BlockLength: Metrics/ClassLength: Max: 200 +Metrics/ModuleLength: + Max: 120 + Metrics/MethodLength: Max: 30 diff --git a/lib/scopes_extractor/config.rb b/lib/scopes_extractor/config.rb index 06d52d1..1f1ccc1 100644 --- a/lib/scopes_extractor/config.rb +++ b/lib/scopes_extractor/config.rb @@ -40,6 +40,21 @@ def timeout http[:timeout] || 30 end + # Maximum number of retries on retryable HTTP responses (429/5xx). + def http_retry_max_attempts + http[:retry_max_attempts] || 5 + end + + # Base delay (seconds) for exponential backoff: base * 2**attempt. + def http_retry_base_delay + http[:retry_base_delay] || 3 + end + + # Cap (seconds) on a single retry wait. Covers HackerOne's ~60s window. + def http_retry_max_delay + http[:retry_max_delay] || 70 + end + def api load[:api] || {} end diff --git a/lib/scopes_extractor/http.rb b/lib/scopes_extractor/http.rb index 3238067..bc5c05b 100644 --- a/lib/scopes_extractor/http.rb +++ b/lib/scopes_extractor/http.rb @@ -5,6 +5,10 @@ module ScopesExtractor module HTTP + # HTTP status codes that are worth retrying with backoff. + # 429 = rate limited, 5xx = transient server-side errors. + RETRYABLE_CODES = [429, 500, 502, 503, 504].freeze + class << self attr_reader :hydra, :cookie_file @@ -35,15 +39,64 @@ def post(url, body:, headers: {}, timeout: nil) request(:post, url, body: body, headers: headers, timeout: timeout) end + def retryable?(response) + RETRYABLE_CODES.include?(response.code) + end + + # Computes how long to wait before the next retry. + # Honours a server-provided Retry-After header when present, otherwise + # falls back to capped exponential backoff with positive jitter. + # The jitter desynchronizes clients sharing the same token/IP, which is + # the main cause of repeated 429s when several tools run concurrently. + # @return [Float] delay in seconds + def retry_delay(response, attempt) + retry_after = parse_retry_after(response) + return retry_after if retry_after + + base = Config.http_retry_base_delay.to_f + cap = Config.http_retry_max_delay.to_f + + backoff = [base * (2**attempt), cap].min + [backoff + (rand * base), cap].min + end + + # Parses the Retry-After header (delay in seconds form only). + # Capped to http_retry_max_delay to keep waits bounded. + # @return [Float, nil] seconds to wait, or nil if absent/unparseable + def parse_retry_after(response) + headers = response.headers || {} + value = headers['Retry-After'] || headers['retry-after'] + return nil unless value.to_s.strip.match?(/\A\d+\z/) + + [value.to_i, Config.http_retry_max_delay].min.to_f + end + + # Extracted for testability (stubbed in specs to avoid real sleeping). + def wait(seconds) + sleep(seconds) + end + private def request(method, url, body: nil, headers: {}, timeout: nil) options = build_options(body, headers, timeout) - - response = Typhoeus::Request.new(url, options.merge(method: method)).run - - log_request(method, url, response) - response + max_attempts = Config.http_retry_max_attempts + attempt = 0 + + loop do + response = Typhoeus::Request.new(url, options.merge(method: method)).run + log_request(method, url, response) + + return response unless retryable?(response) && attempt < max_attempts + + delay = retry_delay(response, attempt) + ScopesExtractor.logger.warn( + "#{method.to_s.upcase} #{url} → #{response.code}, retrying in #{delay.round(1)}s " \ + "(attempt #{attempt + 1}/#{max_attempts})" + ) + wait(delay) + attempt += 1 + end end def build_options(body, headers, timeout) diff --git a/spec/scopes_extractor/http_spec.rb b/spec/scopes_extractor/http_spec.rb new file mode 100644 index 0000000..3bd48c7 --- /dev/null +++ b/spec/scopes_extractor/http_spec.rb @@ -0,0 +1,131 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe ScopesExtractor::HTTP do + def fake_response(code, headers: {}, body: '{}') + instance_double( + Typhoeus::Response, + code: code, + headers: headers, + body: body, + total_time: 0.1, + success?: (200..299).cover?(code) + ) + end + + before do + # Avoid touching the real cookie jar and never sleep during tests. + allow(described_class).to receive_messages(build_options: {}, wait: nil) + end + + describe '#get' do + it 'returns the response without retrying on success' do + request = instance_double(Typhoeus::Request, run: fake_response(200)) + allow(Typhoeus::Request).to receive(:new).and_return(request) + + response = described_class.get('https://example.com') + + expect(response.code).to eq(200) + expect(described_class).not_to have_received(:wait) + expect(request).to have_received(:run).once + end + + it 'retries on 429 and returns the first successful response' do + request = instance_double(Typhoeus::Request) + allow(request).to receive(:run).and_return( + fake_response(429), fake_response(429), fake_response(200) + ) + allow(Typhoeus::Request).to receive(:new).and_return(request) + + response = described_class.get('https://example.com') + + expect(response.code).to eq(200) + expect(request).to have_received(:run).exactly(3).times + expect(described_class).to have_received(:wait).twice + end + + it 'retries on transient 5xx responses' do + request = instance_double(Typhoeus::Request) + allow(request).to receive(:run).and_return(fake_response(503), fake_response(200)) + allow(Typhoeus::Request).to receive(:new).and_return(request) + + expect(described_class.get('https://example.com').code).to eq(200) + expect(described_class).to have_received(:wait).once + end + + it 'gives up after the configured max attempts and returns the last response' do + allow(ScopesExtractor::Config).to receive(:http_retry_max_attempts).and_return(2) + request = instance_double(Typhoeus::Request, run: fake_response(429)) + allow(Typhoeus::Request).to receive(:new).and_return(request) + + response = described_class.get('https://example.com') + + expect(response.code).to eq(429) + # 1 initial attempt + 2 retries + expect(request).to have_received(:run).exactly(3).times + expect(described_class).to have_received(:wait).twice + end + + it 'does not retry on a non-retryable client error' do + request = instance_double(Typhoeus::Request, run: fake_response(404)) + allow(Typhoeus::Request).to receive(:new).and_return(request) + + expect(described_class.get('https://example.com').code).to eq(404) + expect(described_class).not_to have_received(:wait) + end + end + + describe '#retryable?' do + it 'is true for 429 and 5xx codes' do + [429, 500, 502, 503, 504].each do |code| + expect(described_class.retryable?(fake_response(code))).to be(true) + end + end + + it 'is false for success and other client errors' do + [200, 301, 400, 401, 404].each do |code| + expect(described_class.retryable?(fake_response(code))).to be(false) + end + end + end + + describe '#retry_delay' do + before do + allow(ScopesExtractor::Config).to receive_messages( + http_retry_base_delay: 3, http_retry_max_delay: 70 + ) + end + + it 'honours a numeric Retry-After header over the backoff' do + response = fake_response(429, headers: { 'Retry-After' => '42' }) + expect(described_class.retry_delay(response, 0)).to eq(42.0) + end + + it 'grows exponentially and stays within [base, cap] bounds' do + delays = (0..5).map { |attempt| described_class.retry_delay(fake_response(429), attempt) } + + # base=3 → unjittered curve 3,6,12,24,48,70 ; jitter adds 0..base on top, capped. + expect(delays[0]).to be_between(3, 6) + expect(delays[1]).to be_between(6, 9) + expect(delays.last).to be <= 70 + expect(delays.last).to be_within(3).of(70) + end + end + + describe '#parse_retry_after' do + it 'parses a numeric seconds value (case-insensitive header)' do + expect(described_class.parse_retry_after(fake_response(429, headers: { 'retry-after' => '30' }))).to eq(30.0) + end + + it 'caps the value at http_retry_max_delay' do + allow(ScopesExtractor::Config).to receive(:http_retry_max_delay).and_return(70) + expect(described_class.parse_retry_after(fake_response(429, headers: { 'Retry-After' => '600' }))).to eq(70.0) + end + + it 'returns nil when the header is absent or non-numeric' do + expect(described_class.parse_retry_after(fake_response(429))).to be_nil + expect(described_class.parse_retry_after(fake_response(429, headers: { 'Retry-After' => 'soon' }))).to be_nil + end + end +end