diff --git a/include/hackney_lib.hrl b/include/hackney_lib.hrl index a36d9d84..250c0231 100644 --- a/include/hackney_lib.hrl +++ b/include/hackney_lib.hrl @@ -29,5 +29,6 @@ connection = <<>> :: binary(), ctype = <<>> :: binary(), location = <<>> :: binary(), + content_encoding = <<>> :: binary(), body_state = waiting :: atom() | tuple() }). diff --git a/src/hackney.erl b/src/hackney.erl index e1097bd3..1804f5f8 100644 --- a/src/hackney.erl +++ b/src/hackney.erl @@ -883,11 +883,15 @@ sync_request_with_redirect(ConnPid, Method, Path, Headers, Body, WithBody, Optio sync_request_with_redirect_body(ConnPid, Method, Path, HeadersList, FinalBody, WithBody, Options, URL, FollowRedirect, MaxRedirect, RedirectCount) -> - %% Extract request options for 1xx informational responses - ReqOpts = case proplists:get_value(inform_fun, Options) of + %% Extract request options for 1xx informational responses and auto_decompress + ReqOpts0 = case proplists:get_value(inform_fun, Options) of undefined -> []; InformFun -> [{inform_fun, InformFun}] end, + ReqOpts = case proplists:get_value(auto_decompress, Options, false) of + true -> [{auto_decompress, true} | ReqOpts0]; + false -> ReqOpts0 + end, case hackney_conn:request(ConnPid, Method, Path, HeadersList, FinalBody, infinity, ReqOpts) of %% HTTP/2 returns body directly - handle 4-tuple first {ok, Status, RespHeaders, RespBody} when Status >= 301, Status =< 303; Status =:= 307; Status =:= 308 -> @@ -1179,10 +1183,19 @@ add_default_headers(Headers, Options, URL) -> end end, + %% Add Accept-Encoding if auto_decompress is enabled (issue #155) + Headers3 = case proplists:get_value(auto_decompress, Options, false) of + true -> + {_, H} = hackney_headers:store_new(<<"Accept-Encoding">>, <<"gzip, deflate">>, Headers2), + H; + false -> + Headers2 + end, + %% Add cookies if present case proplists:get_value(cookie, Options, []) of - [] -> Headers2; - Cookies -> add_cookies_header(Cookies, Headers2) + [] -> Headers3; + Cookies -> 
add_cookies_header(Cookies, Headers3) end. add_basic_auth_header(User, Pwd, Headers) -> diff --git a/src/hackney_conn.erl b/src/hackney_conn.erl index 9799188a..0e75482c 100644 --- a/src/hackney_conn.erl +++ b/src/hackney_conn.erl @@ -130,6 +130,7 @@ connect_options = [] :: list(), ssl_options = [] :: list(), inform_fun :: fun((integer(), binary(), list()) -> any()) | undefined, + auto_decompress = false :: boolean(), %% Pool integration pool_pid :: pid() | undefined, %% If set, connection is from a pool @@ -502,7 +503,8 @@ init([DefaultOwner, Opts]) -> pool_pid = maps:get(pool_pid, Opts, undefined), enable_push = maps:get(enable_push, Opts, false), no_reuse = maps:get(no_reuse, Opts, false), - inform_fun = maps:get(inform_fun, Opts, undefined) + inform_fun = maps:get(inform_fun, Opts, undefined), + auto_decompress = maps:get(auto_decompress, Opts, false) }, %% If socket is provided, start in connected state; otherwise start in idle @@ -803,6 +805,7 @@ connected({call, From}, {request_streaming, Method, Path, Headers, Body}, #conn_ connected({call, From}, {request, Method, Path, Headers, Body, ReqOpts}, Data) -> %% HTTP/1.1 request InformFun = proplists:get_value(inform_fun, ReqOpts, undefined), + AutoDecompress = proplists:get_value(auto_decompress, ReqOpts, false), NewData = Data#conn_data{ request_from = From, method = Method, @@ -816,7 +819,8 @@ connected({call, From}, {request, Method, Path, Headers, Body, ReqOpts}, Data) - async = false, async_ref = undefined, stream_to = undefined, - inform_fun = InformFun + inform_fun = InformFun, + auto_decompress = AutoDecompress }, {next_state, sending, NewData, [{next_event, internal, {send_request, Method, Path, Headers, Body}}]}; @@ -1788,11 +1792,73 @@ read_full_body(Data, Acc) -> {ok, Chunk, NewData} -> read_full_body(NewData, <>); {done, NewData} -> - {ok, Acc, NewData}; + %% Body complete - apply decompression if needed + maybe_decompress_body(Acc, NewData); {error, Reason} -> {error, Reason} end. 
+%% @private Decode a fully read response body according to its Content-Encoding.
+%%
+%% The body is returned unchanged as `{ok, Body, Data}' when auto_decompress
+%% is off, when no response headers are stored, when the body is empty, or when
+%% the encoding is absent or not one of gzip / x-gzip / deflate. A failed
+%% decode returns `{error, {decompress_error, gzip | deflate, Reason}}'.
+maybe_decompress_body(Body, #conn_data{auto_decompress = false} = Data) ->
+    {ok, Body, Data};
+maybe_decompress_body(Body, #conn_data{response_headers = undefined} = Data) ->
+    {ok, Body, Data};
+maybe_decompress_body(<<>>, #conn_data{auto_decompress = true} = Data) ->
+    %% Nothing to inflate (e.g. a bodiless response that still carries a
+    %% Content-Encoding header). zlib raises data_error when it never sees an
+    %% end-of-stream marker, so an empty body must not reach the decoders.
+    {ok, <<>>, Data};
+maybe_decompress_body(Body, #conn_data{response_headers = Headers, auto_decompress = true} = Data) ->
+    ContentEncoding = case hackney_headers:get_value(<<"content-encoding">>, Headers) of
+        undefined -> <<>>;
+        CE -> hackney_bstr:to_lower(CE)
+    end,
+    case decompress_with_encoding(ContentEncoding, Body) of
+        {ok, Decompressed} -> {ok, Decompressed, Data};
+        {error, _} = Error -> Error
+    end.
+
+%% @private Pick the decoder for an already lower-cased Content-Encoding value.
+%% x-gzip is an alias for gzip and shares its error tag; any other value
+%% (no compression, or an encoding not handled here) leaves the body as-is.
+decompress_with_encoding(<<"gzip">>, Body) ->
+    tag_decompress_result(gzip, decompress_gzip(Body));
+decompress_with_encoding(<<"x-gzip">>, Body) ->
+    tag_decompress_result(gzip, decompress_gzip(Body));
+decompress_with_encoding(<<"deflate">>, Body) ->
+    tag_decompress_result(deflate, decompress_deflate(Body));
+decompress_with_encoding(_Other, Body) ->
+    {ok, Body}.
+
+%% @private Wrap a decoder failure so callers can tell which codec failed.
+tag_decompress_result(_Codec, {ok, _} = Ok) ->
+    Ok;
+tag_decompress_result(Codec, {error, Reason}) ->
+    {error, {decompress_error, Codec, Reason}}.
+
+%% @private Decompress gzip-encoded data.
+%% Returns `{ok, Binary}', or `{error, Reason}' when zlib raises.
+decompress_gzip(Data) ->
+    try
+        {ok, zlib:gunzip(Data)}
+    catch
+        error:Reason -> {error, Reason};
+        exit:Reason -> {error, Reason}
+    end.
+
+%% @private Decompress deflate-encoded data.
+%% Some servers send zlib-wrapped deflate (RFC 1950) while others send a raw
+%% deflate stream (RFC 1951), so the zlib framing is tried first and raw
+%% deflate is the fallback. When both fail, the zlib-framing error is returned.
+decompress_deflate(Data) ->
+    case inflate_with_window_bits(Data, 15) of
+        {ok, _} = Ok ->
+            Ok;
+        {error, _} = ZlibError ->
+            %% Negative window bits tell zlib to expect no header/trailer.
+            case inflate_with_window_bits(Data, -15) of
+                {ok, _} = RawOk -> RawOk;
+                {error, _} -> ZlibError
+            end
+    end.
+
+%% @private Inflate Data in one shot using the given zlib window bits.
+%% The zlib stream is always closed, even when inflation raises.
+inflate_with_window_bits(Data, WindowBits) ->
+    try
+        Z = zlib:open(),
+        try
+            ok = zlib:inflateInit(Z, WindowBits),
+            Decompressed = zlib:inflate(Z, Data),
+            ok = zlib:inflateEnd(Z),
+            {ok, iolist_to_binary(Decompressed)}
+        after
+            zlib:close(Z)
+        end
+    catch
+        error:Reason -> {error, Reason};
+        exit:Reason -> {error, Reason}
+    end.
+ %% @private Stream a single body chunk stream_body_chunk(#conn_data{method = <<"HEAD">>} = Data) -> {done, Data}; diff --git a/src/hackney_http.erl b/src/hackney_http.erl index 53df08d7..1c9e2ab3 100644 --- a/src/hackney_http.erl +++ b/src/hackney_http.erl @@ -346,6 +346,9 @@ parse_header(Line, St) -> <<"location">> -> Location = hackney_bstr:trim(Value), St#hparser{location=Location}; + <<"content-encoding">> -> + CE = hackney_bstr:to_lower(hackney_bstr:trim(Value)), + St#hparser{content_encoding=CE}; _ -> St end, diff --git a/test/hackney_decompress_tests.erl b/test/hackney_decompress_tests.erl new file mode 100644 index 00000000..d0dbd1d2 --- /dev/null +++ b/test/hackney_decompress_tests.erl @@ -0,0 +1,84 @@
+%% @doc Integration tests for automatic gzip/deflate decompression (issue #155)
+-module(hackney_decompress_tests).
+-include_lib("eunit/include/eunit.hrl").
+
+-define(PORT, 9879).
+
+%% Fixture setup: boot hackney and cowboy, then serve test_http_resource
+%% for every path on ?PORT under the listener name test_decompress_http.
+setup() ->
+    {ok, _} = application:ensure_all_started(hackney),
+    {ok, _} = application:ensure_all_started(cowboy),
+    Routes = [{'_', [{"/[...]", test_http_resource, []}]}],
+    ProtocolOpts = #{env => #{dispatch => cowboy_router:compile(Routes)}},
+    {ok, _} = cowboy:start_clear(test_decompress_http, [{port, ?PORT}], ProtocolOpts),
+    ok.
+
+%% Fixture teardown: stop the listener started by setup/0.
+cleanup(_SetupResult) ->
+    cowboy:stop_listener(test_decompress_http),
+    ok.
+
+%% Build an absolute URL on the local test listener for a binary path.
+url(Path) ->
+    Port = integer_to_binary(?PORT),
+    <<"http://localhost:", Port/binary, Path/binary>>.
+
+%% =============================================================================
+%% Issue #155: Transparent gzip/deflate decompression
+%% =============================================================================
+
+%% EUnit fixture: runs setup/0 once, executes every case below, then runs
+%% cleanup/1.
+auto_decompress_test_() ->
+    {setup,
+     fun setup/0,
+     fun cleanup/1,
+     [
+      {"gzip response decompressed correctly", fun test_gzip_decompression/0},
+      {"deflate response decompressed correctly", fun test_deflate_decompression/0},
+      {"no decompression when option not set", fun test_no_decompression_without_option/0},
+      {"uncompressed response unchanged", fun test_uncompressed_response/0},
+      {"Accept-Encoding header added when auto_decompress is set", fun test_accept_encoding_header/0}
+     ]}.
+
+%% Test gzip response is decompressed correctly
+test_gzip_decompression() ->
+    {ok, StatusCode, _Headers, Body} = hackney:request(get, url(<<"/compressed/gzip">>), [], <<>>,
+        [{with_body, true}, {auto_decompress, true}]),
+    ?assertEqual(200, StatusCode),
+    ?assertEqual(<<"Hello, this is gzip compressed data!">>, Body).
+
+%% Test deflate response is decompressed correctly
+test_deflate_decompression() ->
+    {ok, StatusCode, _Headers, Body} = hackney:request(get, url(<<"/compressed/deflate">>), [], <<>>,
+        [{with_body, true}, {auto_decompress, true}]),
+    ?assertEqual(200, StatusCode),
+    ?assertEqual(<<"Hello, this is deflate compressed data!">>, Body).
+
+%% Test that without auto_decompress option, body is not decompressed
+test_no_decompression_without_option() ->
+    {ok, StatusCode, _Headers, Body} = hackney:request(get, url(<<"/compressed/gzip">>), [], <<>>,
+        [{with_body, true}]), %% No auto_decompress
+    ?assertEqual(200, StatusCode),
+    %% Body should still be compressed (not the plain text)
+    ?assertNotEqual(<<"Hello, this is gzip compressed data!">>, Body),
+    %% Verify it's actually gzip data by checking the magic number (1f 8b).
+    %% ?assertMatch reports the offending body instead of a bare badmatch.
+    ?assertMatch(<<16#1f, 16#8b, _/binary>>, Body).
+
+%% Test uncompressed response is unchanged
+test_uncompressed_response() ->
+    {ok, StatusCode, _Headers, Body} = hackney:request(get, url(<<"/compressed/none">>), [], <<>>,
+        [{with_body, true}, {auto_decompress, true}]),
+    ?assertEqual(200, StatusCode),
+    ?assertEqual(<<"Hello, this is uncompressed data!">>, Body).
+
+%% Test that Accept-Encoding header is added when auto_decompress is set
+test_accept_encoding_header() ->
+    %% Make request to /get which echoes headers back as JSON
+    {ok, StatusCode, _Headers, Body} = hackney:request(get, url(<<"/get">>), [], <<>>,
+        [{with_body, true}, {auto_decompress, true}]),
+    ?assertEqual(200, StatusCode),
+    %% Parse JSON response and check for accept-encoding header
+    BodyMap = jsx:decode(Body, [return_maps]),
+    HeadersMap = maps:get(<<"headers">>, BodyMap),
+    AcceptEncoding = maps:get(<<"accept-encoding">>, HeadersMap, undefined),
+    ?assertNotEqual(undefined, AcceptEncoding),
+    %% Should contain gzip and deflate; assertNotEqual prints the actual
+    %% match result on failure, unlike a boolean ?assert.
+    ?assertNotEqual(nomatch, binary:match(AcceptEncoding, <<"gzip">>)),
+    ?assertNotEqual(nomatch, binary:match(AcceptEncoding, <<"deflate">>)).
diff --git a/test/test_http_resource.erl b/test/test_http_resource.erl index ac80de24..82ff7a30 100644 --- a/test/test_http_resource.erl +++ b/test/test_http_resource.erl @@ -160,6 +160,38 @@ handle_request(<<"GET">>, <<"/inform">>, Req0, State) -> %% Then send final response reply_json(200, #{<<"informed">> => true, <<"inform_status">> => InformStatus}, Req0, State); +%% GET /compressed/gzip - return gzip compressed response +handle_request(<<"GET">>, <<"/compressed/gzip">>, Req, State) -> + Data = <<"Hello, this is gzip compressed data!">>, + CompressedData = zlib:gzip(Data), + Req2 = cowboy_req:reply(200, #{ + <<"content-type">> => <<"text/plain">>, + <<"content-encoding">> => <<"gzip">> + }, CompressedData, Req), + {ok, Req2, State}; + +%% GET /compressed/deflate - return deflate compressed response +handle_request(<<"GET">>, <<"/compressed/deflate">>, Req, State) -> + Data = <<"Hello, this is deflate compressed data!">>, + Z = zlib:open(), + ok = zlib:deflateInit(Z), + CompressedData = iolist_to_binary(zlib:deflate(Z, Data, finish)), + ok = zlib:deflateEnd(Z), + zlib:close(Z), + Req2 = cowboy_req:reply(200, #{ + <<"content-type">> => <<"text/plain">>, + <<"content-encoding">> => <<"deflate">> + }, CompressedData, Req), + {ok, Req2, State}; + +%% GET /compressed/none - return uncompressed response +handle_request(<<"GET">>, <<"/compressed/none">>, Req, State) -> + Data = <<"Hello, this is uncompressed data!">>, + Req2 = cowboy_req:reply(200, #{ + <<"content-type">> => <<"text/plain">> + }, Data, Req), + {ok, Req2, State}; + %% Fallback - return 404 handle_request(_Method, _Path, Req, State) -> Req2 = cowboy_req:reply(404, #{