@@ -1055,7 +1055,7 @@ AbstractBasicAuthHandler Objects
10551055 *headers* should be the error headers.
10561056
10571057 *host* is either an authority (e.g. ``"python.org"``) or a URL containing an
1058- authority component (e.g. ``"http://python.org/"``). In either case, the
1058+ authority component (e.g. ``"https://python.org/"``). In either case, the
10591059 authority must not contain a userinfo component (so, ``"python.org"`` and
10601060 ``"python.org:80"`` are fine, ``"joe:password@python.org"`` is not).
10611061
@@ -1251,10 +1251,14 @@ This example gets the python.org main page and displays the first 300 bytes of
12511251it::
12521252
12531253 >>> import urllib.request
1254- >>> with urllib.request.urlopen('http://www.python.org/') as f:
1255- ... print(f.read(300))
1256- ...
1257- b'<!doctype html>\n<!--[if lt IE 7]> <html class="no-js ie6 lt-ie7 lt-ie8 lt-ie9"> <![endif]-->\n<!--[if IE 7]> <html class="no-js ie7 lt-ie8 lt-ie9"> <![endif]-->\n<!--[if IE 8]> <html class="no-js ie8 lt-ie9">
1254+ >>> with urllib.request.urlopen('https://www.python.org/') as f:
1255+ ... # The response may be compressed (for example, 'gzip').
1256+ ... print(f.headers.get('Content-Encoding'))
1257+ ... data = f.read()
1258+ ... if f.headers.get('Content-Encoding') == 'gzip':
1259+ ... import gzip
1260+ ... data = gzip.decompress(data)
1261+ ... print(data[:300].decode('utf-8', errors='replace'))
12581262
12591263Note that reading from the object returned by urlopen yields a bytes object. This is because there is no way
12601264for urlopen to automatically determine the encoding of the byte stream
@@ -1271,26 +1275,30 @@ For additional information, see the W3C document: https://www.w3.org/Internation
12711275As the python.org website uses *utf-8* encoding as specified in its meta tag, we
12721276will use the same for decoding the bytes object::
12731277
1274- >>> with urllib.request.urlopen('http://www.python.org/') as f:
1275- ... print(f.read(100).decode('utf-8'))
1278+ >>> with urllib.request.urlopen('https://www.python.org/') as f:
1279+ ... # Check for compression and decode appropriately.
1280+ ... enc = f.headers.get('Content-Encoding')
1281+ ... data = f.read()
1282+ ... if enc == 'gzip':
1283+ ... import gzip
1284+ ... data = gzip.decompress(data)
1285+ ... print(data[:100].decode('utf-8', errors='replace'))
12761286 ...
1277- <!doctype html>
1278- <!--[if lt IE 7]> <html class="no-js ie6 lt-ie7 lt-ie8 lt-ie9"> <![endif]-->
1279- <!-
12801287
12811288It is also possible to achieve the same result without using the
12821289:term:`context manager` approach::
12831290
12841291 >>> import urllib.request
1285- >>> f = urllib.request.urlopen('http://www.python.org/')
1292+ >>> f = urllib.request.urlopen('https://www.python.org/')
12861293 >>> try:
1287- ... print(f.read(100).decode('utf-8'))
1294+ ... enc = f.headers.get('Content-Encoding')
1295+ ... data = f.read()
1296+ ... if enc == 'gzip':
1297+ ... import gzip
1298+ ... data = gzip.decompress(data)
1299+ ... print(data[:100].decode('utf-8', errors='replace'))
12881300 ... finally:
12891301 ... f.close()
1290- ...
1291- <!doctype html>
1292- <!--[if lt IE 7]> <html class="no-js ie6 lt-ie7 lt-ie8 lt-ie9"> <![endif]-->
1293- <!--
12941302
12951303In the following example, we are sending a data-stream to the stdin of a CGI
12961304and reading the data it returns to us. Note that this example will only work
@@ -1361,7 +1369,7 @@ Use the *headers* argument to the :class:`Request` constructor, or::
13611369
13621370 import urllib.request
13631371 req = urllib.request.Request('http://www.example.com/')
1364- req.add_header('Referer', 'http://www.python.org/')
1372+ req.add_header('Referer', 'https://www.python.org/')
13651373 # Customize the default User-Agent header value:
13661374 req.add_header('User-Agent', 'urllib-example/0.1 (Contact: . . .)')
13671375 with urllib.request.urlopen(req) as f:
@@ -1390,7 +1398,7 @@ containing parameters::
13901398 >>> import urllib.request
13911399 >>> import urllib.parse
13921400 >>> params = urllib.parse.urlencode({'spam': 1, 'eggs': 2, 'bacon': 0})
1393- >>> url = "http://www.musi-cal.com/cgi-bin/query?%s" % params
1401+ >>> url = "https://www.python.org/?%s" % params
13941402 >>> with urllib.request.urlopen(url) as f:
13951403 ... print(f.read().decode('utf-8'))
13961404 ...
@@ -1402,7 +1410,7 @@ from urlencode is encoded to bytes before it is sent to urlopen as data::
14021410 >>> import urllib.parse
14031411 >>> data = urllib.parse.urlencode({'spam': 1, 'eggs': 2, 'bacon': 0})
14041412 >>> data = data.encode('ascii')
1405- >>> with urllib.request.urlopen("http://requestb.in/xrbl82xr", data) as f:
1413+ >>> with urllib.request.urlopen("https://httpbin.org/post", data) as f:
14061414 ... print(f.read().decode('utf-8'))
14071415 ...
14081416
@@ -1412,15 +1420,15 @@ environment settings::
14121420 >>> import urllib.request
14131421 >>> proxies = {'http': 'http://proxy.example.com:8080/'}
14141422 >>> opener = urllib.request.build_opener(urllib.request.ProxyHandler(proxies))
1415- >>> with opener.open("http://www.python.org") as f:
1423+ >>> with opener.open("https://www.python.org") as f:
14161424 ... f.read().decode('utf-8')
14171425 ...
14181426
14191427The following example uses no proxies at all, overriding environment settings::
14201428
14211429 >>> import urllib.request
1422- >>> opener = urllib.request.build_opener(urllib.request.ProxyHandler({}} ))
1423- >>> with opener.open("http://www.python.org/") as f:
1430+ >>> opener = urllib.request.build_opener(urllib.request.ProxyHandler({}))
1431+ >>> with opener.open("https://www.python.org/") as f:
14241432 ... f.read().decode('utf-8')
14251433 ...
14261434
@@ -1453,7 +1461,7 @@ some point in the future.
14531461 The following example illustrates the most common usage scenario::
14541462
14551463 >>> import urllib.request
1456- >>> local_filename, headers = urllib.request.urlretrieve('http://python.org/')
1464+ >>> local_filename, headers = urllib.request.urlretrieve('https://python.org/')
14571465 >>> html = open(local_filename)
14581466 >>> html.close()
14591467
0 commit comments