mirror of https://github.com/python/cpython.git
Merged revisions 82510 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/branches/py3k ........ r82510 | senthil.kumaran | 2010-07-03 23:18:22 +0530 (Sat, 03 Jul 2010) | 4 lines Fix Issue5468 - urlencode to handle bytes and other alternate encodings. (Extensive tests provided). Patch by Dan Mahn. ........
This commit is contained in:
parent
8e42fb7ada
commit
fe1ad15b4b
|
@ -307,23 +307,29 @@ The :mod:`urllib.parse` module defines the following functions:
|
||||||
``b'a&\xef'``.
|
``b'a&\xef'``.
|
||||||
|
|
||||||
|
|
||||||
.. function:: urlencode(query, doseq=False)
|
.. function:: urlencode(query, doseq=False, safe='', encoding=None, errors=None)
|
||||||
|
|
||||||
Convert a mapping object or a sequence of two-element tuples to a
|
Convert a mapping object or a sequence of two-element tuples, which may
|
||||||
"url-encoded" string, suitable to pass to :func:`urlopen` above as the
|
contain :class:`str` or :class:`bytes` objects, to a "url-encoded" string,
|
||||||
optional *data* argument. This is useful to pass a dictionary of form
|
suitable to pass to :func:`urlopen` above as the optional *data* argument.
|
||||||
fields to a ``POST`` request. The resulting string is a series of
|
This is useful to pass a dictionary of form fields to a ``POST`` request.
|
||||||
``key=value`` pairs separated by ``'&'`` characters, where both *key* and
|
The resulting string is a series of ``key=value`` pairs separated by ``'&'``
|
||||||
*value* are quoted using :func:`quote_plus` above. When a sequence of
|
characters, where both *key* and *value* are quoted using :func:`quote_plus`
|
||||||
two-element tuples is used as the *query* argument, the first element of
|
above. When a sequence of two-element tuples is used as the *query*
|
||||||
each tuple is a key and the second is a value. The value element in itself
|
argument, the first element of each tuple is a key and the second is a
|
||||||
can be a sequence and in that case, if the optional parameter *doseq* is
|
value. The value element in itself can be a sequence and in that case, if
|
||||||
evaluates to *True*, individual ``key=value`` pairs separated by ``'&'``are
|
the optional parameter *doseq* evaluates to *True*, individual
|
||||||
generated for each element of the value sequence for the key. The order of
|
``key=value`` pairs separated by ``'&'`` are generated for each element of
|
||||||
parameters in the encoded string will match the order of parameter tuples in
|
the value sequence for the key. The order of parameters in the encoded
|
||||||
the sequence. This module provides the functions :func:`parse_qs` and
|
string will match the order of parameter tuples in the sequence. This module
|
||||||
:func:`parse_qsl` which are used to parse query strings into Python data
|
provides the functions :func:`parse_qs` and :func:`parse_qsl` which are used
|
||||||
structures.
|
to parse query strings into Python data structures.
|
||||||
|
|
||||||
|
When the *query* parameter is a :class:`str`, the *safe*, *encoding* and *errors*
|
||||||
|
parameters are passed to :func:`quote_plus` for encoding.
|
||||||
|
|
||||||
|
.. versionchanged:: 3.2
|
||||||
|
The *query* parameter supports bytes and string objects.
|
||||||
|
|
||||||
|
|
||||||
.. seealso::
|
.. seealso::
|
||||||
|
|
|
@ -797,6 +797,116 @@ def test_nonstring_seq_values(self):
|
||||||
self.assertEqual("a=a&a=b",
|
self.assertEqual("a=a&a=b",
|
||||||
urllib.parse.urlencode({"a": {"a": 1, "b": 1}}, True))
|
urllib.parse.urlencode({"a": {"a": 1, "b": 1}}, True))
|
||||||
|
|
||||||
|
def test_urlencode_encoding(self):
|
||||||
|
# ASCII encoding. Expect %3F with errors="replace"
|
||||||
|
given = (('\u00a0', '\u00c1'),)
|
||||||
|
expect = '%3F=%3F'
|
||||||
|
result = urllib.parse.urlencode(given, encoding="ASCII", errors="replace")
|
||||||
|
self.assertEqual(expect, result)
|
||||||
|
|
||||||
|
# Default is UTF-8 encoding.
|
||||||
|
given = (('\u00a0', '\u00c1'),)
|
||||||
|
expect = '%C2%A0=%C3%81'
|
||||||
|
result = urllib.parse.urlencode(given)
|
||||||
|
self.assertEqual(expect, result)
|
||||||
|
|
||||||
|
# Latin-1 encoding.
|
||||||
|
given = (('\u00a0', '\u00c1'),)
|
||||||
|
expect = '%A0=%C1'
|
||||||
|
result = urllib.parse.urlencode(given, encoding="latin-1")
|
||||||
|
self.assertEqual(expect, result)
|
||||||
|
|
||||||
|
def test_urlencode_encoding_doseq(self):
|
||||||
|
# ASCII Encoding. Expect %3F with errors="replace"
|
||||||
|
given = (('\u00a0', '\u00c1'),)
|
||||||
|
expect = '%3F=%3F'
|
||||||
|
result = urllib.parse.urlencode(given, doseq=True,
|
||||||
|
encoding="ASCII", errors="replace")
|
||||||
|
self.assertEqual(expect, result)
|
||||||
|
|
||||||
|
# ASCII Encoding. On a sequence of values.
|
||||||
|
given = (("\u00a0", (1, "\u00c1")),)
|
||||||
|
expect = '%3F=1&%3F=%3F'
|
||||||
|
result = urllib.parse.urlencode(given, True,
|
||||||
|
encoding="ASCII", errors="replace")
|
||||||
|
self.assertEqual(expect, result)
|
||||||
|
|
||||||
|
# Utf-8
|
||||||
|
given = (("\u00a0", "\u00c1"),)
|
||||||
|
expect = '%C2%A0=%C3%81'
|
||||||
|
result = urllib.parse.urlencode(given, True)
|
||||||
|
self.assertEqual(expect, result)
|
||||||
|
|
||||||
|
given = (("\u00a0", (42, "\u00c1")),)
|
||||||
|
expect = '%C2%A0=42&%C2%A0=%C3%81'
|
||||||
|
result = urllib.parse.urlencode(given, True)
|
||||||
|
self.assertEqual(expect, result)
|
||||||
|
|
||||||
|
# latin-1
|
||||||
|
given = (("\u00a0", "\u00c1"),)
|
||||||
|
expect = '%A0=%C1'
|
||||||
|
result = urllib.parse.urlencode(given, True, encoding="latin-1")
|
||||||
|
self.assertEqual(expect, result)
|
||||||
|
|
||||||
|
given = (("\u00a0", (42, "\u00c1")),)
|
||||||
|
expect = '%A0=42&%A0=%C1'
|
||||||
|
result = urllib.parse.urlencode(given, True, encoding="latin-1")
|
||||||
|
self.assertEqual(expect, result)
|
||||||
|
|
||||||
|
def test_urlencode_bytes(self):
|
||||||
|
given = ((b'\xa0\x24', b'\xc1\x24'),)
|
||||||
|
expect = '%A0%24=%C1%24'
|
||||||
|
result = urllib.parse.urlencode(given)
|
||||||
|
self.assertEqual(expect, result)
|
||||||
|
result = urllib.parse.urlencode(given, True)
|
||||||
|
self.assertEqual(expect, result)
|
||||||
|
|
||||||
|
# Sequence of values
|
||||||
|
given = ((b'\xa0\x24', (42, b'\xc1\x24')),)
|
||||||
|
expect = '%A0%24=42&%A0%24=%C1%24'
|
||||||
|
result = urllib.parse.urlencode(given, True)
|
||||||
|
self.assertEqual(expect, result)
|
||||||
|
|
||||||
|
def test_urlencode_encoding_safe_parameter(self):
|
||||||
|
|
||||||
|
# Send '$' (\x24) as safe character
|
||||||
|
# Default utf-8 encoding
|
||||||
|
|
||||||
|
given = ((b'\xa0\x24', b'\xc1\x24'),)
|
||||||
|
result = urllib.parse.urlencode(given, safe=":$")
|
||||||
|
expect = '%A0$=%C1$'
|
||||||
|
self.assertEqual(expect, result)
|
||||||
|
|
||||||
|
given = ((b'\xa0\x24', b'\xc1\x24'),)
|
||||||
|
result = urllib.parse.urlencode(given, doseq=True, safe=":$")
|
||||||
|
expect = '%A0$=%C1$'
|
||||||
|
self.assertEqual(expect, result)
|
||||||
|
|
||||||
|
# Safe parameter in sequence
|
||||||
|
given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
|
||||||
|
expect = '%A0$=%C1$&%A0$=13&%A0$=42'
|
||||||
|
result = urllib.parse.urlencode(given, True, safe=":$")
|
||||||
|
self.assertEqual(expect, result)
|
||||||
|
|
||||||
|
# Test all above in latin-1 encoding
|
||||||
|
|
||||||
|
given = ((b'\xa0\x24', b'\xc1\x24'),)
|
||||||
|
result = urllib.parse.urlencode(given, safe=":$",
|
||||||
|
encoding="latin-1")
|
||||||
|
expect = '%A0$=%C1$'
|
||||||
|
self.assertEqual(expect, result)
|
||||||
|
|
||||||
|
given = ((b'\xa0\x24', b'\xc1\x24'),)
|
||||||
|
expect = '%A0$=%C1$'
|
||||||
|
result = urllib.parse.urlencode(given, doseq=True, safe=":$",
|
||||||
|
encoding="latin-1")
|
||||||
|
|
||||||
|
given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
|
||||||
|
expect = '%A0$=%C1$&%A0$=13&%A0$=42'
|
||||||
|
result = urllib.parse.urlencode(given, True, safe=":$",
|
||||||
|
encoding="latin-1")
|
||||||
|
self.assertEqual(expect, result)
|
||||||
|
|
||||||
class Pathname_Tests(unittest.TestCase):
|
class Pathname_Tests(unittest.TestCase):
|
||||||
"""Test pathname2url() and url2pathname()"""
|
"""Test pathname2url() and url2pathname()"""
|
||||||
|
|
||||||
|
|
|
@ -533,7 +533,7 @@ def quote_from_bytes(bs, safe='/'):
|
||||||
_safe_quoters[cachekey] = quoter
|
_safe_quoters[cachekey] = quoter
|
||||||
return ''.join([quoter[char] for char in bs])
|
return ''.join([quoter[char] for char in bs])
|
||||||
|
|
||||||
def urlencode(query, doseq=False):
|
def urlencode(query, doseq=False, safe='', encoding=None, errors=None):
|
||||||
"""Encode a sequence of two-element tuples or dictionary into a URL query string.
|
"""Encode a sequence of two-element tuples or dictionary into a URL query string.
|
||||||
|
|
||||||
If any values in the query arg are sequences and doseq is true, each
|
If any values in the query arg are sequences and doseq is true, each
|
||||||
|
@ -542,6 +542,10 @@ def urlencode(query, doseq=False):
|
||||||
If the query arg is a sequence of two-element tuples, the order of the
|
If the query arg is a sequence of two-element tuples, the order of the
|
||||||
parameters in the output will match the order of parameters in the
|
parameters in the output will match the order of parameters in the
|
||||||
input.
|
input.
|
||||||
|
|
||||||
|
The query arg may be either a string or a bytes type. When query arg is a
|
||||||
|
string, the safe, encoding and errors parameters are passed to quote_plus for
|
||||||
|
encoding.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if hasattr(query, "items"):
|
if hasattr(query, "items"):
|
||||||
|
@ -566,14 +570,28 @@ def urlencode(query, doseq=False):
|
||||||
l = []
|
l = []
|
||||||
if not doseq:
|
if not doseq:
|
||||||
for k, v in query:
|
for k, v in query:
|
||||||
k = quote_plus(str(k))
|
if isinstance(k, bytes):
|
||||||
v = quote_plus(str(v))
|
k = quote_plus(k, safe)
|
||||||
|
else:
|
||||||
|
k = quote_plus(str(k), safe, encoding, errors)
|
||||||
|
|
||||||
|
if isinstance(v, bytes):
|
||||||
|
v = quote_plus(v, safe)
|
||||||
|
else:
|
||||||
|
v = quote_plus(str(v), safe, encoding, errors)
|
||||||
l.append(k + '=' + v)
|
l.append(k + '=' + v)
|
||||||
else:
|
else:
|
||||||
for k, v in query:
|
for k, v in query:
|
||||||
k = quote_plus(str(k))
|
if isinstance(k, bytes):
|
||||||
if isinstance(v, str):
|
k = quote_plus(k, safe)
|
||||||
v = quote_plus(v)
|
else:
|
||||||
|
k = quote_plus(str(k), safe, encoding, errors)
|
||||||
|
|
||||||
|
if isinstance(v, bytes):
|
||||||
|
v = quote_plus(v, safe)
|
||||||
|
l.append(k + '=' + v)
|
||||||
|
elif isinstance(v, str):
|
||||||
|
v = quote_plus(v, safe, encoding, errors)
|
||||||
l.append(k + '=' + v)
|
l.append(k + '=' + v)
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
|
@ -581,12 +599,16 @@ def urlencode(query, doseq=False):
|
||||||
x = len(v)
|
x = len(v)
|
||||||
except TypeError:
|
except TypeError:
|
||||||
# not a sequence
|
# not a sequence
|
||||||
v = quote_plus(str(v))
|
v = quote_plus(str(v), safe, encoding, errors)
|
||||||
l.append(k + '=' + v)
|
l.append(k + '=' + v)
|
||||||
else:
|
else:
|
||||||
# loop over the sequence
|
# loop over the sequence
|
||||||
for elt in v:
|
for elt in v:
|
||||||
l.append(k + '=' + quote_plus(str(elt)))
|
if isinstance(elt, bytes):
|
||||||
|
elt = quote_plus(elt, safe)
|
||||||
|
else:
|
||||||
|
elt = quote_plus(str(elt), safe, encoding, errors)
|
||||||
|
l.append(k + '=' + elt)
|
||||||
return '&'.join(l)
|
return '&'.join(l)
|
||||||
|
|
||||||
# Utilities to parse URLs (most of these return None for missing parts):
|
# Utilities to parse URLs (most of these return None for missing parts):
|
||||||
|
|
|
@ -75,6 +75,9 @@ C-API
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- Issue #5468: urlencode to handle bytes type and other encodings in its query
|
||||||
|
parameter. Patch by Dan Mahn.
|
||||||
|
|
||||||
- Issue #7673: Fix security vulnerability (CVE-2010-2089) in the audioop
|
- Issue #7673: Fix security vulnerability (CVE-2010-2089) in the audioop
|
||||||
module, ensure that the input string length is a multiple of the frame size
|
module, ensure that the input string length is a multiple of the frame size
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue