mirror of https://github.com/python/cpython.git
Python 2 can encode/decode surrogates to utf-8. Add a test for this.
This commit is contained in:
parent
8ef3907044
commit
370d85cee4
|
@@ -667,11 +667,17 @@ def test_utf8_decode_invalid_sequences(self):
|
|||
# see http://www.unicode.org/versions/Unicode5.2.0/ch03.pdf
|
||||
# (table 3-7) and http://www.rfc-editor.org/rfc/rfc3629.txt
|
||||
#for cb in map(chr, range(0xA0, 0xC0)):
|
||||
#sys.__stdout__.write('\\xED\\x%02x\\x80\n' % ord(cb))
|
||||
#self.assertRaises(UnicodeDecodeError,
|
||||
#('\xED'+cb+'\x80').decode, 'utf-8')
|
||||
#self.assertRaises(UnicodeDecodeError,
|
||||
#('\xED'+cb+'\xBF').decode, 'utf-8')
|
||||
# but since they are valid on Python 2 add a test for that:
|
||||
for cb, surrogate in zip(map(chr, range(0xA0, 0xC0)),
|
||||
map(unichr, range(0xd800, 0xe000, 64))):
|
||||
encoded = '\xED'+cb+'\x80'
|
||||
self.assertEqual(encoded.decode('utf-8'), surrogate)
|
||||
self.assertEqual(surrogate.encode('utf-8'), encoded)
|
||||
|
||||
for cb in map(chr, range(0x80, 0x90)):
|
||||
self.assertRaises(UnicodeDecodeError,
|
||||
('\xF0'+cb+'\x80\x80').decode, 'utf-8')
|
||||
|
|
Loading…
Reference in New Issue