python-future/tests/test_future/test_str.py

592 lines
19 KiB
Python

# -*- coding: utf-8 -*-
"""
Tests for the backported :class:`str` class.
"""
from __future__ import absolute_import, unicode_literals, print_function
from future.builtins import *
from future import utils
from future.tests.base import unittest, expectedFailurePY2
import os
# Non-ASCII sample text shared by the encode/decode round-trip tests and the
# join test in this module.
TEST_UNICODE_STR = u'ℝεα∂@ßʟ℮ ☂ℯṧт υηḯ¢☺ḓ℮'
class TestStr(unittest.TestCase):
    """
    Behavioural tests for the ``str`` type exported by ``future.builtins``.

    Branches guarded by ``utils.PY2`` cover behaviour that is only asserted
    when running on Python 2 (mostly leniency towards native byte-strings).
    """

    def test_str(self):
        """str is distinct from bytes; str() of text and of bytes literals."""
        # NOTE(review): this method used to be shadowed by a second
        # ``test_str`` defined further down, so it never ran. Confirm the
        # native-bytes assertion below holds on Python 2 as well.
        self.assertFalse(str is bytes)
        self.assertEqual(str('blah'), u'blah')  # u'' prefix: Py3.3 and Py2 only
        self.assertEqual(str(b'1234'), "b'1234'")

    def test_bool_str(self):
        """Truthiness matches that of the equivalent u'' literals."""
        s1 = str(u'abc')
        s2 = u'abc'
        s3 = str(u'')
        s4 = u''
        self.assertEqual(bool(s1), bool(s2))
        self.assertEqual(bool(s3), bool(s4))

    def test_os_path_join(self):
        """
        Issue #15: can't os.path.join(u'abc', str(u'def'))
        """
        self.assertEqual(os.path.join(u'abc', str(u'def')),
                         u'abc{0}def'.format(os.sep))

    def test_str_encode_utf8(self):
        """encode('utf-8') gives bytes (not str); decoding round-trips."""
        b = str(TEST_UNICODE_STR).encode('utf-8')
        self.assertTrue(isinstance(b, bytes))
        self.assertFalse(isinstance(b, str))
        s = b.decode('utf-8')
        self.assertTrue(isinstance(s, str))
        self.assertEqual(s, TEST_UNICODE_STR)

    def test_str_encode_cp1251(self):
        """str(bytes_obj, 'cp1251') decodes native and wrapped byte-strings."""
        b1 = b'\xcd\xeb\xff'
        s1 = str(b1, 'cp1251')
        self.assertEqual(s1, u'Нля')

        b2 = bytes(b'\xcd\xeb\xff')
        s2 = str(b2, 'cp1251')
        self.assertEqual(s2, u'Нля')

    def test_str_encode_decode_with_py2_str_arg(self):
        """Same round-trip as the utf-8 test, with a native-str codec name."""
        # Try passing a standard Py2 string (as if unicode_literals weren't imported)
        b = str(TEST_UNICODE_STR).encode(utils.bytes_to_native_str(b'utf-8'))
        self.assertTrue(isinstance(b, bytes))
        self.assertFalse(isinstance(b, str))
        s = b.decode(utils.bytes_to_native_str(b'utf-8'))
        self.assertTrue(isinstance(s, str))
        self.assertEqual(s, TEST_UNICODE_STR)

    def test_str_encode_decode_big5(self):
        """A big5 encode/decode round-trip compares equal to str(a)."""
        a = u'Unicode string: \u5b54\u5b50'
        self.assertEqual(str(a), a.encode('big5').decode('big5'))

    def test_str_empty(self):
        """
        str() -> u''
        """
        self.assertEqual(str(), u'')

    def test_str_iterable_of_ints(self):
        """str() of a list of ints is the list's text form, not 'ABC'."""
        self.assertEqual(str([65, 66, 67]), '[65, 66, 67]')
        self.assertNotEqual(str([65, 66, 67]), 'ABC')

    def test_str_str(self):
        """str('ABC') equals both the u'' and the plain literal."""
        self.assertEqual(str('ABC'), u'ABC')
        self.assertEqual(str('ABC'), 'ABC')

    def test_str_is_str(self):
        """str(s) returns the very same object; repr has no prefix."""
        s = str(u'ABC')
        self.assertTrue(str(s) is s)
        self.assertEqual(repr(str(s)), "'ABC'")

    def test_str_fromhex(self):
        """str has no ``fromhex`` attribute."""
        self.assertFalse(hasattr(str, 'fromhex'))

    def test_str_hasattr_decode(self):
        """
        This test tests whether hasattr(s, 'decode') is False, like it is on Py3.

        Sometimes code (such as http.client in Py3.3) checks hasattr(mystring,
        'decode') to determine if a string-like thing needs encoding. It would
        be nice to have this return False so the string can be treated on Py2
        like a Py3 string.
        """
        s = str(u'abcd')
        self.assertFalse(hasattr(s, 'decode'))
        self.assertTrue(hasattr(s, 'encode'))

    def test_isinstance_str(self):
        """A str(...) result is an instance of str."""
        self.assertTrue(isinstance(str('blah'), str))

    def test_isinstance_str_subclass(self):
        """
        Issue #89
        """
        value = str(u'abc')

        class Magic(str):
            pass

        self.assertTrue(isinstance(value, str))
        self.assertFalse(isinstance(value, Magic))

    def test_str_getitem(self):
        """Indexing and slicing yield text, never integer code points."""
        s = str('ABCD')
        self.assertNotEqual(s[0], 65)
        self.assertEqual(s[0], 'A')
        self.assertEqual(s[-1], 'D')
        self.assertEqual(s[0:1], 'A')
        self.assertEqual(s[:], u'ABCD')

    @unittest.expectedFailure
    def test_u_literal_creates_newstr_object(self):
        """
        It would nice if the u'' or '' literal syntax could be coaxed
        into producing our new str objects somehow ...
        """
        s = u'ABCD'
        self.assertTrue(isinstance(s, str))
        # NOTE(review): ``b`` is not defined in this method, so the next line
        # raises NameError, which the expectedFailure decorator then masks.
        # It probably should read ``repr(s)`` -- confirm before changing,
        # because fixing it may turn this test into an unexpected success.
        self.assertFalse(repr(b).startswith('b'))

    def test_repr(self):
        """repr() of a str does not start with 'b'."""
        s = str('ABCD')
        self.assertFalse(repr(s).startswith('b'))

    def test_str_conversion(self):
        """str() applied to an existing str compares equal to its text."""
        # Formerly a second ``test_str`` that shadowed the first one, and it
        # used assertTrue(value, msg), which never compared anything.
        b = str('ABCD')
        self.assertEqual(str(b), 'ABCD')

    def test_str_setitem(self):
        """Item assignment on a str raises TypeError."""
        s = str('ABCD')
        with self.assertRaises(TypeError):
            s[0] = b'B'

    def test_str_iteration(self):
        """Iteration yields one-character str items, not ints."""
        s = str('ABCD')
        for item in s:
            self.assertFalse(isinstance(item, int))
            self.assertTrue(isinstance(item, str))
        self.assertNotEqual(list(s), [65, 66, 67, 68])
        self.assertEqual(list(s), ['A', 'B', 'C', 'D'])

    def test_str_plus_bytes(self):
        """Adding a str and a ``bytes(...)`` object raises TypeError."""
        s = str(u'ABCD')
        b = b'EFGH'
        # We allow this now:
        # with self.assertRaises(TypeError):
        #     s + b
        # str objects don't have an __radd__ method, so the following
        # does not raise a TypeError. Is this a problem?
        # with self.assertRaises(TypeError):
        #     b + s

        # Now with our custom bytes object:
        b2 = bytes(b'EFGH')
        with self.assertRaises(TypeError):
            s + b2
        with self.assertRaises(TypeError):
            b2 + s

    def test_str_plus_str(self):
        """Concatenation with str or text literals (either side) gives str."""
        s1 = str('ABCD')
        s2 = s1 + s1
        self.assertEqual(s2, u'ABCDABCD')
        self.assertTrue(isinstance(s2, str))

        s3 = s1 + u'ZYXW'
        self.assertEqual(s3, 'ABCDZYXW')
        self.assertTrue(isinstance(s3, str))

        s4 = 'ZYXW' + s1
        self.assertEqual(s4, 'ZYXWABCD')
        self.assertTrue(isinstance(s4, str))

    def test_str_join_str(self):
        """join() over text items returns a str."""
        s = str(' * ')
        strings = ['AB', 'EFGH', 'IJKL', TEST_UNICODE_STR]
        result = s.join(strings)
        self.assertEqual(result, 'AB * EFGH * IJKL * ' + TEST_UNICODE_STR)
        self.assertTrue(isinstance(result, str))

    def test_str_join_bytes(self):
        """join() rejects ``bytes(...)`` items with TypeError."""
        s = str('ABCD')
        byte_strings1 = [b'EFGH', u'IJKL']
        # We allow this on Python 2 for compatibility with old libraries:
        if utils.PY2:
            self.assertEqual(s.join(byte_strings1), u'EFGHABCDIJKL')

        byte_strings2 = [bytes(b'EFGH'), u'IJKL']
        with self.assertRaises(TypeError):
            s.join(byte_strings2)

    def test_str_join_staticmethod(self):
        """
        Issue #33
        """
        c = str.join('-', ['a', 'b'])
        self.assertEqual(c, 'a-b')
        self.assertEqual(type(c), str)

    def test_str_join_staticmethod_workaround_1(self):
        """
        Issue #33
        """
        c = str('-').join(['a', 'b'])
        self.assertEqual(c, 'a-b')
        self.assertEqual(type(c), str)

    def test_str_join_staticmethod_workaround_2(self):
        """
        Issue #33
        """
        c = str.join(str('-'), ['a', 'b'])
        self.assertEqual(c, 'a-b')
        self.assertEqual(type(c), str)

    def test_str_replace(self):
        """replace() returns str and rejects ``bytes(...)`` arguments."""
        s = str('ABCD')
        c = s.replace('A', 'F')
        self.assertEqual(c, 'FBCD')
        self.assertTrue(isinstance(c, str))

        with self.assertRaises(TypeError):
            s.replace(bytes(b'A'), u'F')
        with self.assertRaises(TypeError):
            s.replace(u'A', bytes(b'F'))

    def test_str_partition(self):
        """partition() splits at the first separator; parts are str."""
        s1 = str('ABCD')
        parts = s1.partition('B')
        self.assertEqual(parts, ('A', 'B', 'CD'))
        self.assertTrue(all(isinstance(p, str) for p in parts))

        s2 = str('ABCDABCD')
        parts = s2.partition('B')
        self.assertEqual(parts, ('A', 'B', 'CDABCD'))

    def test_str_rpartition(self):
        """rpartition() splits at the last separator; parts are str."""
        s2 = str('ABCDABCD')
        parts = s2.rpartition('B')
        self.assertEqual(parts, ('ABCDA', 'B', 'CD'))
        self.assertTrue(all(isinstance(p, str) for p in parts))

    def test_str_contains_something(self):
        """``in`` accepts text; ``bytes(...)`` and ints raise TypeError."""
        s = str('ABCD')
        self.assertTrue('A' in s)
        if utils.PY2:
            self.assertTrue(b'A' in s)
        with self.assertRaises(TypeError):
            bytes(b'A') in s
        with self.assertRaises(TypeError):
            65 in s                                 # unlike bytes

        self.assertTrue('AB' in s)
        self.assertFalse(str([65, 66]) in s)        # unlike bytes
        self.assertFalse('AC' in s)
        self.assertFalse('Z' in s)

    def test_str_index(self):
        """index() accepts text; ints and ``bytes(...)`` raise TypeError."""
        s = str('ABCD')
        self.assertEqual(s.index('B'), 1)
        with self.assertRaises(TypeError):
            s.index(67)
        with self.assertRaises(TypeError):
            s.index(bytes(b'C'))

    def test_startswith(self):
        """startswith() with text, tuples, and rejected argument types."""
        s = str('abcd')
        self.assertTrue(s.startswith('a'))
        self.assertTrue(s.startswith(('a', 'd')))
        self.assertTrue(s.startswith(str('ab')))
        if utils.PY2:
            # We allow this, because e.g. Python 2 os.path.join concatenates
            # its arg with a byte-string '/' indiscriminately.
            self.assertFalse(s.startswith(b'A'))
            self.assertTrue(s.startswith(b'a'))
        with self.assertRaises(TypeError):
            self.assertFalse(s.startswith(bytes(b'A')))
        with self.assertRaises(TypeError):
            s.startswith((bytes(b'A'), bytes(b'B')))
        with self.assertRaises(TypeError):
            s.startswith(65)

    def test_join(self):
        """join() over the characters of text; ``bytes(...)`` is rejected."""
        sep = str('-')
        self.assertEqual(sep.join('abcd'), 'a-b-c-d')
        if utils.PY2:
            sep.join(b'abcd')
        with self.assertRaises(TypeError):
            sep.join(bytes(b'abcd'))

    def test_endswith(self):
        """endswith() with text, tuples, and rejected argument types."""
        s = str('abcd')
        self.assertTrue(s.endswith('d'))
        self.assertTrue(s.endswith(('b', 'd')))
        self.assertTrue(s.endswith(str('cd')))
        self.assertFalse(s.endswith(('A', 'B')))
        if utils.PY2:
            self.assertFalse(s.endswith(b'D'))
            self.assertTrue(s.endswith((b'D', b'd')))
        with self.assertRaises(TypeError):
            s.endswith(65)
        with self.assertRaises(TypeError):
            s.endswith((bytes(b'D'),))

    def test_split(self):
        """split() on a text separator; ``bytes(...)`` raises TypeError."""
        s = str('ABCD')
        self.assertEqual(s.split('B'), ['A', 'CD'])
        if utils.PY2:
            self.assertEqual(s.split(b'B'), ['A', 'CD'])
        with self.assertRaises(TypeError):
            s.split(bytes(b'B'))

    def test_rsplit(self):
        """rsplit() on a text separator; ``bytes(...)`` raises TypeError."""
        s = str('ABCD')
        self.assertEqual(s.rsplit('B'), ['A', 'CD'])
        if utils.PY2:
            self.assertEqual(s.rsplit(b'B'), ['A', 'CD'])
        with self.assertRaises(TypeError):
            s.rsplit(bytes(b'B'))

    def test_eq_bytes(self):
        """str and ``bytes(...)`` objects with the same content are unequal."""
        s = str('ABCD')
        b = bytes(b'ABCD')
        self.assertNotEqual(s, b)
        self.assertNotEqual(str(''), bytes(b''))
        native_s = 'ABCD'
        native_b = b'ABCD'
        self.assertFalse(b == native_s)
        self.assertTrue(b != native_s)

        # Fails on Py2:
        # self.assertNotEqual(native_s, b)
        # with no obvious way to change this.

        # For backward compatibility with broken string-handling code in
        # Py2 libraries, we allow the following:

        if utils.PY2:
            self.assertTrue(native_b == s)
            self.assertFalse(s != native_b)

    def test_eq(self):
        """Equality against text, byte-strings, and an unknown type."""
        s = str('ABCD')
        self.assertEqual('ABCD', s)
        self.assertEqual(s, 'ABCD')
        self.assertEqual(s, s)
        self.assertTrue(u'ABCD' == s)
        if utils.PY2:
            self.assertTrue(b'ABCD' == s)
        else:
            self.assertFalse(b'ABCD' == s)
        self.assertFalse(bytes(b'ABCD') == s)

        # We want to ensure comparison against unknown types return
        # NotImplemented so that the interpreter can rerun the test with the
        # other class.  We expect the operator to return False if both return
        # NotImplemented.
        class OurCustomString(object):
            def __init__(self, string):
                self.string = string

            def __eq__(self, other):
                return NotImplemented

        our_str = OurCustomString("foobar")
        new_str = str("foobar")

        self.assertFalse(our_str == new_str)
        self.assertFalse(new_str == our_str)
        self.assertIs(new_str.__eq__(our_str), NotImplemented)
        self.assertIs(our_str.__eq__(new_str), NotImplemented)

    def test_hash(self):
        """hash() of a str is an int."""
        s = str('ABCD')
        self.assertIsInstance(hash(s), int)

    def test_ne(self):
        """Inequality against text, numbers, lists, and byte-strings."""
        s = str('ABCD')
        self.assertNotEqual('A', s)
        self.assertNotEqual(s, 'A')
        self.assertNotEqual(s, 5)
        self.assertNotEqual(2.7, s)
        self.assertNotEqual(s, ['A', 'B', 'C', 'D'])
        if utils.PY2:
            self.assertFalse(b'ABCD' != s)
        else:
            self.assertTrue(b'ABCD' != s)
        self.assertTrue(bytes(b'ABCD') != s)

    def test_cmp(self):
        """Ordering comparisons against non-string types raise TypeError."""
        s = str(u'ABC')
        with self.assertRaises(TypeError):
            s > 3
        with self.assertRaises(TypeError):
            s < 1000
        with self.assertRaises(TypeError):
            s <= 3
        with self.assertRaises(TypeError):
            s >= int(3)
        with self.assertRaises(TypeError):
            s < 3.3
        with self.assertRaises(TypeError):
            s > (3.3 + 3j)
        with self.assertRaises(TypeError):
            s >= (1, 2)
        with self.assertRaises(TypeError):
            s <= [1, 2]

    def test_mul(self):
        """``s * n`` gives str for integer n and TypeError otherwise."""
        s = str(u'ABC')
        c = s * 4
        self.assertTrue(isinstance(c, str))
        self.assertEqual(c, u'ABCABCABCABC')
        d = s * int(4)
        self.assertTrue(isinstance(d, str))
        self.assertEqual(d, u'ABCABCABCABC')
        if utils.PY2:
            e = s * long(4)
            self.assertTrue(isinstance(e, str))
            self.assertEqual(e, u'ABCABCABCABC')
        with self.assertRaises(TypeError):
            s * 3.3
        with self.assertRaises(TypeError):
            s * (3.3 + 3j)

    def test_rmul(self):
        """``n * s`` gives str for integer n and TypeError otherwise."""
        s = str(u'XYZ')
        c = 3 * s
        self.assertTrue(isinstance(c, str))
        self.assertEqual(c, u'XYZXYZXYZ')
        # The integer goes on the left so that the reflected operation is
        # what gets exercised (this used to be ``s * int(3)``).
        d = int(3) * s
        self.assertTrue(isinstance(d, str))
        self.assertEqual(d, u'XYZXYZXYZ')
        if utils.PY2:
            e = long(3) * s
            self.assertTrue(isinstance(e, str))
            self.assertEqual(e, u'XYZXYZXYZ')
        with self.assertRaises(TypeError):
            3.3 * s
        with self.assertRaises(TypeError):
            (3.3 + 3j) * s

    @unittest.skip('Fails on Python <= 2.7.6 due to string subclass slicing bug')
    def test_slice(self):
        """
        Do slices return newstr objects?
        """
        s = str(u'abcd')
        self.assertEqual(s[:2], u'ab')
        self.assertEqual(type(s[:2]), str)
        self.assertEqual(s[-2:], u'cd')
        self.assertEqual(type(s[-2:]), str)

    @unittest.skip('Fails on Python <= 2.7.6 due to string subclass slicing bug')
    def test_subclassing(self):
        """
        Can newstr be subclassed and do str methods then return instances of
        the same class? (This is the Py3 behaviour).
        """
        class SubClass(str):
            pass

        s = SubClass(u'abcd')
        self.assertEqual(type(s), SubClass)
        self.assertEqual(type(s + s), str)
        self.assertEqual(type(s[0]), str)
        self.assertEqual(type(s[:2]), str)
        self.assertEqual(type(s.join([u'_', u'_', u'_'])), str)

    def test_subclassing_2(self):
        """
        Tests __new__ method in subclasses. Fails in versions <= 0.11.4
        """
        class SubClass(str):
            def __new__(cls, *args, **kwargs):
                self = str.__new__(cls, *args, **kwargs)
                assert type(self) == SubClass
                return self

        s = SubClass(u'abcd')
        # Check the constructed object explicitly: the bare ``assert`` inside
        # __new__ is stripped when Python runs with -O.
        self.assertEqual(type(s), SubClass)

    # From Python 3.3: test_unicode.py
    def checkequalnofix(self, result, object, methodname, *args):
        """
        Call ``object.methodname(*args)`` and check it against ``result``.

        Both the value and the exact type of the outcome must match
        ``result``. If the method handed back ``object`` itself, the call is
        repeated on an instance of a str subclass and the outcome must then
        be a different object.
        """
        method = getattr(object, methodname)
        realresult = method(*args)
        self.assertEqual(realresult, result)
        self.assertTrue(type(realresult) is type(result))

        # if the original is returned make sure that
        # this doesn't happen with subclasses
        if realresult is object:
            class usub(str):
                def __repr__(self):
                    return 'usub(%r)' % str.__repr__(self)
            object = usub(object)
            method = getattr(object, methodname)
            realresult = method(*args)
            self.assertEqual(realresult, result)
            self.assertTrue(object is not realresult)

    # Type whose maketrans() is exercised by test_maketrans_translate.
    type2test = str

    def test_maketrans_translate(self):
        """translate() with hand-built tables and with maketrans() tables."""
        # these work with plain translate()
        self.checkequalnofix('bbbc', 'abababc', 'translate',
                             {ord('a'): None})
        self.checkequalnofix('iiic', 'abababc', 'translate',
                             {ord('a'): None, ord('b'): ord('i')})
        self.checkequalnofix('iiix', 'abababc', 'translate',
                             {ord('a'): None, ord('b'): ord('i'), ord('c'): 'x'})
        self.checkequalnofix('c', 'abababc', 'translate',
                             {ord('a'): None, ord('b'): ''})
        self.checkequalnofix('xyyx', 'xzx', 'translate',
                             {ord('z'): 'yy'})
        # this needs maketrans()
        self.checkequalnofix('abababc', 'abababc', 'translate',
                             {'b': '<i>'})
        tbl = self.type2test.maketrans({'a': None, 'b': '<i>'})
        self.checkequalnofix('<i><i><i>c', 'abababc', 'translate', tbl)
        # test alternative way of calling maketrans()
        tbl = self.type2test.maketrans('abc', 'xyz', 'd')
        self.checkequalnofix('xyzzy', 'abdcdcbdddd', 'translate', tbl)

        self.assertRaises(TypeError, self.type2test.maketrans)
        self.assertRaises(ValueError, self.type2test.maketrans, 'abc', 'defg')
        self.assertRaises(TypeError, self.type2test.maketrans, 2, 'def')
        self.assertRaises(TypeError, self.type2test.maketrans, 'abc', 2)
        self.assertRaises(TypeError, self.type2test.maketrans, 'abc', 'def', 2)
        self.assertRaises(ValueError, self.type2test.maketrans, {'xy': 2})
        self.assertRaises(TypeError, self.type2test.maketrans, {(1,): 2})

        self.assertRaises(TypeError, 'hello'.translate)
        self.assertRaises(TypeError, 'abababc'.translate, 'abc', 'xyz')

    @expectedFailurePY2
    def test_multiple_inheritance(self):
        """
        Issue #96 (for newstr instead of newobject)
        """
        if utils.PY2:
            from collections import Container
        else:
            from collections.abc import Container

        class Base(str):
            pass

        class Foo(Base, Container):
            def __contains__(self, item):
                return False

    @expectedFailurePY2
    def test_with_metaclass_and_str(self):
        """
        Issue #91 (for newstr instead of newobject)
        """
        from future.utils import with_metaclass

        class MetaClass(type):
            pass

        class TestClass(with_metaclass(MetaClass, str)):
            pass

    def test_surrogateescape_encoding(self):
        """
        Tests whether surrogateescape encoding works correctly.
        """
        pairs = [(u'\udcc3', b'\xc3'),
                 (u'\udcff', b'\xff')]

        for (s, b) in pairs:
            encoded = str(s).encode('utf-8', 'surrogateescape')
            self.assertEqual(b, encoded)
            self.assertTrue(isinstance(encoded, bytes))
            self.assertEqual(s, encoded.decode('utf-8', 'surrogateescape'))
# Run this module's tests when it is executed directly as a script.
if __name__ == '__main__':
    unittest.main()