gh-96189: Fix test_invalid_utf8 on a number of build bots (GH-96190)

Clearing the temporary directory does not work on some platforms and
leaves files behind.

The test has been updated to follow the pattern in test_cmd_line.py [1], using
the special TESTFN file name rather than a temporary directory.

[1] https://github.com/python/cpython/blob/main/Lib/test/test_cmd_line.py#L559
This commit is contained in:
Michael Droettboom 2022-08-23 16:52:55 -04:00 committed by GitHub
parent e046cf872e
commit 054328f0dd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 38 additions and 38 deletions

View File

@@ -239,50 +239,50 @@ def test_invalid_utf8(self):
# it's an otherwise valid Python source file.
template = b'"%s"\n'
with tempfile.TemporaryDirectory() as tmpd:
fn = os.path.join(tmpd, 'test.py')
fn = TESTFN
self.addCleanup(unlink, fn)
def check(content):
with open(fn, 'wb') as fp:
fp.write(template % content)
script_helper.assert_python_failure(fn)
def check(content):
with open(fn, 'wb') as fp:
fp.write(template % content)
script_helper.assert_python_failure(fn)
# continuation bytes in a sequence of 2, 3, or 4 bytes
continuation_bytes = [bytes([x]) for x in range(0x80, 0xC0)]
# start bytes of a 2-byte sequence equivalent to code points < 0x7F
invalid_2B_seq_start_bytes = [bytes([x]) for x in range(0xC0, 0xC2)]
# start bytes of a 4-byte sequence equivalent to code points > 0x10FFFF
invalid_4B_seq_start_bytes = [bytes([x]) for x in range(0xF5, 0xF8)]
invalid_start_bytes = (
continuation_bytes + invalid_2B_seq_start_bytes +
invalid_4B_seq_start_bytes + [bytes([x]) for x in range(0xF7, 0x100)]
)
# continuation bytes in a sequence of 2, 3, or 4 bytes
continuation_bytes = [bytes([x]) for x in range(0x80, 0xC0)]
# start bytes of a 2-byte sequence equivalent to code points < 0x7F
invalid_2B_seq_start_bytes = [bytes([x]) for x in range(0xC0, 0xC2)]
# start bytes of a 4-byte sequence equivalent to code points > 0x10FFFF
invalid_4B_seq_start_bytes = [bytes([x]) for x in range(0xF5, 0xF8)]
invalid_start_bytes = (
continuation_bytes + invalid_2B_seq_start_bytes +
invalid_4B_seq_start_bytes + [bytes([x]) for x in range(0xF7, 0x100)]
)
for byte in invalid_start_bytes:
check(byte)
for byte in invalid_start_bytes:
check(byte)
for sb in invalid_2B_seq_start_bytes:
for cb in continuation_bytes:
check(sb + cb)
for sb in invalid_2B_seq_start_bytes:
for cb in continuation_bytes:
check(sb + cb)
for sb in invalid_4B_seq_start_bytes:
for cb1 in continuation_bytes[:3]:
for cb3 in continuation_bytes[:3]:
check(sb+cb1+b'\x80'+cb3)
for sb in invalid_4B_seq_start_bytes:
for cb1 in continuation_bytes[:3]:
for cb3 in continuation_bytes[:3]:
check(sb+cb1+b'\x80'+cb3)
for cb in [bytes([x]) for x in range(0x80, 0xA0)]:
check(b'\xE0'+cb+b'\x80')
check(b'\xE0'+cb+b'\xBF')
# surrogates
for cb in [bytes([x]) for x in range(0xA0, 0xC0)]:
check(b'\xED'+cb+b'\x80')
check(b'\xED'+cb+b'\xBF')
for cb in [bytes([x]) for x in range(0x80, 0x90)]:
check(b'\xF0'+cb+b'\x80\x80')
check(b'\xF0'+cb+b'\xBF\xBF')
for cb in [bytes([x]) for x in range(0x90, 0xC0)]:
check(b'\xF4'+cb+b'\x80\x80')
check(b'\xF4'+cb+b'\xBF\xBF')
for cb in [bytes([x]) for x in range(0x80, 0xA0)]:
check(b'\xE0'+cb+b'\x80')
check(b'\xE0'+cb+b'\xBF')
# surrogates
for cb in [bytes([x]) for x in range(0xA0, 0xC0)]:
check(b'\xED'+cb+b'\x80')
check(b'\xED'+cb+b'\xBF')
for cb in [bytes([x]) for x in range(0x80, 0x90)]:
check(b'\xF0'+cb+b'\x80\x80')
check(b'\xF0'+cb+b'\xBF\xBF')
for cb in [bytes([x]) for x in range(0x90, 0xC0)]:
check(b'\xF4'+cb+b'\x80\x80')
check(b'\xF4'+cb+b'\xBF\xBF')
class BytesSourceEncodingTest(AbstractSourceEncodingTest, unittest.TestCase):