mirror of https://github.com/python/cpython.git
Issue #22687: Fixed some corner cases in breaking words in textwrap.
Got rid of quadratic complexity in breaking long words.
This commit is contained in:
parent
b365a06a84
commit
72bd327db0
|
@ -184,6 +184,16 @@ def test_hyphenated(self):
|
||||||
self.check_wrap(text, 42,
|
self.check_wrap(text, 42,
|
||||||
["this-is-a-useful-feature-for-reformatting-",
|
["this-is-a-useful-feature-for-reformatting-",
|
||||||
"posts-from-tim-peters'ly"])
|
"posts-from-tim-peters'ly"])
|
||||||
|
# The test tests current behavior but is not testing parts of the API.
|
||||||
|
expect = ("this-|is-|a-|useful-|feature-|for-|"
|
||||||
|
"reformatting-|posts-|from-|tim-|peters'ly").split('|')
|
||||||
|
self.check_wrap(text, 1, expect, break_long_words=False)
|
||||||
|
self.check_split(text, expect)
|
||||||
|
|
||||||
|
self.check_split('e-mail', ['e-mail'])
|
||||||
|
self.check_split('Jelly-O', ['Jelly-O'])
|
||||||
|
# The test tests current behavior but is not testing parts of the API.
|
||||||
|
self.check_split('half-a-crown', 'half-|a-|crown'.split('|'))
|
||||||
|
|
||||||
def test_hyphenated_numbers(self):
|
def test_hyphenated_numbers(self):
|
||||||
# Test that hyphenated numbers (e.g. dates) are not broken like words.
|
# Test that hyphenated numbers (e.g. dates) are not broken like words.
|
||||||
|
@ -195,6 +205,7 @@ def test_hyphenated_numbers(self):
|
||||||
'released on 1994-02-15.'])
|
'released on 1994-02-15.'])
|
||||||
self.check_wrap(text, 40, ['Python 1.0.0 was released on 1994-01-26.',
|
self.check_wrap(text, 40, ['Python 1.0.0 was released on 1994-01-26.',
|
||||||
'Python 1.0.1 was released on 1994-02-15.'])
|
'Python 1.0.1 was released on 1994-02-15.'])
|
||||||
|
self.check_wrap(text, 1, text.split(), break_long_words=False)
|
||||||
|
|
||||||
text = "I do all my shopping at 7-11."
|
text = "I do all my shopping at 7-11."
|
||||||
self.check_wrap(text, 25, ["I do all my shopping at",
|
self.check_wrap(text, 25, ["I do all my shopping at",
|
||||||
|
@ -202,6 +213,7 @@ def test_hyphenated_numbers(self):
|
||||||
self.check_wrap(text, 27, ["I do all my shopping at",
|
self.check_wrap(text, 27, ["I do all my shopping at",
|
||||||
"7-11."])
|
"7-11."])
|
||||||
self.check_wrap(text, 29, ["I do all my shopping at 7-11."])
|
self.check_wrap(text, 29, ["I do all my shopping at 7-11."])
|
||||||
|
self.check_wrap(text, 1, text.split(), break_long_words=False)
|
||||||
|
|
||||||
def test_em_dash(self):
|
def test_em_dash(self):
|
||||||
# Test text with em-dashes
|
# Test text with em-dashes
|
||||||
|
@ -326,6 +338,10 @@ def test_punct_hyphens(self):
|
||||||
self.check_split("the ['wibble-wobble'] widget",
|
self.check_split("the ['wibble-wobble'] widget",
|
||||||
['the', ' ', "['wibble-", "wobble']", ' ', 'widget'])
|
['the', ' ', "['wibble-", "wobble']", ' ', 'widget'])
|
||||||
|
|
||||||
|
# The test tests current behavior but is not testing parts of the API.
|
||||||
|
self.check_split("what-d'you-call-it.",
|
||||||
|
"what-d'you-|call-|it.".split('|'))
|
||||||
|
|
||||||
def test_funky_parens (self):
|
def test_funky_parens (self):
|
||||||
# Second part of SF bug #596434: long option strings inside
|
# Second part of SF bug #596434: long option strings inside
|
||||||
# parentheses.
|
# parentheses.
|
||||||
|
|
|
@ -79,10 +79,25 @@ class TextWrapper:
|
||||||
# splits into
|
# splits into
|
||||||
# Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option!
|
# Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option!
|
||||||
# (after stripping out empty strings).
|
# (after stripping out empty strings).
|
||||||
wordsep_re = re.compile(
|
word_punct = r'[\w!"\'&.,?]'
|
||||||
r'(\s+|' # any whitespace
|
letter = r'[^\d\W]'
|
||||||
r'[^\s\w]*\w+[^0-9\W]-(?=\w+[^0-9\W])|' # hyphenated words
|
wordsep_re = re.compile(r'''
|
||||||
r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))') # em-dash
|
( # any whitespace
|
||||||
|
\s+
|
||||||
|
| # em-dash between words
|
||||||
|
(?<=%(wp)s) -{2,} (?=\w)
|
||||||
|
| # word, possibly hyphenated
|
||||||
|
\S+? (?:
|
||||||
|
# hyphenated word
|
||||||
|
-(?: (?<=%(lt)s{2}-) | (?<=%(lt)s-%(lt)s-))
|
||||||
|
(?= %(lt)s -? %(lt)s)
|
||||||
|
| # end of word
|
||||||
|
(?=\s|\Z)
|
||||||
|
| # em-dash
|
||||||
|
(?<=%(wp)s) (?=-{2,}\w)
|
||||||
|
)
|
||||||
|
)''' % {'wp': word_punct, 'lt': letter}, re.VERBOSE)
|
||||||
|
del word_punct, letter
|
||||||
|
|
||||||
# This less funky little regex just splits on recognized spaces. E.g.
|
# This less funky little regex just splits on recognized spaces. E.g.
|
||||||
# "Hello there -- you goof-ball, use the -b option!"
|
# "Hello there -- you goof-ball, use the -b option!"
|
||||||
|
|
|
@ -26,6 +26,9 @@ Core and Builtins
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- Issue #22687: Fixed some corner cases in breaking words in textwrap.
|
||||||
|
Got rid of quadratic complexity in breaking long words.
|
||||||
|
|
||||||
- Issue #20289: The copy module now uses pickle protocol 4 (PEP 3154) and
|
- Issue #20289: The copy module now uses pickle protocol 4 (PEP 3154) and
|
||||||
supports copying of instances of classes whose __new__ method takes
|
supports copying of instances of classes whose __new__ method takes
|
||||||
keyword-only arguments.
|
keyword-only arguments.
|
||||||
|
|
Loading…
Reference in New Issue