Optimizing regexp calls to improve performance (#1532)

This commit is contained in:
Chen Zhidong 2023-04-23 18:00:31 +08:00 committed by GitHub
parent 63d8a09082
commit 787453c6f0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 35 additions and 34 deletions

46
calc.go
View File

@ -193,6 +193,24 @@ var (
return fmt.Sprintf("R[%d]C[%d]", row, col), nil
},
}
// formularFormats lists the precompiled criteria patterns that
// formulaCriteriaParser tries in order, taking the first one that matches.
// The pattern at index i pairs with the criteria type at formularCriterias[i].
// The two-character operators (<>, <=, >=) are listed before the bare < and >
// patterns, which would otherwise match the same input first.
formularFormats = []*regexp.Regexp{
regexp.MustCompile(`^(\d+)$`),
regexp.MustCompile(`^=(.*)$`),
regexp.MustCompile(`^<>(.*)$`),
regexp.MustCompile(`^<=(.*)$`),
regexp.MustCompile(`^>=(.*)$`),
regexp.MustCompile(`^<(.*)$`),
regexp.MustCompile(`^>(.*)$`),
}
// formularCriterias holds the criteria type assigned for each pattern in
// formularFormats, matched up by index. formulaCriteriaParser indexes this
// slice with the position of the matching pattern, so both slices must keep
// the same length and order.
formularCriterias = []byte{
criteriaEq,
criteriaEq,
criteriaNe,
criteriaLe,
criteriaGe,
criteriaL,
criteriaG,
}
)
// calcContext defines the formula execution context.
@ -1654,33 +1672,11 @@ func formulaCriteriaParser(exp string) (fc *formulaCriteria) {
if exp == "" {
return
}
if match := regexp.MustCompile(`^(\d+)$`).FindStringSubmatch(exp); len(match) > 1 {
fc.Type, fc.Condition = criteriaEq, match[1]
for i, re := range formularFormats {
if match := re.FindStringSubmatch(exp); len(match) > 1 {
fc.Type, fc.Condition = formularCriterias[i], match[1]
return
}
if match := regexp.MustCompile(`^=(.*)$`).FindStringSubmatch(exp); len(match) > 1 {
fc.Type, fc.Condition = criteriaEq, match[1]
return
}
if match := regexp.MustCompile(`^<>(.*)$`).FindStringSubmatch(exp); len(match) > 1 {
fc.Type, fc.Condition = criteriaNe, match[1]
return
}
if match := regexp.MustCompile(`^<=(.*)$`).FindStringSubmatch(exp); len(match) > 1 {
fc.Type, fc.Condition = criteriaLe, match[1]
return
}
if match := regexp.MustCompile(`^>=(.*)$`).FindStringSubmatch(exp); len(match) > 1 {
fc.Type, fc.Condition = criteriaGe, match[1]
return
}
if match := regexp.MustCompile(`^<(.*)$`).FindStringSubmatch(exp); len(match) > 1 {
fc.Type, fc.Condition = criteriaL, match[1]
return
}
if match := regexp.MustCompile(`^>(.*)$`).FindStringSubmatch(exp); len(match) > 1 {
fc.Type, fc.Condition = criteriaG, match[1]
return
}
if strings.Contains(exp, "?") {
exp = strings.ReplaceAll(exp, "?", ".")

View File

@ -977,6 +977,7 @@ func (f *File) searchSheet(name, value string, regSearch bool) (result []string,
if sst, err = f.sharedStringsReader(); err != nil {
return
}
regex := regexp.MustCompile(value)
decoder := f.xmlNewDecoder(bytes.NewReader(f.readBytes(name)))
for {
var token xml.Token
@ -1001,7 +1002,6 @@ func (f *File) searchSheet(name, value string, regSearch bool) (result []string,
_ = decoder.DecodeElement(&colCell, &xmlElement)
val, _ := colCell.getValueFrom(f, sst, false)
if regSearch {
regex := regexp.MustCompile(value)
if !regex.MatchString(val) {
continue
}

View File

@ -23,6 +23,13 @@ import (
"unicode/utf8"
)
// Regular expressions used by the auto filter expression parser, compiled
// once at package initialization instead of on every call.
var (
// expressionFormat splits a filter expression into tokens: either a
// double-quoted string (a doubled "" is allowed inside the quotes) or a run
// of non-whitespace characters.
expressionFormat = regexp.MustCompile(`"(?:[^"]|"")*"|\S+`)
// conditionFormat reports whether a token contains "or" or "||". The
// pattern is unanchored, so it matches anywhere in the token.
conditionFormat = regexp.MustCompile(`(or|\|\|)`)
// blankFormat reports whether a token contains "blanks" or "nonblanks". It
// is unanchored and is applied to the lower-cased token.
blankFormat = regexp.MustCompile("blanks|nonblanks")
// matchFormat reports whether a token contains one of the match
// characters * or ?.
matchFormat = regexp.MustCompile("[*?]")
)
// parseTableOptions provides a function to parse the format settings of the
// table with default value.
func parseTableOptions(opts *Table) (*Table, error) {
@ -400,8 +407,7 @@ func (f *File) autoFilter(sheet, ref string, columns, col int, opts []AutoFilter
return fmt.Errorf("incorrect index of column '%s'", opt.Column)
}
fc := &xlsxFilterColumn{ColID: offset}
re := regexp.MustCompile(`"(?:[^"]|"")*"|\S+`)
token := re.FindAllString(opt.Expression, -1)
token := expressionFormat.FindAllString(opt.Expression, -1)
if len(token) != 3 && len(token) != 7 {
return fmt.Errorf("incorrect number of tokens in criteria '%s'", opt.Expression)
}
@ -484,8 +490,7 @@ func (f *File) parseFilterExpression(expression string, tokens []string) ([]int,
// expressions).
conditional := 0
c := tokens[3]
re, _ := regexp.Match(`(or|\|\|)`, []byte(c))
if re {
if conditionFormat.Match([]byte(c)) {
conditional = 1
}
expression1, token1, err := f.parseFilterTokens(expression, tokens[:3])
@ -533,7 +538,7 @@ func (f *File) parseFilterTokens(expression string, tokens []string) ([]int, str
}
token := tokens[2]
// Special handling for Blanks/NonBlanks.
re, _ := regexp.Match("blanks|nonblanks", []byte(strings.ToLower(token)))
re := blankFormat.Match([]byte(strings.ToLower(token)))
if re {
// Only allow Equals or NotEqual in this context.
if operator != 2 && operator != 5 {
@ -558,7 +563,7 @@ func (f *File) parseFilterTokens(expression string, tokens []string) ([]int, str
}
// If the string token contains an Excel match character then change the
// operator type to indicate a non "simple" equality.
re, _ = regexp.Match("[*?]", []byte(token))
re = matchFormat.Match([]byte(token))
if operator == 2 && re {
operator = 22
}