This closes #1961, add shared formula cell cache to speed up calculation (#2118)

- Reuse the shared formula index when updating the reference cell formula
- Fix shared cells not being updated when updating the reference cell formula with a new range
- Upgrade the dependency package go-deepcopy to v1.5.2
- Update unit tests
This commit is contained in:
shcabin 2025-04-26 17:29:33 +08:00 committed by GitHub
parent 0f19d7fcd7
commit b9f2c9e005
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
14 changed files with 177 additions and 58 deletions

91
cell.go
View File

@ -689,7 +689,8 @@ func (f *File) getCellFormula(sheet, cell string, transformed bool) (string, err
return "", false, nil
}
if c.F.T == STCellFormulaTypeShared && c.F.Si != nil {
return getSharedFormula(x, *c.F.Si, c.R), true, nil
formula, err := getSharedFormula(x, *c.F.Si, c.R)
return formula, true, err
}
return c.F.Content, true, nil
})
@ -793,6 +794,7 @@ func (f *File) SetCellFormula(sheet, cell, formula string, opts ...FormulaOpts)
return err
}
if formula == "" {
ws.deleteSharedFormula(c)
c.F = nil
return f.deleteCalcChain(f.getSheetID(sheet), cell)
}
@ -815,7 +817,8 @@ func (f *File) SetCellFormula(sheet, cell, formula string, opts ...FormulaOpts)
}
}
if c.F.T == STCellFormulaTypeShared {
if err = ws.setSharedFormula(*opt.Ref); err != nil {
ws.deleteSharedFormula(c)
if err = ws.setSharedFormula(cell, *opt.Ref); err != nil {
return err
}
}
@ -890,22 +893,28 @@ func (f *File) setArrayFormulaCells() error {
}
// setSharedFormula set shared formula for the cells.
func (ws *xlsxWorksheet) setSharedFormula(ref string) error {
func (ws *xlsxWorksheet) setSharedFormula(cell, ref string) error {
coordinates, err := rangeRefToCoordinates(ref)
if err != nil {
return err
}
_ = sortCoordinates(coordinates)
cnt := ws.countSharedFormula()
for c := coordinates[0]; c <= coordinates[2]; c++ {
for r := coordinates[1]; r <= coordinates[3]; r++ {
ws.prepareSheetXML(c, r)
cell := &ws.SheetData.Row[r-1].C[c-1]
if cell.F == nil {
cell.F = &xlsxF{}
si := ws.countSharedFormula()
for col := coordinates[0]; col <= coordinates[2]; col++ {
for rol := coordinates[1]; rol <= coordinates[3]; rol++ {
ws.prepareSheetXML(col, rol)
c := &ws.SheetData.Row[rol-1].C[col-1]
if c.F == nil {
c.F = &xlsxF{}
}
cell.F.T = STCellFormulaTypeShared
cell.F.Si = &cnt
c.F.T = STCellFormulaTypeShared
if c.R == cell {
if c.F.Ref != "" {
si = *c.F.Si
continue
}
}
c.F.Si = &si
}
}
return err
@ -923,6 +932,23 @@ func (ws *xlsxWorksheet) countSharedFormula() (count int) {
return
}
// deleteSharedFormula drops the given cell's shared index from the worksheet
// shared formula index cache and clears the formula of every other cell that
// carries the same shared index without a reference range of its own. It only
// acts when the given cell has a formula with both a shared index and a
// non-empty reference range; otherwise nothing is changed.
func (ws *xlsxWorksheet) deleteSharedFormula(c *xlsxC) {
	if c.F == nil || c.F.Si == nil || c.F.Ref == "" {
		return
	}
	si := *c.F.Si
	ws.formulaSI.Delete(si)
	for rowIdx := range ws.SheetData.Row {
		cells := ws.SheetData.Row[rowIdx].C
		for colIdx := range cells {
			f := cells[colIdx].F
			// Cells with a non-empty reference range, a different shared
			// index, or no formula at all are left untouched.
			if f == nil || f.Si == nil || *f.Si != si || f.Ref != "" {
				continue
			}
			cells[colIdx].F = nil
		}
	}
}
// GetCellHyperLink gets a cell hyperlink based on the given worksheet name and
// cell reference. If the cell has a hyperlink, it will return 'true' and
// the link address, otherwise it will return 'false' and an empty link
@ -1640,18 +1666,27 @@ func isOverlap(rect1, rect2 []int) bool {
cellInRange([]int{rect2[2], rect2[3]}, rect1)
}
// parseSharedFormula generate dynamic part of shared formula for target cell
// by given column and rows distance and origin shared formula.
func parseSharedFormula(dCol, dRow int, orig string) string {
// convertSharedFormula creates a non-shared formula from the shared formula
// counterpart for the given cell reference, which does not contain the formula.
func (c *xlsxC) convertSharedFormula(cell string) (string, error) {
col, row, err := CellNameToCoordinates(cell)
if err != nil {
return "", err
}
sharedCol, sharedRow, err := CellNameToCoordinates(c.R)
if err != nil {
return "", err
}
dCol, dRow := col-sharedCol, row-sharedRow
ps := efp.ExcelParser()
tokens := ps.Parse(string(orig))
for i := 0; i < len(tokens); i++ {
tokens := ps.Parse(c.F.Content)
for i := range tokens {
token := tokens[i]
if token.TType == efp.TokenTypeOperand && token.TSubType == efp.TokenSubTypeRange {
tokens[i].TValue = shiftCell(token.TValue, dCol, dRow)
}
}
return ps.Render()
return ps.Render(), nil
}
// getSharedFormula find a cell contains the same formula as another cell,
@ -1662,21 +1697,23 @@ func parseSharedFormula(dCol, dRow int, orig string) string {
//
// Note that this function does not validate the ref tag to check whether the
// cell is in the allowed range reference, and always returns the origin shared
// formula.
func getSharedFormula(ws *xlsxWorksheet, si int, cell string) string {
for row := 0; row < len(ws.SheetData.Row); row++ {
func getSharedFormula(ws *xlsxWorksheet, si int, cell string) (string, error) {
val, ok := ws.formulaSI.Load(si)
if ok {
return val.(*xlsxC).convertSharedFormula(cell)
}
for row := range ws.SheetData.Row {
r := &ws.SheetData.Row[row]
for column := 0; column < len(r.C); column++ {
for column := range r.C {
c := &r.C[column]
if c.F != nil && c.F.Ref != "" && c.F.T == STCellFormulaTypeShared && c.F.Si != nil && *c.F.Si == si {
col, row, _ := CellNameToCoordinates(cell)
sharedCol, sharedRow, _ := CellNameToCoordinates(c.R)
dCol := col - sharedCol
dRow := row - sharedRow
return parseSharedFormula(dCol, dRow, c.F.Content)
ws.formulaSI.Store(si, c)
return c.convertSharedFormula(cell)
}
}
}
return ""
return "", nil
}
// shiftCell returns the cell shifted according to dCol and dRow taking into

View File

@ -563,7 +563,7 @@ func TestGetValueFrom(t *testing.T) {
assert.NoError(t, err)
value, err := c.getValueFrom(f, sst, false)
assert.NoError(t, err)
assert.Equal(t, "", value)
assert.Empty(t, value)
c = xlsxC{T: "s", V: " 1 "}
value, err = c.getValueFrom(f, &xlsxSST{Count: 1, SI: []xlsxSI{{}, {T: &xlsxT{Val: "s"}}}}, false)
@ -602,13 +602,17 @@ func TestGetCellFormula(t *testing.T) {
formula, err := f.GetCellFormula("Sheet1", "B3")
assert.NoError(t, err)
assert.Equal(t, expected, formula)
// Test get shared formula from cache
formula, err = f.GetCellFormula("Sheet1", "B3")
assert.NoError(t, err)
assert.Equal(t, expected, formula)
}
f.Sheet.Delete("xl/worksheets/sheet1.xml")
f.Pkg.Store("xl/worksheets/sheet1.xml", []byte(`<worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main"><sheetData><row r="2"><c r="B2"><f t="shared" si="0"></f></c></row></sheetData></worksheet>`))
formula, err := f.GetCellFormula("Sheet1", "B2")
assert.NoError(t, err)
assert.Equal(t, "", formula)
assert.Empty(t, formula)
// Test get array formula with invalid cell range reference
f = NewFile()
@ -628,6 +632,81 @@ func TestGetCellFormula(t *testing.T) {
f.Sheet.Delete("xl/worksheets/sheet1.xml")
f.Pkg.Store("xl/worksheets/sheet1.xml", MacintoshCyrillicCharset)
assert.EqualError(t, f.setArrayFormulaCells(), "XML syntax error on line 1: invalid UTF-8")
// Test get shared formula after updating the reference cell formula, where the
// new shared formula cell reference range covers the previous one.
f = NewFile()
formulaType, ref = STCellFormulaTypeShared, "C2:C6"
assert.NoError(t, f.SetCellFormula("Sheet1", "C2", "=A2+B2", FormulaOpts{Ref: &ref, Type: &formulaType}))
formula, err = f.GetCellFormula("Sheet1", "C2")
assert.NoError(t, err)
assert.Equal(t, "A2+B2", formula)
formula, err = f.GetCellFormula("Sheet1", "C6")
assert.NoError(t, err)
assert.Equal(t, "A6+B6", formula)
formulaType, ref = STCellFormulaTypeShared, "C2:C8"
assert.NoError(t, f.SetCellFormula("Sheet1", "C2", "=A2*B2", FormulaOpts{Ref: &ref, Type: &formulaType}))
formula, err = f.GetCellFormula("Sheet1", "C2")
assert.NoError(t, err)
assert.Equal(t, "A2*B2", formula)
formula, err = f.GetCellFormula("Sheet1", "C8")
assert.NoError(t, err)
assert.Equal(t, "A8*B8", formula)
assert.NoError(t, f.Close())
// Test get shared formula after updating the reference cell formula, where the
// new shared formula cell reference range does not cover the previous one.
f = NewFile()
formulaType, ref = STCellFormulaTypeShared, "C2:C6"
assert.NoError(t, f.SetCellFormula("Sheet1", "C2", "=A2+B2", FormulaOpts{Ref: &ref, Type: &formulaType}))
formula, err = f.GetCellFormula("Sheet1", "C2")
assert.NoError(t, err)
assert.Equal(t, "A2+B2", formula)
formula, err = f.GetCellFormula("Sheet1", "C6")
assert.NoError(t, err)
assert.Equal(t, "A6+B6", formula)
formulaType, ref = STCellFormulaTypeShared, "C2:C4"
assert.NoError(t, f.SetCellFormula("Sheet1", "C2", "=A2*B2", FormulaOpts{Ref: &ref, Type: &formulaType}))
formula, err = f.GetCellFormula("Sheet1", "C2")
assert.NoError(t, err)
assert.Equal(t, "A2*B2", formula)
formula, err = f.GetCellFormula("Sheet1", "C6")
assert.NoError(t, err)
assert.Empty(t, formula)
// Test get shared formula after removing the reference cell formula
f = NewFile()
formulaType, ref = STCellFormulaTypeShared, "C2:C6"
assert.NoError(t, f.SetCellFormula("Sheet1", "C2", "=A2+B2", FormulaOpts{Ref: &ref, Type: &formulaType}))
assert.NoError(t, f.SetCellFormula("Sheet1", "C2", ""))
formula, err = f.GetCellFormula("Sheet1", "C2")
assert.NoError(t, err)
assert.Empty(t, formula)
formula, err = f.GetCellFormula("Sheet1", "C6")
assert.NoError(t, err)
assert.Empty(t, formula)
formulaType, ref = STCellFormulaTypeShared, "C2:C8"
assert.NoError(t, f.SetCellFormula("Sheet1", "C2", "=A2*B2", FormulaOpts{Ref: &ref, Type: &formulaType}))
formula, err = f.GetCellFormula("Sheet1", "C2")
assert.NoError(t, err)
assert.Equal(t, "A2*B2", formula)
formula, err = f.GetCellFormula("Sheet1", "C8")
assert.NoError(t, err)
assert.Equal(t, "A8*B8", formula)
assert.NoError(t, f.Close())
}
// TestConvertSharedFormula checks that convertSharedFormula returns a cell
// name to coordinates error when the target cell reference is invalid, and
// also when the target is valid but the receiver cell's own reference is not.
func TestConvertSharedFormula(t *testing.T) {
	master := xlsxC{R: "A"}
	expected := newCellNameToCoordinatesError("A", newInvalidCellNameError("A"))
	for _, target := range []string{"A", "A1"} {
		_, err := master.convertSharedFormula(target)
		assert.Equal(t, expected, err)
	}
}
func ExampleFile_SetCellFloat() {

1
col.go
View File

@ -782,6 +782,7 @@ func (f *File) RemoveCol(sheet, col string) error {
if err != nil {
return err
}
ws.formulaSI.Clear()
for rowIdx := range ws.SheetData.Row {
rowData := &ws.SheetData.Row[rowIdx]
for colIdx := range rowData.C {

View File

@ -81,7 +81,7 @@ func TestDataValidation(t *testing.T) {
dv.Formula1 = ""
assert.NoError(t, dv.SetDropList(listValid),
"SetDropList failed for valid input %v", listValid)
assert.NotEqual(t, "", dv.Formula1,
assert.NotEmpty(t, dv.Formula1,
"Formula1 should not be empty for valid input %v", listValid)
}
assert.Equal(t, `"A&lt;,B&gt;,C"",D ,E',F"`, dv.Formula1)

View File

@ -86,13 +86,13 @@ func TestOpenFile(t *testing.T) {
f.SetActiveSheet(2)
// Test get cell formula with given rows number
_, err = f.GetCellFormula("Sheet1", "B19")
formula, err := f.GetCellFormula("Sheet1", "B19")
assert.NoError(t, err)
assert.Equal(t, "SUM(Sheet2!D2,Sheet2!D11)", formula)
// Test get cell formula with illegal worksheet name
_, err = f.GetCellFormula("Sheet2", "B20")
assert.NoError(t, err)
_, err = f.GetCellFormula("Sheet1", "B20")
formula, err = f.GetCellFormula("Sheet2", "B20")
assert.NoError(t, err)
assert.Empty(t, formula)
// Test get cell formula with illegal rows number
_, err = f.GetCellFormula("Sheet1", "B")
@ -1060,7 +1060,7 @@ func TestCopySheetError(t *testing.T) {
func TestGetSheetComments(t *testing.T) {
f := NewFile()
assert.Equal(t, "", f.getSheetComments("sheet0"))
assert.Empty(t, f.getSheetComments("sheet0"))
}
func TestGetActiveSheetIndex(t *testing.T) {
@ -1414,7 +1414,7 @@ func TestProtectSheet(t *testing.T) {
assert.NoError(t, f.UnprotectSheet(sheetName, "password"))
// Test protect worksheet with empty password
assert.NoError(t, f.ProtectSheet(sheetName, &SheetProtectionOptions{}))
assert.Equal(t, "", ws.SheetProtection.Password)
assert.Empty(t, ws.SheetProtection.Password)
// Test protect worksheet with password exceeds the limit length
assert.EqualError(t, f.ProtectSheet(sheetName, &SheetProtectionOptions{
AlgorithmName: "MD4",

2
go.mod
View File

@ -5,7 +5,7 @@ go 1.23.0
require (
github.com/richardlehane/mscfb v1.0.4
github.com/stretchr/testify v1.10.0
github.com/tiendc/go-deepcopy v1.5.1
github.com/tiendc/go-deepcopy v1.5.2
github.com/xuri/efp v0.0.0-20250227110027-3491fafc2b79
github.com/xuri/nfp v0.0.0-20250226145837-86d5fc24b2ba
golang.org/x/crypto v0.36.0

4
go.sum
View File

@ -9,8 +9,8 @@ github.com/richardlehane/msoleps v1.0.4 h1:WuESlvhX3gH2IHcd8UqyCuFY5yiq/GR/yqaSM
github.com/richardlehane/msoleps v1.0.4/go.mod h1:BWev5JBpU9Ko2WAgmZEuiz4/u3ZYTKbjLycmwiWUfWg=
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/tiendc/go-deepcopy v1.5.1 h1:5ymXIB8ReIywehne6oy3HgywC8LicXYucPBNnj5QQxE=
github.com/tiendc/go-deepcopy v1.5.1/go.mod h1:toXoeQoUqXOOS/X4sKuiAoSk6elIdqc0pN7MTgOOo2I=
github.com/tiendc/go-deepcopy v1.5.2 h1:fzTSgAOzxw4MFuDzvyxRDUsdwA7qs7FBTvgXVj28NpQ=
github.com/tiendc/go-deepcopy v1.5.2/go.mod h1:toXoeQoUqXOOS/X4sKuiAoSk6elIdqc0pN7MTgOOo2I=
github.com/xuri/efp v0.0.0-20250227110027-3491fafc2b79 h1:78nKszZqigiBRBVcoe/AuPzyLTWW5B+ltBaUX1rlIXA=
github.com/xuri/efp v0.0.0-20250227110027-3491fafc2b79/go.mod h1:ybY/Jr0T0GTCnYjKqmdwxyxn2BQf2RcQIIvex5QldPI=
github.com/xuri/nfp v0.0.0-20250226145837-86d5fc24b2ba h1:DhIu6n3qU0joqG9f4IO6a/Gkerd+flXrmlJ+0yX2W8U=

View File

@ -95,12 +95,12 @@ func TestColumnNumberToName_OK(t *testing.T) {
func TestColumnNumberToName_Error(t *testing.T) {
out, err := ColumnNumberToName(-1)
if assert.Error(t, err) {
assert.Equal(t, "", out)
assert.Empty(t, out)
}
out, err = ColumnNumberToName(0)
if assert.Error(t, err) {
assert.Equal(t, "", out)
assert.Empty(t, out)
}
_, err = ColumnNumberToName(MaxColumns + 1)

View File

@ -35,7 +35,7 @@ func TestMergeCell(t *testing.T) {
assert.NoError(t, err)
// Merged cell ref is single coordinate
value, err = f.GetCellValue("Sheet2", "A6")
assert.Equal(t, "", value)
assert.Empty(t, value)
assert.NoError(t, err)
value, err = f.GetCellFormula("Sheet1", "G12")
assert.Equal(t, "SUM(Sheet1!B19,Sheet1!C19)", value)
@ -104,7 +104,7 @@ func TestMergeCellOverlap(t *testing.T) {
assert.Len(t, mc, 1)
assert.Equal(t, "A1", mc[0].GetStartAxis())
assert.Equal(t, "D3", mc[0].GetEndAxis())
assert.Equal(t, "", mc[0].GetCellValue())
assert.Empty(t, mc[0].GetCellValue())
assert.NoError(t, f.Close())
}

View File

@ -632,11 +632,12 @@ func (f *File) RemoveRow(sheet string, row int) error {
if err != nil {
return err
}
ws.formulaSI.Clear()
if row > len(ws.SheetData.Row) {
return f.adjustHelper(sheet, rows, row, -1)
}
keep := 0
for rowIdx := 0; rowIdx < len(ws.SheetData.Row); rowIdx++ {
for rowIdx := range ws.SheetData.Row {
v := &ws.SheetData.Row[rowIdx]
if v.R != row {
ws.SheetData.Row[keep] = *v

View File

@ -586,16 +586,16 @@ func TestDuplicateRowZeroWithNoRows(t *testing.T) {
val, err := f.GetCellValue(sheet, "A1")
assert.NoError(t, err)
assert.Equal(t, "", val)
assert.Empty(t, val)
val, err = f.GetCellValue(sheet, "B1")
assert.NoError(t, err)
assert.Equal(t, "", val)
assert.Empty(t, val)
val, err = f.GetCellValue(sheet, "A2")
assert.NoError(t, err)
assert.Equal(t, "", val)
assert.Empty(t, val)
val, err = f.GetCellValue(sheet, "B2")
assert.NoError(t, err)
assert.Equal(t, "", val)
assert.Empty(t, val)
assert.NoError(t, err)
expect := map[string]string{
@ -971,7 +971,7 @@ func TestGetValueFromInlineStr(t *testing.T) {
d := &xlsxSST{}
val, err := c.getValueFrom(f, d, false)
assert.NoError(t, err)
assert.Equal(t, "", val)
assert.Empty(t, val)
}
func TestGetValueFromNumber(t *testing.T) {

View File

@ -418,8 +418,8 @@ func TestGetSheetName(t *testing.T) {
assert.NoError(t, err)
assert.Equal(t, "Sheet1", f.GetSheetName(0))
assert.Equal(t, "Sheet2", f.GetSheetName(1))
assert.Equal(t, "", f.GetSheetName(-1))
assert.Equal(t, "", f.GetSheetName(2))
assert.Empty(t, f.GetSheetName(-1))
assert.Empty(t, f.GetSheetName(2))
assert.NoError(t, f.Close())
}
@ -519,7 +519,7 @@ func TestWorksheetWriter(t *testing.T) {
func TestGetWorkbookPath(t *testing.T) {
f := NewFile()
f.Pkg.Delete("_rels/.rels")
assert.Equal(t, "", f.getWorkbookPath())
assert.Empty(t, f.getWorkbookPath())
}
func TestGetWorkbookRelsPath(t *testing.T) {
@ -786,7 +786,7 @@ func TestSheetDimension(t *testing.T) {
assert.NoError(t, err)
dimension, err = f.GetSheetDimension(sheetName)
assert.NoError(t, err)
assert.Equal(t, "", dimension)
assert.Empty(t, dimension)
// Test set the worksheet dimension
for _, excepted := range []string{"A1", "A1:D5", "A1:XFD1048576", "a1", "A1:d5"} {
err = f.SetSheetDimension(sheetName, excepted)

View File

@ -137,7 +137,7 @@ func (f *File) NewStreamWriter(sheet string) (*StreamWriter, error) {
f.streams[sheetXMLPath] = sw
_, _ = sw.rawData.WriteString(xml.Header + `<worksheet` + templateNamespaceIDMap)
bulkAppendFields(&sw.rawData, sw.worksheet, 2, 3)
bulkAppendFields(&sw.rawData, sw.worksheet, 3, 4)
return sw, err
}
@ -662,7 +662,7 @@ func writeCell(buf *bufferedWriter, c xlsxC) {
// sheetData XML start element to the buffer.
func (sw *StreamWriter) writeSheetData() {
if !sw.sheetWritten {
bulkAppendFields(&sw.rawData, sw.worksheet, 4, 5)
bulkAppendFields(&sw.rawData, sw.worksheet, 5, 6)
if sw.worksheet.Cols != nil {
_, _ = sw.rawData.WriteString("<cols>")
for _, col := range sw.worksheet.Cols.Col {
@ -694,7 +694,7 @@ func (sw *StreamWriter) writeSheetData() {
func (sw *StreamWriter) Flush() error {
sw.writeSheetData()
_, _ = sw.rawData.WriteString(`</sheetData>`)
bulkAppendFields(&sw.rawData, sw.worksheet, 8, 15)
bulkAppendFields(&sw.rawData, sw.worksheet, 9, 16)
mergeCells := strings.Builder{}
if sw.mergeCellsCount > 0 {
_, _ = mergeCells.WriteString(`<mergeCells count="`)
@ -704,9 +704,9 @@ func (sw *StreamWriter) Flush() error {
_, _ = mergeCells.WriteString(`</mergeCells>`)
}
_, _ = sw.rawData.WriteString(mergeCells.String())
bulkAppendFields(&sw.rawData, sw.worksheet, 17, 38)
bulkAppendFields(&sw.rawData, sw.worksheet, 18, 39)
_, _ = sw.rawData.WriteString(sw.tableParts)
bulkAppendFields(&sw.rawData, sw.worksheet, 40, 40)
bulkAppendFields(&sw.rawData, sw.worksheet, 41, 41)
_, _ = sw.rawData.WriteString(`</worksheet>`)
if err := sw.rawData.Flush(); err != nil {
return err

View File

@ -20,6 +20,7 @@ import (
// http://schemas.openxmlformats.org/spreadsheetml/2006/main.
type xlsxWorksheet struct {
mu sync.Mutex
formulaSI sync.Map
XMLName xml.Name `xml:"http://schemas.openxmlformats.org/spreadsheetml/2006/main worksheet"`
SheetPr *xlsxSheetPr `xml:"sheetPr"`
Dimension *xlsxDimension `xml:"dimension"`