ref #1096, reduce memory usage by about 50% for spreadsheets with large amounts of data

This commit is contained in:
xuri 2022-01-11 00:24:24 +08:00
parent 2245fccca0
commit 891e5baac1
No known key found for this signature in database
GPG Key ID: BA5E5BB1C948EDF7
6 changed files with 117 additions and 41 deletions

14
cell.go
View File

@ -375,8 +375,18 @@ func (f *File) sharedStringsLoader() (err error) {
if path, ok := f.tempFiles.Load(defaultXMLPathSharedStrings); ok {
f.Pkg.Store(defaultXMLPathSharedStrings, f.readBytes(defaultXMLPathSharedStrings))
f.tempFiles.Delete(defaultXMLPathSharedStrings)
err = os.Remove(path.(string))
f.SharedStrings, f.sharedStringItemMap = nil, nil
if err = os.Remove(path.(string)); err != nil {
return
}
f.SharedStrings = nil
}
if f.sharedStringTemp != nil {
if err := f.sharedStringTemp.Close(); err != nil {
return err
}
f.tempFiles.Delete(defaultTempFileSST)
f.sharedStringItem, err = nil, os.Remove(f.sharedStringTemp.Name())
f.sharedStringTemp = nil
}
return
}

View File

@ -2,6 +2,7 @@ package excelize
import (
"fmt"
"os"
"path/filepath"
"reflect"
"strconv"
@ -653,9 +654,12 @@ func TestFormattedValue2(t *testing.T) {
func TestSharedStringsError(t *testing.T) {
f, err := OpenFile(filepath.Join("test", "Book1.xlsx"), Options{UnzipXMLSizeLimit: 128})
assert.NoError(t, err)
tempFile, ok := f.tempFiles.Load(defaultXMLPathSharedStrings)
assert.True(t, ok)
f.tempFiles.Store(defaultXMLPathSharedStrings, "")
assert.Equal(t, "1", f.getFromStringItemMap(1))
assert.Equal(t, "1", f.getFromStringItem(1))
// Clean up the undeleted temporary files
assert.NoError(t, os.Remove(tempFile.(string)))
// Test the file reload error on setting the cell value and rich text. The error message differs between macOS and Windows.
err = f.SetCellValue("Sheet1", "A19", "A19")
assert.Error(t, err)
@ -663,6 +667,50 @@ func TestSharedStringsError(t *testing.T) {
f.tempFiles.Store(defaultXMLPathSharedStrings, "")
err = f.SetCellRichText("Sheet1", "A19", []RichTextRun{})
assert.Error(t, err)
assert.NoError(t, f.Close())
f, err = OpenFile(filepath.Join("test", "Book1.xlsx"), Options{UnzipXMLSizeLimit: 128})
assert.NoError(t, err)
rows, err := f.Rows("Sheet1")
assert.NoError(t, err)
const maxUint16 = 1<<16 - 1
for rows.Next() {
if rows.CurrentRow() == 19 {
_, err := rows.Columns()
assert.NoError(t, err)
// Test getting the cell value from a string item with an invalid offset
f.sharedStringItem[1] = []uint{maxUint16 - 1, maxUint16}
assert.Equal(t, "1", f.getFromStringItem(1))
break
}
}
assert.NoError(t, rows.Close())
// Test that the shared string item temporary file has been closed before closing the workbook
assert.NoError(t, f.sharedStringTemp.Close())
assert.Error(t, f.Close())
// Clean up the undeleted temporary files
f.tempFiles.Range(func(k, v interface{}) bool {
return assert.NoError(t, os.Remove(v.(string)))
})
f, err = OpenFile(filepath.Join("test", "Book1.xlsx"), Options{UnzipXMLSizeLimit: 128})
assert.NoError(t, err)
rows, err = f.Rows("Sheet1")
assert.NoError(t, err)
for rows.Next() {
if rows.CurrentRow() == 19 {
_, err := rows.Columns()
assert.NoError(t, err)
break
}
}
assert.NoError(t, rows.Close())
assert.NoError(t, f.sharedStringTemp.Close())
// Test that the shared string item temporary file has been closed before setting the cell value
assert.Error(t, f.SetCellValue("Sheet1", "A1", "A1"))
assert.Error(t, f.Close())
// Clean up the undeleted temporary files
f.tempFiles.Range(func(k, v interface{}) bool {
return assert.NoError(t, os.Remove(v.(string)))
})
}

View File

@ -32,30 +32,31 @@ import (
// File defines a populated spreadsheet file struct.
//
// NOTE(review): this listing comes from a rendered diff, so the field list
// appears twice. Presumably the first copy is the pre-change version (with
// sharedStringItemMap) and the second copy is the post-change version (with
// sharedStringItem and sharedStringTemp) — confirm against the real source.
type File struct {
sync.Mutex
options *Options
xmlAttr map[string][]xml.Attr
checked map[string]bool
sheetMap map[string]string
streams map[string]*StreamWriter
tempFiles sync.Map
CalcChain *xlsxCalcChain
Comments map[string]*xlsxComments
ContentTypes *xlsxTypes
Drawings sync.Map
Path string
SharedStrings *xlsxSST
sharedStringsMap map[string]int
sharedStringItemMap *sync.Map // shared string index -> decoded string value, filled by getFromStringItemMap
Sheet sync.Map
SheetCount int
Styles *xlsxStyleSheet
Theme *xlsxTheme
DecodeVMLDrawing map[string]*decodeVmlDrawing
VMLDrawing map[string]*vmlDrawing
WorkBook *xlsxWorkbook
Relationships sync.Map
Pkg sync.Map
CharsetReader charsetTranscoderFn
options *Options
xmlAttr map[string][]xml.Attr
checked map[string]bool
sheetMap map[string]string
streams map[string]*StreamWriter
tempFiles sync.Map
CalcChain *xlsxCalcChain
Comments map[string]*xlsxComments
ContentTypes *xlsxTypes
Drawings sync.Map
Path string
SharedStrings *xlsxSST
sharedStringsMap map[string]int
sharedStringItem [][]uint // per shared string index: {start, end} byte offsets into sharedStringTemp
sharedStringTemp *os.File // temporary file that getFromStringItem writes the shared string values to and reads them back from
Sheet sync.Map
SheetCount int
Styles *xlsxStyleSheet
Theme *xlsxTheme
DecodeVMLDrawing map[string]*decodeVmlDrawing
VMLDrawing map[string]*vmlDrawing
WorkBook *xlsxWorkbook
Relationships sync.Map
Pkg sync.Map
CharsetReader charsetTranscoderFn
}
type charsetTranscoderFn func(charset string, input io.Reader) (rdr io.Reader, err error)

View File

@ -85,6 +85,11 @@ func (f *File) SaveAs(name string, opt ...Options) error {
// Close closes and cleans up the open temporary files for the spreadsheet.
func (f *File) Close() error {
var err error
if f.sharedStringTemp != nil {
if err := f.sharedStringTemp.Close(); err != nil {
return err
}
}
f.tempFiles.Range(func(k, v interface{}) bool {
if err = os.Remove(v.(string)); err != nil {
return false

35
rows.go
View File

@ -16,12 +16,12 @@ import (
"encoding/xml"
"fmt"
"io"
"io/ioutil"
"log"
"math"
"math/big"
"os"
"strconv"
"sync"
"github.com/mohae/deepcopy"
)
@ -280,23 +280,30 @@ func (f *File) Rows(sheet string) (*Rows, error) {
return &rows, nil
}
// getFromStringItemMap build shared string item map from system temporary
// getFromStringItem builds the shared string item offset list from the system temporary
// file in one pass, and returns the value for the given shared string index.
func (f *File) getFromStringItemMap(index int) string {
if f.sharedStringItemMap != nil {
if value, ok := f.sharedStringItemMap.Load(index); ok {
return value.(string)
func (f *File) getFromStringItem(index int) string {
if f.sharedStringTemp != nil {
if len(f.sharedStringItem) <= index {
return strconv.Itoa(index)
}
return strconv.Itoa(index)
offsetRange := f.sharedStringItem[index]
buf := make([]byte, offsetRange[1]-offsetRange[0])
if _, err := f.sharedStringTemp.ReadAt(buf, int64(offsetRange[0])); err != nil {
return strconv.Itoa(index)
}
return string(buf)
}
f.sharedStringItemMap = &sync.Map{}
needClose, decoder, tempFile, err := f.xmlDecoder(defaultXMLPathSharedStrings)
if needClose && err == nil {
defer tempFile.Close()
}
f.sharedStringItem = [][]uint{}
f.sharedStringTemp, _ = ioutil.TempFile(os.TempDir(), "excelize-")
f.tempFiles.Store(defaultTempFileSST, f.sharedStringTemp.Name())
var (
inElement string
i int
i, offset uint
)
for {
token, _ := decoder.Token()
@ -309,12 +316,16 @@ func (f *File) getFromStringItemMap(index int) string {
if inElement == "si" {
si := xlsxSI{}
_ = decoder.DecodeElement(&si, &xmlElement)
f.sharedStringItemMap.Store(i, si.String())
startIdx := offset
n, _ := f.sharedStringTemp.WriteString(si.String())
offset += uint(n)
f.sharedStringItem = append(f.sharedStringItem, []uint{startIdx, offset})
i++
}
}
}
return f.getFromStringItemMap(index)
return f.getFromStringItem(index)
}
// xmlDecoder creates XML decoder by given path in the zip from memory data
@ -454,7 +465,7 @@ func (c *xlsxC) getValueFrom(f *File, d *xlsxSST, raw bool) (string, error) {
xlsxSI := 0
xlsxSI, _ = strconv.Atoi(c.V)
if _, ok := f.tempFiles.Load(defaultXMLPathSharedStrings); ok {
return f.formattedValue(c.S, f.getFromStringItemMap(xlsxSI), raw), nil
return f.formattedValue(c.S, f.getFromStringItem(xlsxSI), raw), nil
}
if len(d.SI) > xlsxSI {
return f.formattedValue(c.S, d.SI[xlsxSI].String(), raw), nil

View File

@ -30,6 +30,7 @@ const (
defaultXMLPathSharedStrings = "xl/sharedStrings.xml"
defaultXMLPathStyles = "xl/styles.xml"
defaultXMLPathWorkbook = "xl/workbook.xml"
defaultTempFileSST = "sharedStrings"
)
const templateDocpropsApp = `<Properties xmlns="http://schemas.openxmlformats.org/officeDocument/2006/extended-properties" xmlns:vt="http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes"><TotalTime>0</TotalTime><Application>Go Excelize</Application></Properties>`