ref #1096, reduce memory usage by about 50% for large data spreadsheet

This commit is contained in:
xuri 2022-01-11 00:24:24 +08:00
parent 2245fccca0
commit 891e5baac1
No known key found for this signature in database
GPG Key ID: BA5E5BB1C948EDF7
6 changed files with 117 additions and 41 deletions

14
cell.go
View File

@ -375,8 +375,18 @@ func (f *File) sharedStringsLoader() (err error) {
if path, ok := f.tempFiles.Load(defaultXMLPathSharedStrings); ok { if path, ok := f.tempFiles.Load(defaultXMLPathSharedStrings); ok {
f.Pkg.Store(defaultXMLPathSharedStrings, f.readBytes(defaultXMLPathSharedStrings)) f.Pkg.Store(defaultXMLPathSharedStrings, f.readBytes(defaultXMLPathSharedStrings))
f.tempFiles.Delete(defaultXMLPathSharedStrings) f.tempFiles.Delete(defaultXMLPathSharedStrings)
err = os.Remove(path.(string)) if err = os.Remove(path.(string)); err != nil {
f.SharedStrings, f.sharedStringItemMap = nil, nil return
}
f.SharedStrings = nil
}
if f.sharedStringTemp != nil {
if err := f.sharedStringTemp.Close(); err != nil {
return err
}
f.tempFiles.Delete(defaultTempFileSST)
f.sharedStringItem, err = nil, os.Remove(f.sharedStringTemp.Name())
f.sharedStringTemp = nil
} }
return return
} }

View File

@ -2,6 +2,7 @@ package excelize
import ( import (
"fmt" "fmt"
"os"
"path/filepath" "path/filepath"
"reflect" "reflect"
"strconv" "strconv"
@ -653,9 +654,12 @@ func TestFormattedValue2(t *testing.T) {
func TestSharedStringsError(t *testing.T) { func TestSharedStringsError(t *testing.T) {
f, err := OpenFile(filepath.Join("test", "Book1.xlsx"), Options{UnzipXMLSizeLimit: 128}) f, err := OpenFile(filepath.Join("test", "Book1.xlsx"), Options{UnzipXMLSizeLimit: 128})
assert.NoError(t, err) assert.NoError(t, err)
tempFile, ok := f.tempFiles.Load(defaultXMLPathSharedStrings)
assert.True(t, ok)
f.tempFiles.Store(defaultXMLPathSharedStrings, "") f.tempFiles.Store(defaultXMLPathSharedStrings, "")
assert.Equal(t, "1", f.getFromStringItemMap(1)) assert.Equal(t, "1", f.getFromStringItem(1))
// Clean up undeleted temporary files
assert.NoError(t, os.Remove(tempFile.(string)))
// Test reloading the file error on setting the cell value and rich text. The error message differs between macOS and Windows. // Test reloading the file error on setting the cell value and rich text. The error message differs between macOS and Windows.
err = f.SetCellValue("Sheet1", "A19", "A19") err = f.SetCellValue("Sheet1", "A19", "A19")
assert.Error(t, err) assert.Error(t, err)
@ -663,6 +667,50 @@ func TestSharedStringsError(t *testing.T) {
f.tempFiles.Store(defaultXMLPathSharedStrings, "") f.tempFiles.Store(defaultXMLPathSharedStrings, "")
err = f.SetCellRichText("Sheet1", "A19", []RichTextRun{}) err = f.SetCellRichText("Sheet1", "A19", []RichTextRun{})
assert.Error(t, err) assert.Error(t, err)
assert.NoError(t, f.Close()) assert.NoError(t, f.Close())
f, err = OpenFile(filepath.Join("test", "Book1.xlsx"), Options{UnzipXMLSizeLimit: 128})
assert.NoError(t, err)
rows, err := f.Rows("Sheet1")
assert.NoError(t, err)
const maxUint16 = 1<<16 - 1
for rows.Next() {
if rows.CurrentRow() == 19 {
_, err := rows.Columns()
assert.NoError(t, err)
// Test get cell value from string item with invalid offset
f.sharedStringItem[1] = []uint{maxUint16 - 1, maxUint16}
assert.Equal(t, "1", f.getFromStringItem(1))
break
}
}
assert.NoError(t, rows.Close())
// Test that the shared string item temporary file has been closed before closing the workbook
assert.NoError(t, f.sharedStringTemp.Close())
assert.Error(t, f.Close())
// Clean up undeleted temporary files
f.tempFiles.Range(func(k, v interface{}) bool {
return assert.NoError(t, os.Remove(v.(string)))
})
f, err = OpenFile(filepath.Join("test", "Book1.xlsx"), Options{UnzipXMLSizeLimit: 128})
assert.NoError(t, err)
rows, err = f.Rows("Sheet1")
assert.NoError(t, err)
for rows.Next() {
if rows.CurrentRow() == 19 {
_, err := rows.Columns()
assert.NoError(t, err)
break
}
}
assert.NoError(t, rows.Close())
assert.NoError(t, f.sharedStringTemp.Close())
// Test that the shared string item temporary file has been closed before setting the cell value
assert.Error(t, f.SetCellValue("Sheet1", "A1", "A1"))
assert.Error(t, f.Close())
// Clean up undeleted temporary files
f.tempFiles.Range(func(k, v interface{}) bool {
return assert.NoError(t, os.Remove(v.(string)))
})
} }

View File

@ -32,30 +32,31 @@ import (
// File defines a populated spreadsheet file struct. // File defines a populated spreadsheet file struct.
type File struct { type File struct {
sync.Mutex sync.Mutex
options *Options options *Options
xmlAttr map[string][]xml.Attr xmlAttr map[string][]xml.Attr
checked map[string]bool checked map[string]bool
sheetMap map[string]string sheetMap map[string]string
streams map[string]*StreamWriter streams map[string]*StreamWriter
tempFiles sync.Map tempFiles sync.Map
CalcChain *xlsxCalcChain CalcChain *xlsxCalcChain
Comments map[string]*xlsxComments Comments map[string]*xlsxComments
ContentTypes *xlsxTypes ContentTypes *xlsxTypes
Drawings sync.Map Drawings sync.Map
Path string Path string
SharedStrings *xlsxSST SharedStrings *xlsxSST
sharedStringsMap map[string]int sharedStringsMap map[string]int
sharedStringItemMap *sync.Map sharedStringItem [][]uint
Sheet sync.Map sharedStringTemp *os.File
SheetCount int Sheet sync.Map
Styles *xlsxStyleSheet SheetCount int
Theme *xlsxTheme Styles *xlsxStyleSheet
DecodeVMLDrawing map[string]*decodeVmlDrawing Theme *xlsxTheme
VMLDrawing map[string]*vmlDrawing DecodeVMLDrawing map[string]*decodeVmlDrawing
WorkBook *xlsxWorkbook VMLDrawing map[string]*vmlDrawing
Relationships sync.Map WorkBook *xlsxWorkbook
Pkg sync.Map Relationships sync.Map
CharsetReader charsetTranscoderFn Pkg sync.Map
CharsetReader charsetTranscoderFn
} }
type charsetTranscoderFn func(charset string, input io.Reader) (rdr io.Reader, err error) type charsetTranscoderFn func(charset string, input io.Reader) (rdr io.Reader, err error)

View File

@ -85,6 +85,11 @@ func (f *File) SaveAs(name string, opt ...Options) error {
// Close closes and cleans up the open temporary files for the spreadsheet. // Close closes and cleans up the open temporary files for the spreadsheet.
func (f *File) Close() error { func (f *File) Close() error {
var err error var err error
if f.sharedStringTemp != nil {
if err := f.sharedStringTemp.Close(); err != nil {
return err
}
}
f.tempFiles.Range(func(k, v interface{}) bool { f.tempFiles.Range(func(k, v interface{}) bool {
if err = os.Remove(v.(string)); err != nil { if err = os.Remove(v.(string)); err != nil {
return false return false

35
rows.go
View File

@ -16,12 +16,12 @@ import (
"encoding/xml" "encoding/xml"
"fmt" "fmt"
"io" "io"
"io/ioutil"
"log" "log"
"math" "math"
"math/big" "math/big"
"os" "os"
"strconv" "strconv"
"sync"
"github.com/mohae/deepcopy" "github.com/mohae/deepcopy"
) )
@ -280,23 +280,30 @@ func (f *File) Rows(sheet string) (*Rows, error) {
return &rows, nil return &rows, nil
} }
// getFromStringItemMap builds shared string item map from system temporary // getFromStringItem builds shared string item offset list from system temporary
// file at one time, and returns the value by the given string index. // file at one time, and returns the value by the given string index.
func (f *File) getFromStringItemMap(index int) string { func (f *File) getFromStringItem(index int) string {
if f.sharedStringItemMap != nil { if f.sharedStringTemp != nil {
if value, ok := f.sharedStringItemMap.Load(index); ok { if len(f.sharedStringItem) <= index {
return value.(string) return strconv.Itoa(index)
} }
return strconv.Itoa(index) offsetRange := f.sharedStringItem[index]
buf := make([]byte, offsetRange[1]-offsetRange[0])
if _, err := f.sharedStringTemp.ReadAt(buf, int64(offsetRange[0])); err != nil {
return strconv.Itoa(index)
}
return string(buf)
} }
f.sharedStringItemMap = &sync.Map{}
needClose, decoder, tempFile, err := f.xmlDecoder(defaultXMLPathSharedStrings) needClose, decoder, tempFile, err := f.xmlDecoder(defaultXMLPathSharedStrings)
if needClose && err == nil { if needClose && err == nil {
defer tempFile.Close() defer tempFile.Close()
} }
f.sharedStringItem = [][]uint{}
f.sharedStringTemp, _ = ioutil.TempFile(os.TempDir(), "excelize-")
f.tempFiles.Store(defaultTempFileSST, f.sharedStringTemp.Name())
var ( var (
inElement string inElement string
i int i, offset uint
) )
for { for {
token, _ := decoder.Token() token, _ := decoder.Token()
@ -309,12 +316,16 @@ func (f *File) getFromStringItemMap(index int) string {
if inElement == "si" { if inElement == "si" {
si := xlsxSI{} si := xlsxSI{}
_ = decoder.DecodeElement(&si, &xmlElement) _ = decoder.DecodeElement(&si, &xmlElement)
f.sharedStringItemMap.Store(i, si.String())
startIdx := offset
n, _ := f.sharedStringTemp.WriteString(si.String())
offset += uint(n)
f.sharedStringItem = append(f.sharedStringItem, []uint{startIdx, offset})
i++ i++
} }
} }
} }
return f.getFromStringItemMap(index) return f.getFromStringItem(index)
} }
// xmlDecoder creates XML decoder by given path in the zip from memory data // xmlDecoder creates XML decoder by given path in the zip from memory data
@ -454,7 +465,7 @@ func (c *xlsxC) getValueFrom(f *File, d *xlsxSST, raw bool) (string, error) {
xlsxSI := 0 xlsxSI := 0
xlsxSI, _ = strconv.Atoi(c.V) xlsxSI, _ = strconv.Atoi(c.V)
if _, ok := f.tempFiles.Load(defaultXMLPathSharedStrings); ok { if _, ok := f.tempFiles.Load(defaultXMLPathSharedStrings); ok {
return f.formattedValue(c.S, f.getFromStringItemMap(xlsxSI), raw), nil return f.formattedValue(c.S, f.getFromStringItem(xlsxSI), raw), nil
} }
if len(d.SI) > xlsxSI { if len(d.SI) > xlsxSI {
return f.formattedValue(c.S, d.SI[xlsxSI].String(), raw), nil return f.formattedValue(c.S, d.SI[xlsxSI].String(), raw), nil

View File

@ -30,6 +30,7 @@ const (
defaultXMLPathSharedStrings = "xl/sharedStrings.xml" defaultXMLPathSharedStrings = "xl/sharedStrings.xml"
defaultXMLPathStyles = "xl/styles.xml" defaultXMLPathStyles = "xl/styles.xml"
defaultXMLPathWorkbook = "xl/workbook.xml" defaultXMLPathWorkbook = "xl/workbook.xml"
defaultTempFileSST = "sharedStrings"
) )
const templateDocpropsApp = `<Properties xmlns="http://schemas.openxmlformats.org/officeDocument/2006/extended-properties" xmlns:vt="http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes"><TotalTime>0</TotalTime><Application>Go Excelize</Application></Properties>` const templateDocpropsApp = `<Properties xmlns="http://schemas.openxmlformats.org/officeDocument/2006/extended-properties" xmlns:vt="http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes"><TotalTime>0</TotalTime><Application>Go Excelize</Application></Properties>`