handle end element event in the worksheet row/column iterator XML SAX parser

This commit is contained in:
xuri 2021-02-05 22:52:31 +08:00
parent 66d85dae13
commit 2fb135bc94
No known key found for this signature in database
GPG Key ID: BA5E5BB1C948EDF7
4 changed files with 144 additions and 88 deletions

107
col.go
View File

@ -97,20 +97,20 @@ func (cols *Cols) Rows() ([]string, error) {
if token == nil { if token == nil {
break break
} }
switch startElement := token.(type) { switch xmlElement := token.(type) {
case xml.StartElement: case xml.StartElement:
inElement = startElement.Name.Local inElement = xmlElement.Name.Local
if inElement == "row" { if inElement == "row" {
cellCol = 0 cellCol = 0
cellRow++ cellRow++
attrR, _ := attrValToInt("r", startElement.Attr) attrR, _ := attrValToInt("r", xmlElement.Attr)
if attrR != 0 { if attrR != 0 {
cellRow = attrR cellRow = attrR
} }
} }
if inElement == "c" { if inElement == "c" {
cellCol++ cellCol++
for _, attr := range startElement.Attr { for _, attr := range xmlElement.Attr {
if attr.Name.Local == "r" { if attr.Name.Local == "r" {
if cellCol, cellRow, err = CellNameToCoordinates(attr.Value); err != nil { if cellCol, cellRow, err = CellNameToCoordinates(attr.Value); err != nil {
return rows, err return rows, err
@ -123,14 +123,59 @@ func (cols *Cols) Rows() ([]string, error) {
} }
if cellCol == cols.curCol { if cellCol == cols.curCol {
colCell := xlsxC{} colCell := xlsxC{}
_ = decoder.DecodeElement(&colCell, &startElement) _ = decoder.DecodeElement(&colCell, &xmlElement)
val, _ := colCell.getValueFrom(cols.f, d) val, _ := colCell.getValueFrom(cols.f, d)
rows = append(rows, val) rows = append(rows, val)
} }
} }
case xml.EndElement:
if xmlElement.Name.Local == "sheetData" {
return rows, err
}
} }
} }
return rows, nil return rows, err
}
// columnXMLIterator holds the mutable state shared between File.Cols and
// columnXMLHandler while the worksheet XML is scanned token by token.
type columnXMLIterator struct {
	// err is reset at the start of every columnXMLHandler call and holds the
	// parse error, if any, produced while handling that element.
	err error
	// inElement is reserved for the local name of the element being handled;
	// columnXMLHandler itself does not read or write it.
	inElement string
	// cols is the iterator being built; its totalRow and totalCol are updated
	// as <row> and <c> elements are seen.
	cols Cols
	// cellCol is the column number of the most recently seen <c> element; it
	// is reset to 0 at every <row>.
	cellCol int
	// curRow receives the value parsed from a <row> element's "r" attribute.
	curRow int
	// row is the running row number: incremented per <row> and overridden by
	// curRow when an "r" attribute is present.
	row int
}
// columnXMLHandler parse the column XML element of the worksheet.
func columnXMLHandler(colIterator *columnXMLIterator, xmlElement *xml.StartElement) {
colIterator.err = nil
inElement := xmlElement.Name.Local
if inElement == "row" {
colIterator.row++
for _, attr := range xmlElement.Attr {
if attr.Name.Local == "r" {
if colIterator.curRow, colIterator.err = strconv.Atoi(attr.Value); colIterator.err != nil {
return
}
colIterator.row = colIterator.curRow
}
}
colIterator.cols.totalRow = colIterator.row
colIterator.cellCol = 0
}
if inElement == "c" {
colIterator.cellCol++
for _, attr := range xmlElement.Attr {
if attr.Name.Local == "r" {
if colIterator.cellCol, _, colIterator.err = CellNameToCoordinates(attr.Value); colIterator.err != nil {
return
}
}
}
if colIterator.cellCol > colIterator.cols.totalCol {
colIterator.cols.totalCol = colIterator.cellCol
}
}
return
} }
// Cols returns a columns iterator, used for streaming reading data for a // Cols returns a columns iterator, used for streaming reading data for a
@ -161,53 +206,29 @@ func (f *File) Cols(sheet string) (*Cols, error) {
output, _ := xml.Marshal(f.Sheet[name]) output, _ := xml.Marshal(f.Sheet[name])
f.saveFileList(name, f.replaceNameSpaceBytes(name, output)) f.saveFileList(name, f.replaceNameSpaceBytes(name, output))
} }
var ( var colIterator columnXMLIterator
inElement string colIterator.cols.sheetXML = f.readXML(name)
cols Cols decoder := f.xmlNewDecoder(bytes.NewReader(colIterator.cols.sheetXML))
cellCol, curRow, row int
err error
)
cols.sheetXML = f.readXML(name)
decoder := f.xmlNewDecoder(bytes.NewReader(cols.sheetXML))
for { for {
token, _ := decoder.Token() token, _ := decoder.Token()
if token == nil { if token == nil {
break break
} }
switch startElement := token.(type) { switch xmlElement := token.(type) {
case xml.StartElement: case xml.StartElement:
inElement = startElement.Name.Local columnXMLHandler(&colIterator, &xmlElement)
if inElement == "row" { if colIterator.err != nil {
row++ return &colIterator.cols, colIterator.err
for _, attr := range startElement.Attr {
if attr.Name.Local == "r" {
if curRow, err = strconv.Atoi(attr.Value); err != nil {
return &cols, err
}
row = curRow
}
}
cols.totalRow = row
cellCol = 0
} }
if inElement == "c" { case xml.EndElement:
cellCol++ if xmlElement.Name.Local == "sheetData" {
for _, attr := range startElement.Attr { colIterator.cols.f = f
if attr.Name.Local == "r" { colIterator.cols.sheet = trimSheetName(sheet)
if cellCol, _, err = CellNameToCoordinates(attr.Value); err != nil { return &colIterator.cols, nil
return &cols, err
}
}
}
if cellCol > cols.totalCol {
cols.totalCol = cellCol
}
} }
} }
} }
cols.f = f return &colIterator.cols, nil
cols.sheet = trimSheetName(sheet)
return &cols, nil
} }
// GetColVisible provides a function to get visible of a single column by given // GetColVisible provides a function to get visible of a single column by given

View File

@ -148,10 +148,20 @@ func TestColsRows(t *testing.T) {
}, },
} }
cols.stashCol, cols.curCol = 0, 1 f = NewFile()
f.XLSX["xl/worksheets/sheet1.xml"] = nil
cols, err = f.Cols("Sheet1") cols, err = f.Cols("Sheet1")
if !assert.NoError(t, err) {
t.FailNow()
}
f = NewFile()
cols, err = f.Cols("Sheet1")
if !assert.NoError(t, err) {
t.FailNow()
}
_, err = cols.Rows()
assert.NoError(t, err) assert.NoError(t, err)
cols.stashCol, cols.curCol = 0, 1
// Test if token is nil // Test if token is nil
cols.sheetXML = nil cols.sheetXML = nil
_, err = cols.Rows() _, err = cols.Rows()

107
rows.go
View File

@ -78,60 +78,48 @@ func (rows *Rows) Error() error {
// Columns return the current row's column values. // Columns return the current row's column values.
func (rows *Rows) Columns() ([]string, error) { func (rows *Rows) Columns() ([]string, error) {
var ( var rowIterator rowXMLIterator
err error
inElement string
attrR, cellCol, row int
columns []string
)
if rows.stashRow >= rows.curRow { if rows.stashRow >= rows.curRow {
return columns, err return rowIterator.columns, rowIterator.err
} }
rowIterator.rows = rows
d := rows.f.sharedStringsReader() rowIterator.d = rows.f.sharedStringsReader()
for { for {
token, _ := rows.decoder.Token() token, _ := rows.decoder.Token()
if token == nil { if token == nil {
break break
} }
switch startElement := token.(type) { switch xmlElement := token.(type) {
case xml.StartElement: case xml.StartElement:
inElement = startElement.Name.Local rowIterator.inElement = xmlElement.Name.Local
if inElement == "row" { if rowIterator.inElement == "row" {
row++ rowIterator.row++
if attrR, err = attrValToInt("r", startElement.Attr); attrR != 0 { if rowIterator.attrR, rowIterator.err = attrValToInt("r", xmlElement.Attr); rowIterator.attrR != 0 {
row = attrR rowIterator.row = rowIterator.attrR
} }
if row > rows.curRow { if rowIterator.row > rowIterator.rows.curRow {
rows.stashRow = row - 1 rowIterator.rows.stashRow = rowIterator.row - 1
return columns, err return rowIterator.columns, rowIterator.err
} }
} }
if inElement == "c" { rowXMLHandler(&rowIterator, &xmlElement)
cellCol++ if rowIterator.err != nil {
colCell := xlsxC{} return rowIterator.columns, rowIterator.err
_ = rows.decoder.DecodeElement(&colCell, &startElement)
if colCell.R != "" {
if cellCol, _, err = CellNameToCoordinates(colCell.R); err != nil {
return columns, err
}
}
blank := cellCol - len(columns)
val, _ := colCell.getValueFrom(rows.f, d)
columns = append(appendSpace(blank, columns), val)
} }
case xml.EndElement: case xml.EndElement:
inElement = startElement.Name.Local rowIterator.inElement = xmlElement.Name.Local
if row == 0 { if rowIterator.row == 0 {
row = rows.curRow rowIterator.row = rowIterator.rows.curRow
} }
if inElement == "row" && row+1 < rows.curRow { if rowIterator.inElement == "row" && rowIterator.row+1 < rowIterator.rows.curRow {
return columns, err return rowIterator.columns, rowIterator.err
}
if rowIterator.inElement == "sheetData" {
return rowIterator.columns, rowIterator.err
} }
} }
} }
return columns, err return rowIterator.columns, rowIterator.err
} }
// appendSpace append blank characters to slice by given length and source slice. // appendSpace append blank characters to slice by given length and source slice.
@ -151,6 +139,35 @@ func (err ErrSheetNotExist) Error() string {
return fmt.Sprintf("sheet %s is not exist", string(err.SheetName)) return fmt.Sprintf("sheet %s is not exist", string(err.SheetName))
} }
// rowXMLIterator holds the mutable state shared between Rows.Columns and
// rowXMLHandler while one worksheet row is read from the XML token stream.
type rowXMLIterator struct {
	// err holds the most recent parse error; rowXMLHandler resets it on entry.
	err error
	// inElement is the local name of the element currently being handled.
	inElement string
	// attrR receives the value of a <row> element's "r" attribute.
	attrR int
	// cellCol is the column number of the most recently decoded cell.
	cellCol int
	// row is the running row number within the current Columns call.
	row int
	// columns accumulates the cell values of the row being read.
	columns []string
	// rows is the iterator that owns the XML decoder and the workbook handle.
	rows *Rows
	// d is the shared strings table passed to getValueFrom when resolving
	// cell values.
	d *xlsxSST
}
// rowXMLHandler consumes one cell of the worksheet XML for the row iterator.
//
// It always clears rowIterator.err first. When the current element (as
// recorded in rowIterator.inElement) is a <c> cell, the cell is decoded from
// the stream, its column is taken from the cell reference when one is
// present, and its value is appended to rowIterator.columns after padding
// any skipped columns via appendSpace. An invalid cell reference is reported
// through rowIterator.err and the cell is not appended.
func rowXMLHandler(rowIterator *rowXMLIterator, xmlElement *xml.StartElement) {
	rowIterator.err = nil
	if rowIterator.inElement != "c" {
		return
	}
	rowIterator.cellCol++
	var cell xlsxC
	// The decode error is discarded; the cell is used as far as it was filled in.
	_ = rowIterator.rows.decoder.DecodeElement(&cell, xmlElement)
	if cell.R != "" {
		rowIterator.cellCol, _, rowIterator.err = CellNameToCoordinates(cell.R)
		if rowIterator.err != nil {
			return
		}
	}
	// Number of columns between the last stored value and this cell.
	gap := rowIterator.cellCol - len(rowIterator.columns)
	value, _ := cell.getValueFrom(rowIterator.rows.f, rowIterator.d)
	rowIterator.columns = append(appendSpace(gap, rowIterator.columns), value)
}
// Rows returns a rows iterator, used for streaming reading data for a // Rows returns a rows iterator, used for streaming reading data for a
// worksheet with a large data. For example: // worksheet with a large data. For example:
// //
@ -192,12 +209,12 @@ func (f *File) Rows(sheet string) (*Rows, error) {
if token == nil { if token == nil {
break break
} }
switch startElement := token.(type) { switch xmlElement := token.(type) {
case xml.StartElement: case xml.StartElement:
inElement = startElement.Name.Local inElement = xmlElement.Name.Local
if inElement == "row" { if inElement == "row" {
row++ row++
for _, attr := range startElement.Attr { for _, attr := range xmlElement.Attr {
if attr.Name.Local == "r" { if attr.Name.Local == "r" {
row, err = strconv.Atoi(attr.Value) row, err = strconv.Atoi(attr.Value)
if err != nil { if err != nil {
@ -207,12 +224,16 @@ func (f *File) Rows(sheet string) (*Rows, error) {
} }
rows.totalRow = row rows.totalRow = row
} }
case xml.EndElement:
if xmlElement.Name.Local == "sheetData" {
rows.f = f
rows.sheet = name
rows.decoder = f.xmlNewDecoder(bytes.NewReader(f.readXML(name)))
return &rows, nil
}
default: default:
} }
} }
rows.f = f
rows.sheet = name
rows.decoder = f.xmlNewDecoder(bytes.NewReader(f.readXML(name)))
return &rows, nil return &rows, nil
} }

View File

@ -46,6 +46,10 @@ func TestRows(t *testing.T) {
f.XLSX["xl/worksheets/sheet1.xml"] = []byte(`<worksheet><sheetData><row r="1"><c r="A1" t="s"><v>1</v></c></row><row r="A"><c r="2" t="str"><v>B</v></c></row></sheetData></worksheet>`) f.XLSX["xl/worksheets/sheet1.xml"] = []byte(`<worksheet><sheetData><row r="1"><c r="A1" t="s"><v>1</v></c></row><row r="A"><c r="2" t="str"><v>B</v></c></row></sheetData></worksheet>`)
_, err = f.Rows("Sheet1") _, err = f.Rows("Sheet1")
assert.EqualError(t, err, `strconv.Atoi: parsing "A": invalid syntax`) assert.EqualError(t, err, `strconv.Atoi: parsing "A": invalid syntax`)
f.XLSX["xl/worksheets/sheet1.xml"] = nil
_, err = f.Rows("Sheet1")
assert.NoError(t, err)
} }
func TestRowsIterator(t *testing.T) { func TestRowsIterator(t *testing.T) {