handle end element event in the worksheet row/column iterator XML SAX parser

This commit is contained in:
xuri 2021-02-05 22:52:31 +08:00
parent 66d85dae13
commit 2fb135bc94
No known key found for this signature in database
GPG Key ID: BA5E5BB1C948EDF7
4 changed files with 144 additions and 88 deletions

107
col.go
View File

@ -97,20 +97,20 @@ func (cols *Cols) Rows() ([]string, error) {
if token == nil {
break
}
switch startElement := token.(type) {
switch xmlElement := token.(type) {
case xml.StartElement:
inElement = startElement.Name.Local
inElement = xmlElement.Name.Local
if inElement == "row" {
cellCol = 0
cellRow++
attrR, _ := attrValToInt("r", startElement.Attr)
attrR, _ := attrValToInt("r", xmlElement.Attr)
if attrR != 0 {
cellRow = attrR
}
}
if inElement == "c" {
cellCol++
for _, attr := range startElement.Attr {
for _, attr := range xmlElement.Attr {
if attr.Name.Local == "r" {
if cellCol, cellRow, err = CellNameToCoordinates(attr.Value); err != nil {
return rows, err
@ -123,14 +123,59 @@ func (cols *Cols) Rows() ([]string, error) {
}
if cellCol == cols.curCol {
colCell := xlsxC{}
_ = decoder.DecodeElement(&colCell, &startElement)
_ = decoder.DecodeElement(&colCell, &xmlElement)
val, _ := colCell.getValueFrom(cols.f, d)
rows = append(rows, val)
}
}
case xml.EndElement:
if xmlElement.Name.Local == "sheetData" {
return rows, err
}
}
}
return rows, nil
return rows, err
}
// columnXMLIterator holds the running state of the worksheet column SAX
// parser while the sheet XML is scanned one start element at a time.
type columnXMLIterator struct {
	// err is the error of the most recent columnXMLHandler call; the handler
	// clears it on entry.
	err error
	// inElement is the local name of the element being processed.
	// NOTE(review): columnXMLHandler does not write this field — confirm
	// whether any other caller relies on it.
	inElement string
	// cols is the iterator under construction; its totalRow and totalCol
	// fields are updated as rows and cells are seen.
	cols Cols
	// cellCol is the column index of the current cell, reset to 0 on each
	// new row.
	cellCol int
	// curRow is the parsed value of the "r" attribute of the current row.
	curRow int
	// row is the running row number.
	row int
}
// columnXMLHandler updates the column iterator state from a single start
// element of the worksheet XML.
//
// For a "row" element the running row counter is advanced, or replaced by the
// value of the element's "r" attribute when present, then recorded as the
// total row count, and the cell column counter is reset. For a "c" element the
// cell column counter is advanced, or replaced by the column parsed from the
// "r" attribute when present, and the total column count is raised if the
// cell lies beyond it. Other elements are ignored.
//
// The outcome is reported through colIterator.err, which is cleared on entry
// and set when an "r" attribute fails to parse.
func columnXMLHandler(colIterator *columnXMLIterator, xmlElement *xml.StartElement) {
	colIterator.err = nil
	switch xmlElement.Name.Local {
	case "row":
		colIterator.row++
		for i := range xmlElement.Attr {
			attribute := &xmlElement.Attr[i]
			if attribute.Name.Local != "r" {
				continue
			}
			colIterator.curRow, colIterator.err = strconv.Atoi(attribute.Value)
			if colIterator.err != nil {
				return
			}
			// An explicit row reference overrides the sequential counter.
			colIterator.row = colIterator.curRow
		}
		colIterator.cols.totalRow = colIterator.row
		colIterator.cellCol = 0
	case "c":
		colIterator.cellCol++
		for i := range xmlElement.Attr {
			attribute := &xmlElement.Attr[i]
			if attribute.Name.Local != "r" {
				continue
			}
			// Only the column part of the cell reference is kept here.
			colIterator.cellCol, _, colIterator.err = CellNameToCoordinates(attribute.Value)
			if colIterator.err != nil {
				return
			}
		}
		if colIterator.cols.totalCol < colIterator.cellCol {
			colIterator.cols.totalCol = colIterator.cellCol
		}
	}
}
// Cols returns a columns iterator, used for streaming reading data for a
@ -161,53 +206,29 @@ func (f *File) Cols(sheet string) (*Cols, error) {
output, _ := xml.Marshal(f.Sheet[name])
f.saveFileList(name, f.replaceNameSpaceBytes(name, output))
}
var (
inElement string
cols Cols
cellCol, curRow, row int
err error
)
cols.sheetXML = f.readXML(name)
decoder := f.xmlNewDecoder(bytes.NewReader(cols.sheetXML))
var colIterator columnXMLIterator
colIterator.cols.sheetXML = f.readXML(name)
decoder := f.xmlNewDecoder(bytes.NewReader(colIterator.cols.sheetXML))
for {
token, _ := decoder.Token()
if token == nil {
break
}
switch startElement := token.(type) {
switch xmlElement := token.(type) {
case xml.StartElement:
inElement = startElement.Name.Local
if inElement == "row" {
row++
for _, attr := range startElement.Attr {
if attr.Name.Local == "r" {
if curRow, err = strconv.Atoi(attr.Value); err != nil {
return &cols, err
}
row = curRow
}
}
cols.totalRow = row
cellCol = 0
columnXMLHandler(&colIterator, &xmlElement)
if colIterator.err != nil {
return &colIterator.cols, colIterator.err
}
if inElement == "c" {
cellCol++
for _, attr := range startElement.Attr {
if attr.Name.Local == "r" {
if cellCol, _, err = CellNameToCoordinates(attr.Value); err != nil {
return &cols, err
}
}
}
if cellCol > cols.totalCol {
cols.totalCol = cellCol
}
case xml.EndElement:
if xmlElement.Name.Local == "sheetData" {
colIterator.cols.f = f
colIterator.cols.sheet = trimSheetName(sheet)
return &colIterator.cols, nil
}
}
}
cols.f = f
cols.sheet = trimSheetName(sheet)
return &cols, nil
return &colIterator.cols, nil
}
// GetColVisible provides a function to get visible of a single column by given

View File

@ -148,10 +148,20 @@ func TestColsRows(t *testing.T) {
},
}
cols.stashCol, cols.curCol = 0, 1
f = NewFile()
f.XLSX["xl/worksheets/sheet1.xml"] = nil
cols, err = f.Cols("Sheet1")
if !assert.NoError(t, err) {
t.FailNow()
}
f = NewFile()
cols, err = f.Cols("Sheet1")
if !assert.NoError(t, err) {
t.FailNow()
}
_, err = cols.Rows()
assert.NoError(t, err)
cols.stashCol, cols.curCol = 0, 1
// Test if token is nil
cols.sheetXML = nil
_, err = cols.Rows()

107
rows.go
View File

@ -78,60 +78,48 @@ func (rows *Rows) Error() error {
// Columns returns the current row's column values.
func (rows *Rows) Columns() ([]string, error) {
var (
err error
inElement string
attrR, cellCol, row int
columns []string
)
var rowIterator rowXMLIterator
if rows.stashRow >= rows.curRow {
return columns, err
return rowIterator.columns, rowIterator.err
}
d := rows.f.sharedStringsReader()
rowIterator.rows = rows
rowIterator.d = rows.f.sharedStringsReader()
for {
token, _ := rows.decoder.Token()
if token == nil {
break
}
switch startElement := token.(type) {
switch xmlElement := token.(type) {
case xml.StartElement:
inElement = startElement.Name.Local
if inElement == "row" {
row++
if attrR, err = attrValToInt("r", startElement.Attr); attrR != 0 {
row = attrR
rowIterator.inElement = xmlElement.Name.Local
if rowIterator.inElement == "row" {
rowIterator.row++
if rowIterator.attrR, rowIterator.err = attrValToInt("r", xmlElement.Attr); rowIterator.attrR != 0 {
rowIterator.row = rowIterator.attrR
}
if row > rows.curRow {
rows.stashRow = row - 1
return columns, err
if rowIterator.row > rowIterator.rows.curRow {
rowIterator.rows.stashRow = rowIterator.row - 1
return rowIterator.columns, rowIterator.err
}
}
if inElement == "c" {
cellCol++
colCell := xlsxC{}
_ = rows.decoder.DecodeElement(&colCell, &startElement)
if colCell.R != "" {
if cellCol, _, err = CellNameToCoordinates(colCell.R); err != nil {
return columns, err
}
}
blank := cellCol - len(columns)
val, _ := colCell.getValueFrom(rows.f, d)
columns = append(appendSpace(blank, columns), val)
rowXMLHandler(&rowIterator, &xmlElement)
if rowIterator.err != nil {
return rowIterator.columns, rowIterator.err
}
case xml.EndElement:
inElement = startElement.Name.Local
if row == 0 {
row = rows.curRow
rowIterator.inElement = xmlElement.Name.Local
if rowIterator.row == 0 {
rowIterator.row = rowIterator.rows.curRow
}
if inElement == "row" && row+1 < rows.curRow {
return columns, err
if rowIterator.inElement == "row" && rowIterator.row+1 < rowIterator.rows.curRow {
return rowIterator.columns, rowIterator.err
}
if rowIterator.inElement == "sheetData" {
return rowIterator.columns, rowIterator.err
}
}
}
return columns, err
return rowIterator.columns, rowIterator.err
}
// appendSpace appends blank characters to a slice by given length and source slice.
@ -151,6 +139,35 @@ func (err ErrSheetNotExist) Error() string {
return fmt.Sprintf("sheet %s is not exist", string(err.SheetName))
}
// rowXMLIterator holds the running state of the worksheet row SAX parser
// while the cells of one row are collected.
type rowXMLIterator struct {
	// err is the error of the most recent parsing step; rowXMLHandler clears
	// it on entry.
	err error
	// inElement is the local name of the element being processed;
	// rowXMLHandler only acts when it equals "c".
	inElement string
	// attrR is the value of the "r" attribute of the current row element.
	attrR int
	// cellCol is the column index of the current cell.
	cellCol int
	// row is the running row number.
	row int
	// columns accumulates the cell values of the current row.
	columns []string
	// rows is the owning iterator; it supplies the XML decoder and the file.
	rows *Rows
	// d is the shared string table passed to getValueFrom when resolving
	// cell values.
	d *xlsxSST
}
// rowXMLHandler consumes a single start element of the worksheet XML on
// behalf of the row iterator.
//
// Only cell elements (rowIterator.inElement == "c") are handled: the element
// is decoded into an xlsxC, its column is taken from its cell reference when
// one is present (otherwise the running counter is used), the collected
// columns are padded up to that column, and the cell value is appended.
//
// The outcome is reported through rowIterator.err, which is cleared on entry
// and set when the cell reference cannot be converted to coordinates.
func rowXMLHandler(rowIterator *rowXMLIterator, xmlElement *xml.StartElement) {
	rowIterator.err = nil
	if rowIterator.inElement != "c" {
		return
	}
	rowIterator.cellCol++
	var cell xlsxC
	// Decoding errors are deliberately ignored; the cell is used as decoded.
	_ = rowIterator.rows.decoder.DecodeElement(&cell, xmlElement)
	if cell.R != "" {
		rowIterator.cellCol, _, rowIterator.err = CellNameToCoordinates(cell.R)
		if rowIterator.err != nil {
			return
		}
	}
	// Gap between the cell's column and the number of values collected so far.
	padding := rowIterator.cellCol - len(rowIterator.columns)
	value, _ := cell.getValueFrom(rowIterator.rows.f, rowIterator.d)
	padded := appendSpace(padding, rowIterator.columns)
	rowIterator.columns = append(padded, value)
}
// Rows returns a rows iterator, used for streaming reading data for a
// worksheet with a large data. For example:
//
@ -192,12 +209,12 @@ func (f *File) Rows(sheet string) (*Rows, error) {
if token == nil {
break
}
switch startElement := token.(type) {
switch xmlElement := token.(type) {
case xml.StartElement:
inElement = startElement.Name.Local
inElement = xmlElement.Name.Local
if inElement == "row" {
row++
for _, attr := range startElement.Attr {
for _, attr := range xmlElement.Attr {
if attr.Name.Local == "r" {
row, err = strconv.Atoi(attr.Value)
if err != nil {
@ -207,12 +224,16 @@ func (f *File) Rows(sheet string) (*Rows, error) {
}
rows.totalRow = row
}
case xml.EndElement:
if xmlElement.Name.Local == "sheetData" {
rows.f = f
rows.sheet = name
rows.decoder = f.xmlNewDecoder(bytes.NewReader(f.readXML(name)))
return &rows, nil
}
default:
}
}
rows.f = f
rows.sheet = name
rows.decoder = f.xmlNewDecoder(bytes.NewReader(f.readXML(name)))
return &rows, nil
}

View File

@ -46,6 +46,10 @@ func TestRows(t *testing.T) {
f.XLSX["xl/worksheets/sheet1.xml"] = []byte(`<worksheet><sheetData><row r="1"><c r="A1" t="s"><v>1</v></c></row><row r="A"><c r="2" t="str"><v>B</v></c></row></sheetData></worksheet>`)
_, err = f.Rows("Sheet1")
assert.EqualError(t, err, `strconv.Atoi: parsing "A": invalid syntax`)
f.XLSX["xl/worksheets/sheet1.xml"] = nil
_, err = f.Rows("Sheet1")
assert.NoError(t, err)
}
func TestRowsIterator(t *testing.T) {