From 2fb135bc94bbb0c487563d166fd24786fab7280a Mon Sep 17 00:00:00 2001 From: xuri Date: Fri, 5 Feb 2021 22:52:31 +0800 Subject: [PATCH] handle end element event in the worksheet row/column iterator XML SAX parser --- col.go | 107 ++++++++++++++++++++++++++++++--------------------- col_test.go | 14 ++++++- rows.go | 107 ++++++++++++++++++++++++++++++--------------------- rows_test.go | 4 ++ 4 files changed, 144 insertions(+), 88 deletions(-) diff --git a/col.go b/col.go index 5d912291..9d46733e 100644 --- a/col.go +++ b/col.go @@ -97,20 +97,20 @@ func (cols *Cols) Rows() ([]string, error) { if token == nil { break } - switch startElement := token.(type) { + switch xmlElement := token.(type) { case xml.StartElement: - inElement = startElement.Name.Local + inElement = xmlElement.Name.Local if inElement == "row" { cellCol = 0 cellRow++ - attrR, _ := attrValToInt("r", startElement.Attr) + attrR, _ := attrValToInt("r", xmlElement.Attr) if attrR != 0 { cellRow = attrR } } if inElement == "c" { cellCol++ - for _, attr := range startElement.Attr { + for _, attr := range xmlElement.Attr { if attr.Name.Local == "r" { if cellCol, cellRow, err = CellNameToCoordinates(attr.Value); err != nil { return rows, err @@ -123,14 +123,59 @@ func (cols *Cols) Rows() ([]string, error) { } if cellCol == cols.curCol { colCell := xlsxC{} - _ = decoder.DecodeElement(&colCell, &startElement) + _ = decoder.DecodeElement(&colCell, &xmlElement) val, _ := colCell.getValueFrom(cols.f, d) rows = append(rows, val) } } + case xml.EndElement: + if xmlElement.Name.Local == "sheetData" { + return rows, err + } } } - return rows, nil + return rows, err +} + +// columnXMLIterator defines the runtime fields used by the worksheet column SAX parser. +type columnXMLIterator struct { + err error + inElement string + cols Cols + cellCol, curRow, row int +} + +// columnXMLHandler parses the column XML element of the worksheet. 
+func columnXMLHandler(colIterator *columnXMLIterator, xmlElement *xml.StartElement) { + colIterator.err = nil + inElement := xmlElement.Name.Local + if inElement == "row" { + colIterator.row++ + for _, attr := range xmlElement.Attr { + if attr.Name.Local == "r" { + if colIterator.curRow, colIterator.err = strconv.Atoi(attr.Value); colIterator.err != nil { + return + } + colIterator.row = colIterator.curRow + } + } + colIterator.cols.totalRow = colIterator.row + colIterator.cellCol = 0 + } + if inElement == "c" { + colIterator.cellCol++ + for _, attr := range xmlElement.Attr { + if attr.Name.Local == "r" { + if colIterator.cellCol, _, colIterator.err = CellNameToCoordinates(attr.Value); colIterator.err != nil { + return + } + } + } + if colIterator.cellCol > colIterator.cols.totalCol { + colIterator.cols.totalCol = colIterator.cellCol + } + } + return } // Cols returns a columns iterator, used for streaming reading data for a @@ -161,53 +206,29 @@ func (f *File) Cols(sheet string) (*Cols, error) { output, _ := xml.Marshal(f.Sheet[name]) f.saveFileList(name, f.replaceNameSpaceBytes(name, output)) } - var ( - inElement string - cols Cols - cellCol, curRow, row int - err error - ) - cols.sheetXML = f.readXML(name) - decoder := f.xmlNewDecoder(bytes.NewReader(cols.sheetXML)) + var colIterator columnXMLIterator + colIterator.cols.sheetXML = f.readXML(name) + decoder := f.xmlNewDecoder(bytes.NewReader(colIterator.cols.sheetXML)) for { token, _ := decoder.Token() if token == nil { break } - switch startElement := token.(type) { + switch xmlElement := token.(type) { case xml.StartElement: - inElement = startElement.Name.Local - if inElement == "row" { - row++ - for _, attr := range startElement.Attr { - if attr.Name.Local == "r" { - if curRow, err = strconv.Atoi(attr.Value); err != nil { - return &cols, err - } - row = curRow - } - } - cols.totalRow = row - cellCol = 0 + columnXMLHandler(&colIterator, &xmlElement) + if colIterator.err != nil { + return &colIterator.cols, 
colIterator.err } - if inElement == "c" { - cellCol++ - for _, attr := range startElement.Attr { - if attr.Name.Local == "r" { - if cellCol, _, err = CellNameToCoordinates(attr.Value); err != nil { - return &cols, err - } - } - } - if cellCol > cols.totalCol { - cols.totalCol = cellCol - } + case xml.EndElement: + if xmlElement.Name.Local == "sheetData" { + colIterator.cols.f = f + colIterator.cols.sheet = trimSheetName(sheet) + return &colIterator.cols, nil } } } - cols.f = f - cols.sheet = trimSheetName(sheet) - return &cols, nil + return &colIterator.cols, nil } // GetColVisible provides a function to get visible of a single column by given diff --git a/col_test.go b/col_test.go index 532f4285..97c4b7ff 100644 --- a/col_test.go +++ b/col_test.go @@ -148,10 +148,20 @@ func TestColsRows(t *testing.T) { }, } - cols.stashCol, cols.curCol = 0, 1 + f = NewFile() + f.XLSX["xl/worksheets/sheet1.xml"] = nil cols, err = f.Cols("Sheet1") + if !assert.NoError(t, err) { + t.FailNow() + } + f = NewFile() + cols, err = f.Cols("Sheet1") + if !assert.NoError(t, err) { + t.FailNow() + } + _, err = cols.Rows() assert.NoError(t, err) - + cols.stashCol, cols.curCol = 0, 1 // Test if token is nil cols.sheetXML = nil _, err = cols.Rows() diff --git a/rows.go b/rows.go index 97cf2f59..702d8f52 100644 --- a/rows.go +++ b/rows.go @@ -78,60 +78,48 @@ func (rows *Rows) Error() error { // Columns return the current row's column values. 
func (rows *Rows) Columns() ([]string, error) { - var ( - err error - inElement string - attrR, cellCol, row int - columns []string - ) - + var rowIterator rowXMLIterator if rows.stashRow >= rows.curRow { - return columns, err + return rowIterator.columns, rowIterator.err } - - d := rows.f.sharedStringsReader() + rowIterator.rows = rows + rowIterator.d = rows.f.sharedStringsReader() for { token, _ := rows.decoder.Token() if token == nil { break } - switch startElement := token.(type) { + switch xmlElement := token.(type) { case xml.StartElement: - inElement = startElement.Name.Local - if inElement == "row" { - row++ - if attrR, err = attrValToInt("r", startElement.Attr); attrR != 0 { - row = attrR + rowIterator.inElement = xmlElement.Name.Local + if rowIterator.inElement == "row" { + rowIterator.row++ + if rowIterator.attrR, rowIterator.err = attrValToInt("r", xmlElement.Attr); rowIterator.attrR != 0 { + rowIterator.row = rowIterator.attrR } - if row > rows.curRow { - rows.stashRow = row - 1 - return columns, err + if rowIterator.row > rowIterator.rows.curRow { + rowIterator.rows.stashRow = rowIterator.row - 1 + return rowIterator.columns, rowIterator.err } } - if inElement == "c" { - cellCol++ - colCell := xlsxC{} - _ = rows.decoder.DecodeElement(&colCell, &startElement) - if colCell.R != "" { - if cellCol, _, err = CellNameToCoordinates(colCell.R); err != nil { - return columns, err - } - } - blank := cellCol - len(columns) - val, _ := colCell.getValueFrom(rows.f, d) - columns = append(appendSpace(blank, columns), val) + rowXMLHandler(&rowIterator, &xmlElement) + if rowIterator.err != nil { + return rowIterator.columns, rowIterator.err } case xml.EndElement: - inElement = startElement.Name.Local - if row == 0 { - row = rows.curRow + rowIterator.inElement = xmlElement.Name.Local + if rowIterator.row == 0 { + rowIterator.row = rowIterator.rows.curRow } - if inElement == "row" && row+1 < rows.curRow { - return columns, err + if rowIterator.inElement == "row" && 
rowIterator.row+1 < rowIterator.rows.curRow { + return rowIterator.columns, rowIterator.err + } + if rowIterator.inElement == "sheetData" { + return rowIterator.columns, rowIterator.err } } } - return columns, err + return rowIterator.columns, rowIterator.err } // appendSpace append blank characters to slice by given length and source slice. @@ -151,6 +139,35 @@ func (err ErrSheetNotExist) Error() string { return fmt.Sprintf("sheet %s is not exist", string(err.SheetName)) } +// rowXMLIterator defines the runtime fields used by the worksheet row SAX parser. +type rowXMLIterator struct { + err error + inElement string + attrR, cellCol, row int + columns []string + rows *Rows + d *xlsxSST +} + +// rowXMLHandler parses the row XML element of the worksheet. +func rowXMLHandler(rowIterator *rowXMLIterator, xmlElement *xml.StartElement) { + rowIterator.err = nil + if rowIterator.inElement == "c" { + rowIterator.cellCol++ + colCell := xlsxC{} + _ = rowIterator.rows.decoder.DecodeElement(&colCell, xmlElement) + if colCell.R != "" { + if rowIterator.cellCol, _, rowIterator.err = CellNameToCoordinates(colCell.R); rowIterator.err != nil { + return + } + } + blank := rowIterator.cellCol - len(rowIterator.columns) + val, _ := colCell.getValueFrom(rowIterator.rows.f, rowIterator.d) + rowIterator.columns = append(appendSpace(blank, rowIterator.columns), val) + } + return +} + // Rows returns a rows iterator, used for streaming reading data for a // worksheet with a large data. 
For example: // @@ -192,12 +209,12 @@ func (f *File) Rows(sheet string) (*Rows, error) { if token == nil { break } - switch startElement := token.(type) { + switch xmlElement := token.(type) { case xml.StartElement: - inElement = startElement.Name.Local + inElement = xmlElement.Name.Local if inElement == "row" { row++ - for _, attr := range startElement.Attr { + for _, attr := range xmlElement.Attr { if attr.Name.Local == "r" { row, err = strconv.Atoi(attr.Value) if err != nil { @@ -207,12 +224,16 @@ func (f *File) Rows(sheet string) (*Rows, error) { } rows.totalRow = row } + case xml.EndElement: + if xmlElement.Name.Local == "sheetData" { + rows.f = f + rows.sheet = name + rows.decoder = f.xmlNewDecoder(bytes.NewReader(f.readXML(name))) + return &rows, nil + } default: } } - rows.f = f - rows.sheet = name - rows.decoder = f.xmlNewDecoder(bytes.NewReader(f.readXML(name))) return &rows, nil } diff --git a/rows_test.go b/rows_test.go index 73931aa3..0e250f60 100644 --- a/rows_test.go +++ b/rows_test.go @@ -46,6 +46,10 @@ func TestRows(t *testing.T) { f.XLSX["xl/worksheets/sheet1.xml"] = []byte(`1B`) _, err = f.Rows("Sheet1") assert.EqualError(t, err, `strconv.Atoi: parsing "A": invalid syntax`) + + f.XLSX["xl/worksheets/sheet1.xml"] = nil + _, err = f.Rows("Sheet1") + assert.NoError(t, err) } func TestRowsIterator(t *testing.T) {