Merge pull request #653 from Jerome1337/648-column-iterator

Implement columns iterator
This commit is contained in:
xuri 2020-06-20 22:08:08 +08:00 committed by GitHub
commit dcb772d692
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 291 additions and 0 deletions

167
col.go
View File

@ -10,7 +10,10 @@
package excelize
import (
"bytes"
"encoding/xml"
"errors"
"fmt"
"math"
"strings"
@ -24,6 +27,170 @@ const (
EMU int = 9525
)
// Cols is a column iterator over a single worksheet. Next advances it and
// Rows reads the cell values of the column it currently points at.
type Cols struct {
	err      error // value handed back by Error
	curCol   int   // 1-based index of the current column; 0 until Next is called
	totalCol int   // column count derived from the sheet's dimension reference
	stashCol int   // Rows returns nothing while stashCol >= curCol
	totalRow int   // row count derived from the sheet's dimension reference
	sheet    string
	cols     []xlsxCols
	f        *File
	decoder  *xml.Decoder
}
// GetCols returns all the columns in a sheet by given worksheet name (case
// sensitive). Each element of the result holds the cell values of one column,
// in the order produced by the Cols iterator. For example:
//
//    cols, err := f.GetCols("Sheet1")
//    if err != nil {
//        fmt.Println(err)
//        return
//    }
//    for _, col := range cols {
//        for _, rowCell := range col {
//            fmt.Print(rowCell, "\t")
//        }
//        fmt.Println()
//    }
//
// An error is returned when the iterator cannot be created, or when the
// iterator reports an error or a column cannot be read; in every error case
// the returned slice is nil.
func (f *File) GetCols(sheet string) ([][]string, error) {
	cols, err := f.Cols(sheet)
	if err != nil {
		return nil, err
	}
	results := make([][]string, 0, 64)
	for cols.Next() {
		// Surface iterator failures instead of silently returning a
		// truncated result with a nil error.
		if err := cols.Error(); err != nil {
			return nil, err
		}
		col, err := cols.Rows()
		if err != nil {
			return nil, err
		}
		results = append(results, col)
	}
	return results, nil
}
// Next moves the iterator forward by one column and reports whether the new
// position is still within the sheet's column count.
func (cols *Cols) Next() bool {
	position := cols.curCol + 1
	cols.curCol = position
	return position <= cols.totalCol
}
// Error returns the error stored on the iterator, or nil when none has been
// stored.
// NOTE(review): no code visible in this section assigns the err field, so this
// appears to always return nil here; confirm against the rest of the package.
func (cols *Cols) Error() error {
return cols.err
}
// Rows returns the cell values of the current column, one entry per row, for
// rows 1 through totalRow.
//
// It returns a nil slice and a nil error when the iterator has not moved past
// stashCol (for example before the first call to Next) or when the sheet has
// no rows. An error from resolving the column name or from reading a cell is
// returned to the caller, with a nil slice, instead of being dropped.
func (cols *Cols) Rows() ([]string, error) {
	if cols.stashCol >= cols.curCol || cols.totalRow <= 0 {
		return nil, nil
	}
	// The column name is the same for every row, so resolve it once.
	colName, err := ColumnNumberToName(cols.curCol)
	if err != nil {
		return nil, err
	}
	var rows []string
	for i := 1; i <= cols.totalRow; i++ {
		val, err := cols.f.GetCellValue(cols.sheet, fmt.Sprintf("%s%d", colName, i))
		if err != nil {
			return nil, err
		}
		rows = append(rows, val)
	}
	return rows, nil
}
// Cols returns a columns iterator for the given worksheet. The iterator's
// column and row counts are computed from the "ref" attribute of the
// worksheet's <dimension> element; if that element or attribute is absent the
// counts stay at zero and the iterator yields nothing. For example:
//
//    cols, err := f.Cols("Sheet1")
//    if err != nil {
//        fmt.Println(err)
//        return
//    }
//    for cols.Next() {
//        col, err := cols.Rows()
//        if err != nil {
//            fmt.Println(err)
//        }
//        for _, rowCell := range col {
//            fmt.Print(rowCell, "\t")
//        }
//        fmt.Println()
//    }
//
// ErrSheetNotExist is returned, with a nil iterator, when the sheet name is
// unknown. When the dimension reference is not a range (no ":" separator) or
// one of its coordinates cannot be parsed, an error is returned together with
// a non-nil iterator; existing callers rely on the iterator being non-nil in
// the former case, so the same shape is used for both.
//
// NOTE(review): only the size of the dimension range is recorded, while Rows
// reads cells starting at column A, row 1. A range such as C2:D4 is therefore
// iterated as A1:B3. Confirm whether the range offset should be honoured.
func (f *File) Cols(sheet string) (*Cols, error) {
	name, ok := f.sheetMap[trimSheetName(sheet)]
	if !ok {
		return nil, ErrSheetNotExist{sheet}
	}
	if f.Sheet[name] != nil {
		// Serialize the in-memory worksheet and store it, presumably so the
		// readXML calls below observe its current state.
		output, _ := xml.Marshal(f.Sheet[name])
		f.saveFileList(name, replaceRelationshipsNameSpaceBytes(output))
	}
	var cols Cols
	decoder := f.xmlNewDecoder(bytes.NewReader(f.readXML(name)))
	for {
		// Token yields a nil token at end of input or on a decoding failure;
		// either way there is nothing further to scan.
		token, _ := decoder.Token()
		if token == nil {
			break
		}
		element, isStart := token.(xml.StartElement)
		if !isStart || element.Name.Local != "dimension" {
			continue
		}
		for _, attr := range element.Attr {
			if attr.Name.Local != "ref" {
				continue
			}
			if !strings.Contains(attr.Value, ":") {
				return &cols, errors.New("Sheet coordinates are wrong")
			}
			// The ":" check above guarantees at least two parts; only the
			// first two (range start and range end) are used.
			var colNums, rowNums [2]int
			for i, coordinate := range strings.Split(attr.Value, ":")[:2] {
				c, r, err := SplitCellName(coordinate)
				if err != nil {
					return &cols, err
				}
				n, err := ColumnNameToNumber(c)
				if err != nil {
					return &cols, err
				}
				colNums[i], rowNums[i] = n, r
			}
			cols.totalCol = colNums[1] - (colNums[0] - 1)
			cols.totalRow = rowNums[1] - (rowNums[0] - 1)
		}
		// The dimension element is all that is needed; skip the rest of the
		// worksheet instead of tokenizing every cell.
		break
	}
	cols.f = f
	cols.sheet = trimSheetName(sheet)
	cols.decoder = f.xmlNewDecoder(bytes.NewReader(f.readXML(name)))
	return &cols, nil
}
// GetColVisible provides a function to get the visibility of a single column
// by given worksheet name and column name. For example, get the visible state
// of column D in Sheet1:

View File

@ -1,12 +1,136 @@
package excelize
import (
"bytes"
"path/filepath"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestCols checks that walking Sheet2 of Book1.xlsx with Cols/Next/Rows
// collects the same data that GetCols returns, then calls Rows on a new
// workbook before and after its worksheet dimension is replaced.
func TestCols(t *testing.T) {
	const sheet2 = "Sheet2"

	f, err := OpenFile(filepath.Join("test", "Book1.xlsx"))
	require.NoError(t, err)

	iter, err := f.Cols(sheet2)
	require.NoError(t, err)

	var viaIterator [][]string
	for iter.Next() {
		column, err := iter.Rows()
		assert.NoError(t, err)
		viaIterator = append(viaIterator, trimSliceSpace(column))
	}
	require.NoError(t, iter.Error())

	viaGetCols, err := f.GetCols(sheet2)
	assert.NoError(t, err)
	for idx, column := range viaGetCols {
		viaGetCols[idx] = trimSliceSpace(column)
	}
	require.Equal(t, viaIterator, viaGetCols)

	f = NewFile()
	for _, cell := range []string{"C2", "C3", "C4"} {
		assert.NoError(t, f.SetCellValue("Sheet1", cell, 1))
	}
	// NOTE(review): this part calls f.Rows, not f.Cols; it may have been
	// carried over from the rows tests. Confirm which call is intended.
	_, err = f.Rows("Sheet1")
	assert.NoError(t, err)

	f.Sheet["xl/worksheets/sheet1.xml"] = &xlsxWorksheet{
		Dimension: &xlsxDimension{Ref: "C2:C4"},
	}
	_, err = f.Rows("Sheet1")
	assert.NoError(t, err)
}
func TestColumnsIterator(t *testing.T) {
const (
sheet2 = "Sheet2"
expectedNumCol = 4
)
f, err := OpenFile(filepath.Join("test", "Book1.xlsx"))
require.NoError(t, err)
cols, err := f.Cols(sheet2)
require.NoError(t, err)
var colCount int
for cols.Next() {
colCount++
require.True(t, colCount <= expectedNumCol, "colCount is greater than expected")
}
assert.Equal(t, expectedNumCol, colCount)
f = NewFile()
cells := []string{"C2", "C3", "C4", "D2", "D3", "D4"}
for _, cell := range cells {
assert.NoError(t, f.SetCellValue("Sheet1", cell, 1))
}
f.Sheet["xl/worksheets/sheet1.xml"] = &xlsxWorksheet{
Dimension: &xlsxDimension{
Ref: "C2:D4",
},
}
cols, err = f.Cols("Sheet1")
require.NoError(t, err)
colCount = 0
for cols.Next() {
colCount++
require.True(t, colCount <= 2, "colCount is greater than expected")
}
assert.Equal(t, 2, colCount)
}
// TestColsError checks the error returned by Cols for a sheet name that is
// not present in Book1.xlsx.
func TestColsError(t *testing.T) {
	f, err := OpenFile(filepath.Join("test", "Book1.xlsx"))
	require.NoError(t, err)

	_, colsErr := f.Cols("SheetN")
	assert.EqualError(t, colsErr, "sheet SheetN is not exist")
}
// TestColsRows checks that Cols on a new workbook fails with the coordinates
// error, and that after the dimension is set to A1:A1 a fresh iterator can
// have Rows called on it without error even when its decoder has no input.
func TestColsRows(t *testing.T) {
	f := NewFile()
	f.NewSheet("Sheet1")

	// On the untouched workbook Cols is expected to report the error below,
	// while still handing back an iterator whose fields can be written.
	cols, err := f.Cols("Sheet1")
	assert.EqualError(t, err, "Sheet coordinates are wrong")

	assert.NoError(t, f.SetCellValue("Sheet1", "A1", 1))
	f.Sheet["xl/worksheets/sheet1.xml"] = &xlsxWorksheet{
		Dimension: &xlsxDimension{Ref: "A1:A1"},
	}

	cols.stashCol = 0
	cols.curCol = 1
	cols, err = f.Cols("Sheet1")
	assert.NoError(t, err)

	// Replace the decoder with one over empty input, so it has no tokens to
	// offer, then read the rows.
	cols.decoder = f.xmlNewDecoder(bytes.NewReader(nil))
	_, err = cols.Rows()
	assert.NoError(t, err)
}
func TestColumnVisibility(t *testing.T) {
t.Run("TestBook1", func(t *testing.T) {
f, err := prepareTestBook1()