diff --git a/col.go b/col.go index 6f76800..561cec9 100644 --- a/col.go +++ b/col.go @@ -10,7 +10,10 @@ package excelize import ( + "bytes" + "encoding/xml" "errors" + "fmt" "math" "strings" @@ -24,6 +27,170 @@ const ( EMU int = 9525 ) +// Cols defines an iterator to a sheet +type Cols struct { + err error + curCol, totalCol, stashCol, totalRow int + sheet string + cols []xlsxCols + f *File + decoder *xml.Decoder +} + +// GetCols return all the columns in a sheet by given worksheet name (case sensitive). For example: +// +// cols, err := f.Cols("Sheet1") +// if err != nil { +// fmt.Println(err) +// return +// } +// for cols.Next() { +// col, err := cols.Rows() +// if err != nil { +// fmt.Println(err) +// } +// for _, rowCell := range col { +// fmt.Print(rowCell, "\t") +// } +// fmt.Println() +// } +// +func (f *File) GetCols(sheet string) ([][]string, error) { + cols, err := f.Cols(sheet) + if err != nil { + return nil, err + } + + results := make([][]string, 0, 64) + + for cols.Next() { + if cols.Error() != nil { + break + } + + col, err := cols.Rows() + if err != nil { + break + } + + results = append(results, col) + } + + return results, nil +} + +// Next will return true if the next col element is found. +func (cols *Cols) Next() bool { + cols.curCol++ + + return cols.curCol <= cols.totalCol +} + +// Error will return an error when the next col element is found. +func (cols *Cols) Error() error { + return cols.err +} + +// Rows return the current column's row values +func (cols *Cols) Rows() ([]string, error) { + var ( + err error + rows []string + ) + + if cols.stashCol >= cols.curCol { + return rows, err + } + + for i := 1; i <= cols.totalRow; i++ { + colName, _ := ColumnNumberToName(cols.curCol) + val, _ := cols.f.GetCellValue(cols.sheet, fmt.Sprintf("%s%d", colName, i)) + rows = append(rows, val) + } + + return rows, nil +} + +// Cols returns a columns iterator, used for streaming/reading data for a worksheet with a large data. For example: +// +// cols, err := f.Cols("Sheet1") +// if err != nil { +// fmt.Println(err) +// return +// } +// for cols.Next() { +// col, err := cols.Rows() +// if err != nil { +// fmt.Println(err) +// } +// for _, rowCell := range col { +// fmt.Print(rowCell, "\t") +// } +// fmt.Println() +// } +// +func (f *File) Cols(sheet string) (*Cols, error) { + name, ok := f.sheetMap[trimSheetName(sheet)] + if !ok { + return nil, ErrSheetNotExist{sheet} + } + + if f.Sheet[name] != nil { + output, _ := xml.Marshal(f.Sheet[name]) + f.saveFileList(name, replaceRelationshipsNameSpaceBytes(output)) + } + + var ( + inElement string + cols Cols + colsNum, rowsNum []int + ) + decoder := f.xmlNewDecoder(bytes.NewReader(f.readXML(name))) + + for { + token, _ := decoder.Token() + if token == nil { + break + } + + switch startElement := token.(type) { + case xml.StartElement: + inElement = startElement.Name.Local + if inElement == "dimension" { + colsNum = make([]int, 0) + rowsNum = make([]int, 0) + + for _, attr := range startElement.Attr { + if attr.Name.Local == "ref" { + sheetCoordinates := attr.Value + if i := strings.Index(sheetCoordinates, ":"); i <= -1 { + return &cols, errors.New("Sheet coordinates are wrong") + } + + coordinates := strings.Split(sheetCoordinates, ":") + for _, coordinate := range coordinates { + c, r, _ := SplitCellName(coordinate) + columnNum, _ := ColumnNameToNumber(c) + colsNum = append(colsNum, columnNum) + rowsNum = append(rowsNum, r) + } + } + } + + cols.totalCol = colsNum[1] - (colsNum[0] - 1) + cols.totalRow = rowsNum[1] - (rowsNum[0] - 1) + } + default: + } + } + + cols.f = f + cols.sheet = trimSheetName(sheet) + cols.decoder = f.xmlNewDecoder(bytes.NewReader(f.readXML(name))) + + return &cols, nil +} + // GetColVisible provides a function to get visible of a single column by given // worksheet name and column name. For example, get visible state of column D // in Sheet1: diff --git a/col_test.go b/col_test.go index fcb1619..a130f2b 100644 --- a/col_test.go +++ b/col_test.go @@ -1,12 +1,136 @@ package excelize import ( + "bytes" "path/filepath" "testing" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) +func TestCols(t *testing.T) { + const sheet2 = "Sheet2" + + f, err := OpenFile(filepath.Join("test", "Book1.xlsx")) + if !assert.NoError(t, err) { + t.FailNow() + } + + cols, err := f.Cols(sheet2) + if !assert.NoError(t, err) { + t.FailNow() + } + + var collectedRows [][]string + for cols.Next() { + rows, err := cols.Rows() + assert.NoError(t, err) + collectedRows = append(collectedRows, trimSliceSpace(rows)) + } + if !assert.NoError(t, cols.Error()) { + t.FailNow() + } + + returnedColumns, err := f.GetCols(sheet2) + assert.NoError(t, err) + for i := range returnedColumns { + returnedColumns[i] = trimSliceSpace(returnedColumns[i]) + } + if !assert.Equal(t, collectedRows, returnedColumns) { + t.FailNow() + } + + f = NewFile() + cells := []string{"C2", "C3", "C4"} + for _, cell := range cells { + assert.NoError(t, f.SetCellValue("Sheet1", cell, 1)) + } + _, err = f.Rows("Sheet1") + assert.NoError(t, err) + + f.Sheet["xl/worksheets/sheet1.xml"] = &xlsxWorksheet{ + Dimension: &xlsxDimension{ + Ref: "C2:C4", + }, + } + _, err = f.Rows("Sheet1") + assert.NoError(t, err) +} + +func TestColumnsIterator(t *testing.T) { + const ( + sheet2 = "Sheet2" + expectedNumCol = 4 + ) + + f, err := OpenFile(filepath.Join("test", "Book1.xlsx")) + require.NoError(t, err) + + cols, err := f.Cols(sheet2) + require.NoError(t, err) + + var colCount int + for cols.Next() { + colCount++ + require.True(t, colCount <= expectedNumCol, "colCount is greater than expected") + } + assert.Equal(t, expectedNumCol, colCount) + + f = NewFile() + cells := []string{"C2", "C3", "C4", "D2", "D3", "D4"} + for _, cell := range cells { + assert.NoError(t, f.SetCellValue("Sheet1", cell, 1)) + } + f.Sheet["xl/worksheets/sheet1.xml"] = &xlsxWorksheet{ + Dimension: &xlsxDimension{ + Ref: "C2:D4", + }, + } + cols, err = f.Cols("Sheet1") + require.NoError(t, err) + + colCount = 0 + for cols.Next() { + colCount++ + require.True(t, colCount <= 2, "colCount is greater than expected") + } + assert.Equal(t, 2, colCount) +} + +func TestColsError(t *testing.T) { + xlsx, err := OpenFile(filepath.Join("test", "Book1.xlsx")) + if !assert.NoError(t, err) { + t.FailNow() + } + _, err = xlsx.Cols("SheetN") + assert.EqualError(t, err, "sheet SheetN is not exist") +} + +func TestColsRows(t *testing.T) { + f := NewFile() + f.NewSheet("Sheet1") + + cols, err := f.Cols("Sheet1") + assert.EqualError(t, err, `Sheet coordinates are wrong`) + + assert.NoError(t, f.SetCellValue("Sheet1", "A1", 1)) + f.Sheet["xl/worksheets/sheet1.xml"] = &xlsxWorksheet{ + Dimension: &xlsxDimension{ + Ref: "A1:A1", + }, + } + + cols.stashCol, cols.curCol = 0, 1 + cols, err = f.Cols("Sheet1") + assert.NoError(t, err) + + // Test if token is nil + cols.decoder = f.xmlNewDecoder(bytes.NewReader(nil)) + _, err = cols.Rows() + assert.NoError(t, err) +} + func TestColumnVisibility(t *testing.T) { t.Run("TestBook1", func(t *testing.T) { f, err := prepareTestBook1()