Add Tabular Diff for CSV files (#14661)

Implements request #14320. The rendering of CSV files now matches the diff style.

* Moved CSV logic into base package.

* Added method to create a tabular diff.

* Added CSV compare context.

* Added CSV diff template.

* Use new table style in CSV markup.

* Added file size limit for CSV rendering.

* Display CSV parser errors in diff.

* Lazy read single file.

* Lazy read rows for full diff.

* Added unit tests for various CSV changes.
This commit is contained in:
KN4CK3R 2021-03-29 22:44:28 +02:00 committed by GitHub
parent d3b8127ad3
commit 0c6137617f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
20 changed files with 937 additions and 118 deletions

View File

@ -248,6 +248,10 @@ EVENT_SOURCE_UPDATE_TIME = 10s
; Whether to render SVG files as images. If SVG rendering is disabled, SVG files are displayed as text and cannot be embedded in markdown files as images. ; Whether to render SVG files as images. If SVG rendering is disabled, SVG files are displayed as text and cannot be embedded in markdown files as images.
ENABLE_RENDER = true ENABLE_RENDER = true
[ui.csv]
; Maximum allowed file size in bytes to render CSV files as table. (Set to 0 for no limit).
MAX_FILE_SIZE = 524288
[markdown] [markdown]
; Render soft line breaks as hard line breaks, which means a single newline character between ; Render soft line breaks as hard line breaks, which means a single newline character between
; paragraphs will cause a line break and adding trailing whitespace to paragraphs is not ; paragraphs will cause a line break and adding trailing whitespace to paragraphs is not

View File

@ -198,6 +198,10 @@ Values containing `#` or `;` must be quoted using `` ` `` or `"""`.
- `ENABLE_RENDER`: **true**: Whether to render SVG files as images. If SVG rendering is disabled, SVG files are displayed as text and cannot be embedded in markdown files as images. - `ENABLE_RENDER`: **true**: Whether to render SVG files as images. If SVG rendering is disabled, SVG files are displayed as text and cannot be embedded in markdown files as images.
### UI - CSV Files (`ui.csv`)
- `MAX_FILE_SIZE`: **524288** (512 KiB): Maximum allowed file size in bytes to render CSV files as table. (Set to 0 for no limit).
## Markdown (`markdown`) ## Markdown (`markdown`)
- `ENABLE_HARD_LINE_BREAK_IN_COMMENTS`: **true**: Render soft line breaks as hard line breaks in comments, which - `ENABLE_HARD_LINE_BREAK_IN_COMMENTS`: **true**: Render soft line breaks as hard line breaks in comments, which

93
modules/csv/csv.go Normal file
View File

@ -0,0 +1,93 @@
// Copyright 2021 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package csv
import (
"bytes"
"encoding/csv"
"errors"
"regexp"
"strings"
"code.gitea.io/gitea/modules/translation"
"code.gitea.io/gitea/modules/util"
)
var quoteRegexp = regexp.MustCompile(`["'][\s\S]+?["']`)
// CreateReader returns a csv.Reader over rawBytes that splits fields on the
// given delimiter and ignores leading white space in every field.
func CreateReader(rawBytes []byte, delimiter rune) *csv.Reader {
	reader := csv.NewReader(bytes.NewReader(rawBytes))
	reader.Comma, reader.TrimLeadingSpace = delimiter, true
	return reader
}
// CreateReaderAndGuessDelimiter tries to guess the field delimiter from the content and creates a csv.Reader.
func CreateReaderAndGuessDelimiter(rawBytes []byte) *csv.Reader {
delimiter := guessDelimiter(rawBytes)
return CreateReader(rawBytes, delimiter)
}
// guessDelimiter scores a sample of the CSV data against each candidate
// delimiter and returns the highest-scoring one. The sample is limited to the
// first 10k bytes and the first 10 lines; quoted sections are removed before
// scoring. The comma wins when no candidate scores above zero.
func guessDelimiter(data []byte) rune {
	const (
		lineLimit = 10
		byteLimit = 1e4
	)

	sample := data[:util.Min(len(data), byteLimit)]
	stripped := quoteRegexp.ReplaceAllLiteralString(string(sample), "")

	lines := strings.SplitN(stripped, "\n", lineLimit+1)
	if len(lines) > lineLimit {
		lines = lines[:lineLimit]
	}

	winner, highest := ',', 0.0
	for _, candidate := range []rune{',', ';', '\t', '|', '@'} {
		if s := scoreDelimiter(lines, candidate); s > highest {
			winner, highest = candidate, s
		}
	}
	return winner
}
// scoreDelimiter rates a delimiter by how often it occurs in lines, scaled down
// by the share of lines whose occurrence count deviates from the running
// maximum. Empty lines contribute no occurrences but still count towards the
// total number of lines.
func scoreDelimiter(lines []string, delim rune) float64 {
	var total, maxPerLine, irregular int
	sep := string(delim)

	for _, line := range lines {
		if line == "" {
			continue
		}

		n := strings.Count(line, sep)
		total += n
		if n == maxPerLine {
			continue
		}
		// The first line that contains the delimiter only establishes the
		// baseline; every later deviation counts as an irregular line.
		if maxPerLine != 0 {
			irregular++
		}
		maxPerLine = util.Max(n, maxPerLine)
	}

	return float64(total) * (1 - float64(irregular)/float64(len(lines)))
}
// FormatError turns a CSV parse error into a translated, human readable
// message. Errors that are not a *csv.ParseError are handed back unchanged
// together with an empty message.
func FormatError(err error, locale translation.Locale) (string, error) {
	var parseErr *csv.ParseError
	if !errors.As(err, &parseErr) {
		return "", err
	}

	if parseErr.Err == csv.ErrFieldCount {
		return locale.Tr("repo.error.csv.invalid_field_count", parseErr.Line), nil
	}
	return locale.Tr("repo.error.csv.unexpected", parseErr.Line, parseErr.Column), nil
}

40
modules/csv/csv_test.go Normal file
View File

@ -0,0 +1,40 @@
// Copyright 2021 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package csv
import (
"testing"
"github.com/stretchr/testify/assert"
)
// TestCreateReader checks that the requested delimiter is set on the reader.
func TestCreateReader(t *testing.T) {
	reader := CreateReader([]byte{}, ',')

	assert.Equal(t, ',', reader.Comma)
}
// TestCreateReaderAndGuessDelimiter checks that a semicolon separated input
// yields a reader that splits on semicolons.
func TestCreateReaderAndGuessDelimiter(t *testing.T) {
	content := []byte("a;b;c\n1;2;3\n4;5;6")

	reader := CreateReaderAndGuessDelimiter(content)

	assert.Equal(t, ';', reader.Comma)
}
// TestGuessDelimiter checks the guessed delimiter for a set of small inputs.
func TestGuessDelimiter(t *testing.T) {
	var kases = map[string]rune{
		"a":                         ',',
		"1,2":                       ',',
		"1;2":                       ';',
		"1\t2":                      '\t',
		"1|2":                       '|',
		"1,2,3;4,5,6;7,8,9\na;b;c":  ';',
		"\"1,2,3,4\";\"a\nb\"\nc;d": ';',
		"<br/>":                     ',',
	}

	for k, v := range kases {
		// The expected value goes first so that failure output labels the
		// values correctly; the input is included to identify the failing case.
		assert.EqualValues(t, v, guessDelimiter([]byte(k)), "input: %q", k)
	}
}

View File

@ -6,24 +6,20 @@ package markup
import ( import (
"bytes" "bytes"
"encoding/csv"
"html" "html"
"io" "io"
"regexp" "strconv"
"strings"
"code.gitea.io/gitea/modules/csv"
"code.gitea.io/gitea/modules/markup" "code.gitea.io/gitea/modules/markup"
"code.gitea.io/gitea/modules/util" "code.gitea.io/gitea/modules/setting"
) )
var quoteRegexp = regexp.MustCompile(`["'][\s\S]+?["']`)
func init() { func init() {
markup.RegisterParser(Parser{}) markup.RegisterParser(Parser{})
} }
// Parser implements markup.Parser for orgmode // Parser implements markup.Parser for csv files
type Parser struct { type Parser struct {
} }
@ -38,11 +34,35 @@ func (Parser) Extensions() []string {
} }
// Render implements markup.Parser // Render implements markup.Parser
func (p Parser) Render(rawBytes []byte, urlPrefix string, metas map[string]string, isWiki bool) []byte { func (Parser) Render(rawBytes []byte, urlPrefix string, metas map[string]string, isWiki bool) []byte {
rd := csv.NewReader(bytes.NewReader(rawBytes))
rd.Comma = p.bestDelimiter(rawBytes)
var tmpBlock bytes.Buffer var tmpBlock bytes.Buffer
tmpBlock.WriteString(`<table class="table">`)
if setting.UI.CSV.MaxFileSize != 0 && setting.UI.CSV.MaxFileSize < int64(len(rawBytes)) {
tmpBlock.WriteString("<pre>")
tmpBlock.WriteString(html.EscapeString(string(rawBytes)))
tmpBlock.WriteString("</pre>")
return tmpBlock.Bytes()
}
rd := csv.CreateReaderAndGuessDelimiter(rawBytes)
writeField := func(element, class, field string) {
tmpBlock.WriteString("<")
tmpBlock.WriteString(element)
if len(class) > 0 {
tmpBlock.WriteString(" class=\"")
tmpBlock.WriteString(class)
tmpBlock.WriteString("\"")
}
tmpBlock.WriteString(">")
tmpBlock.WriteString(html.EscapeString(field))
tmpBlock.WriteString("</")
tmpBlock.WriteString(element)
tmpBlock.WriteString(">")
}
tmpBlock.WriteString(`<table class="data-table">`)
row := 1
for { for {
fields, err := rd.Read() fields, err := rd.Read()
if err == io.EOF { if err == io.EOF {
@ -52,62 +72,19 @@ func (p Parser) Render(rawBytes []byte, urlPrefix string, metas map[string]strin
continue continue
} }
tmpBlock.WriteString("<tr>") tmpBlock.WriteString("<tr>")
element := "td"
if row == 1 {
element = "th"
}
writeField(element, "line-num", strconv.Itoa(row))
for _, field := range fields { for _, field := range fields {
tmpBlock.WriteString("<td>") writeField(element, "", field)
tmpBlock.WriteString(html.EscapeString(field))
tmpBlock.WriteString("</td>")
} }
tmpBlock.WriteString("</tr>") tmpBlock.WriteString("</tr>")
row++
} }
tmpBlock.WriteString("</table>") tmpBlock.WriteString("</table>")
return tmpBlock.Bytes() return tmpBlock.Bytes()
} }
// bestDelimiter scores the input CSV data against delimiters, and returns the best match.
// Reads at most 10k bytes & 10 lines.
func (p Parser) bestDelimiter(data []byte) rune {
maxLines := 10
maxBytes := util.Min(len(data), 1e4)
text := string(data[:maxBytes])
text = quoteRegexp.ReplaceAllLiteralString(text, "")
lines := strings.SplitN(text, "\n", maxLines+1)
lines = lines[:util.Min(maxLines, len(lines))]
delimiters := []rune{',', ';', '\t', '|'}
bestDelim := delimiters[0]
bestScore := 0.0
for _, delim := range delimiters {
score := p.scoreDelimiter(lines, delim)
if score > bestScore {
bestScore = score
bestDelim = delim
}
}
return bestDelim
}
// scoreDelimiter uses a count & regularity metric to evaluate a delimiter against lines of CSV
func (Parser) scoreDelimiter(lines []string, delim rune) (score float64) {
countTotal := 0
countLineMax := 0
linesNotEqual := 0
for _, line := range lines {
if len(line) == 0 {
continue
}
countLine := strings.Count(line, string(delim))
countTotal += countLine
if countLine != countLineMax {
if countLineMax != 0 {
linesNotEqual++
}
countLineMax = util.Max(countLine, countLineMax)
}
}
return float64(countTotal) * (1 - float64(linesNotEqual)/float64(len(lines)))
}

View File

@ -13,14 +13,10 @@ import (
func TestRenderCSV(t *testing.T) { func TestRenderCSV(t *testing.T) {
var parser Parser var parser Parser
var kases = map[string]string{ var kases = map[string]string{
"a": "<table class=\"table\"><tr><td>a</td></tr></table>", "a": "<table class=\"data-table\"><tr><th class=\"line-num\">1</th><th>a</th></tr></table>",
"1,2": "<table class=\"table\"><tr><td>1</td><td>2</td></tr></table>", "1,2": "<table class=\"data-table\"><tr><th class=\"line-num\">1</th><th>1</th><th>2</th></tr></table>",
"1;2": "<table class=\"table\"><tr><td>1</td><td>2</td></tr></table>", "1;2\n3;4": "<table class=\"data-table\"><tr><th class=\"line-num\">1</th><th>1</th><th>2</th></tr><tr><td class=\"line-num\">2</td><td>3</td><td>4</td></tr></table>",
"1\t2": "<table class=\"table\"><tr><td>1</td><td>2</td></tr></table>", "<br/>": "<table class=\"data-table\"><tr><th class=\"line-num\">1</th><th>&lt;br/&gt;</th></tr></table>",
"1|2": "<table class=\"table\"><tr><td>1</td><td>2</td></tr></table>",
"1,2,3;4,5,6;7,8,9\na;b;c": "<table class=\"table\"><tr><td>1,2,3</td><td>4,5,6</td><td>7,8,9</td></tr><tr><td>a</td><td>b</td><td>c</td></tr></table>",
"\"1,2,3,4\";\"a\nb\"\nc;d": "<table class=\"table\"><tr><td>1,2,3,4</td><td>a\nb</td></tr><tr><td>c</td><td>d</td></tr></table>",
"<br/>": "<table class=\"table\"><tr><td>&lt;br/&gt;</td></tr></table>",
} }
for k, v := range kases { for k, v := range kases {

View File

@ -69,6 +69,10 @@ func ReplaceSanitizer() {
// Allow icons, emojis, and chroma syntax on span // Allow icons, emojis, and chroma syntax on span
sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`^((icon(\s+[\p{L}\p{N}_-]+)+)|(emoji))$|^([a-z][a-z0-9]{0,2})$`)).OnElements("span") sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`^((icon(\s+[\p{L}\p{N}_-]+)+)|(emoji))$|^([a-z][a-z0-9]{0,2})$`)).OnElements("span")
// Allow data tables
sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`data-table`)).OnElements("table")
sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`line-num`)).OnElements("th", "td")
// Allow generally safe attributes // Allow generally safe attributes
generalSafeAttrs := []string{"abbr", "accept", "accept-charset", generalSafeAttrs := []string{"abbr", "accept", "accept-charset",
"accesskey", "action", "align", "alt", "accesskey", "action", "align", "alt",

View File

@ -213,6 +213,10 @@ var (
Enabled bool `ini:"ENABLE_RENDER"` Enabled bool `ini:"ENABLE_RENDER"`
} `ini:"ui.svg"` } `ini:"ui.svg"`
CSV struct {
MaxFileSize int64
} `ini:"ui.csv"`
Admin struct { Admin struct {
UserPagingNum int UserPagingNum int
RepoPagingNum int RepoPagingNum int
@ -258,6 +262,11 @@ var (
}{ }{
Enabled: true, Enabled: true,
}, },
CSV: struct {
MaxFileSize int64
}{
MaxFileSize: 524288,
},
Admin: struct { Admin: struct {
UserPagingNum int UserPagingNum int
RepoPagingNum int RepoPagingNum int

View File

@ -1860,6 +1860,7 @@ diff.whitespace_ignore_at_eol = Ignore changes in whitespace at EOL
diff.stats_desc = <strong> %d changed files</strong> with <strong>%d additions</strong> and <strong>%d deletions</strong> diff.stats_desc = <strong> %d changed files</strong> with <strong>%d additions</strong> and <strong>%d deletions</strong>
diff.stats_desc_file = %d changes: %d additions and %d deletions diff.stats_desc_file = %d changes: %d additions and %d deletions
diff.bin = BIN diff.bin = BIN
diff.bin_not_shown = Binary file not shown.
diff.view_file = View File diff.view_file = View File
diff.file_before = Before diff.file_before = Before
diff.file_after = After diff.file_after = After
@ -1960,6 +1961,10 @@ topic.done = Done
topic.count_prompt = You can not select more than 25 topics topic.count_prompt = You can not select more than 25 topics
topic.format_prompt = Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long. topic.format_prompt = Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
error.csv.too_large = Can't render this file because it is too large.
error.csv.unexpected = Can't render this file because it contains an unexpected character in line %d and column %d.
error.csv.invalid_field_count = Can't render this file because it has a wrong number of fields in line %d.
[org] [org]
org_name_holder = Organization Name org_name_holder = Organization Name
org_full_name_holder = Organization Full Name org_full_name_holder = Organization Full Name

View File

@ -336,9 +336,8 @@ func Diff(ctx *context.Context) {
return return
} }
} }
setImageCompareContext(ctx, parentCommit, commit)
headTarget := path.Join(userName, repoName) headTarget := path.Join(userName, repoName)
setPathsCompareContext(ctx, parentCommit, commit, headTarget) setCompareContext(ctx, parentCommit, commit, headTarget)
ctx.Data["Title"] = commit.Summary() + " · " + base.ShortSha(commitID) ctx.Data["Title"] = commit.Summary() + " · " + base.ShortSha(commitID)
ctx.Data["Commit"] = commit ctx.Data["Commit"] = commit
verification := models.ParseCommitWithSignature(commit) verification := models.ParseCommitWithSignature(commit)

View File

@ -6,14 +6,20 @@ package repo
import ( import (
"bufio" "bufio"
"encoding/csv"
"errors"
"fmt" "fmt"
"html" "html"
"io/ioutil"
"path" "path"
"path/filepath"
"strings" "strings"
"code.gitea.io/gitea/models" "code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/base" "code.gitea.io/gitea/modules/base"
"code.gitea.io/gitea/modules/charset"
"code.gitea.io/gitea/modules/context" "code.gitea.io/gitea/modules/context"
csv_module "code.gitea.io/gitea/modules/csv"
"code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/setting"
@ -26,6 +32,16 @@ const (
tplBlobExcerpt base.TplName = "repo/diff/blob_excerpt" tplBlobExcerpt base.TplName = "repo/diff/blob_excerpt"
) )
// setCompareContext stores the base and head commits in ctx.Data under
// "BaseCommit" and "HeadCommit" and then delegates to the path, image and CSV
// compare helpers to add their context data.
func setCompareContext(ctx *context.Context, base *git.Commit, head *git.Commit, headTarget string) {
ctx.Data["BaseCommit"] = base
ctx.Data["HeadCommit"] = head
setPathsCompareContext(ctx, base, head, headTarget)
setImageCompareContext(ctx, base, head)
setCsvCompareContext(ctx)
}
// setPathsCompareContext sets context data for source and raw paths // setPathsCompareContext sets context data for source and raw paths
func setPathsCompareContext(ctx *context.Context, base *git.Commit, head *git.Commit, headTarget string) { func setPathsCompareContext(ctx *context.Context, base *git.Commit, head *git.Commit, headTarget string) {
sourcePath := setting.AppSubURL + "/%s/src/commit/%s" sourcePath := setting.AppSubURL + "/%s/src/commit/%s"
@ -65,6 +81,73 @@ func setImageCompareContext(ctx *context.Context, base *git.Commit, head *git.Co
} }
} }
// setCsvCompareContext sets context data that is required by the CSV compare template.
// It registers two functions in ctx.Data:
//   - "IsCsvFile" reports whether a diff file has a .csv or .tsv extension (case-insensitive).
//   - "CreateCsvDiff" builds the tabular diff of a file between two commits and
//     returns either the diff sections or a translated error message.
func setCsvCompareContext(ctx *context.Context) {
ctx.Data["IsCsvFile"] = func(diffFile *gitdiff.DiffFile) bool {
extension := strings.ToLower(filepath.Ext(diffFile.Name))
return extension == ".csv" || extension == ".tsv"
}
// CsvDiffResult is what the "CreateCsvDiff" function returns: the diff sections
// on success, or a message in Error.
type CsvDiffResult struct {
Sections []*gitdiff.TableDiffSection
Error string
}
ctx.Data["CreateCsvDiff"] = func(diffFile *gitdiff.DiffFile, baseCommit *git.Commit, headCommit *git.Commit) CsvDiffResult {
if diffFile == nil || baseCommit == nil || headCommit == nil {
return CsvDiffResult{nil, ""}
}
// Sentinel error, compared by identity below, that carries the translated message.
errTooLarge := errors.New(ctx.Locale.Tr("repo.error.csv.too_large"))
// csvReaderFromCommit loads the blob at diffFile.Name from commit c, converts
// it to UTF-8 and wraps it in a CSV reader with a guessed delimiter. It returns
// errTooLarge when the blob exceeds the configured maximum file size
// (a maximum of 0 disables the limit).
csvReaderFromCommit := func(c *git.Commit) (*csv.Reader, error) {
blob, err := c.GetBlobByPath(diffFile.Name)
if err != nil {
return nil, err
}
if setting.UI.CSV.MaxFileSize != 0 && setting.UI.CSV.MaxFileSize < blob.Size() {
return nil, errTooLarge
}
reader, err := blob.DataAsync()
if err != nil {
return nil, err
}
defer reader.Close()
b, err := ioutil.ReadAll(reader)
if err != nil {
return nil, err
}
b = charset.ToUTF8WithFallback(b)
return csv_module.CreateReaderAndGuessDelimiter(b), nil
}
// Only errTooLarge is reported here. Any other error leaves the reader nil and
// is not surfaced; the nil reader is passed on to gitdiff.CreateCsvDiff.
baseReader, err := csvReaderFromCommit(baseCommit)
if err == errTooLarge {
return CsvDiffResult{nil, err.Error()}
}
headReader, err := csvReaderFromCommit(headCommit)
if err == errTooLarge {
return CsvDiffResult{nil, err.Error()}
}
sections, err := gitdiff.CreateCsvDiff(diffFile, baseReader, headReader)
if err != nil {
// CSV parse errors become a translated message; anything else is logged
// and an empty result is returned.
errMessage, err := csv_module.FormatError(err, ctx.Locale)
if err != nil {
log.Error("RenderCsvDiff failed: %v", err)
return CsvDiffResult{nil, ""}
}
return CsvDiffResult{nil, errMessage}
}
return CsvDiffResult{sections, ""}
}
}
// ParseCompareInfo parse compare info between two commit for preparing comparing references // ParseCompareInfo parse compare info between two commit for preparing comparing references
func ParseCompareInfo(ctx *context.Context) (*models.User, *models.Repository, *git.Repository, *git.CompareInfo, string, string) { func ParseCompareInfo(ctx *context.Context) (*models.User, *models.Repository, *git.Repository, *git.CompareInfo, string, string) {
baseRepo := ctx.Repo.Repository baseRepo := ctx.Repo.Repository
@ -490,9 +573,8 @@ func PrepareCompareDiff(
ctx.Data["Username"] = headUser.Name ctx.Data["Username"] = headUser.Name
ctx.Data["Reponame"] = headRepo.Name ctx.Data["Reponame"] = headRepo.Name
setImageCompareContext(ctx, baseCommit, headCommit)
headTarget := path.Join(headUser.Name, repo.Name) headTarget := path.Join(headUser.Name, repo.Name)
setPathsCompareContext(ctx, baseCommit, headCommit, headTarget) setCompareContext(ctx, baseCommit, headCommit, headTarget)
return false return false
} }

View File

@ -591,7 +591,6 @@ func ViewPullFiles(ctx *context.Context) {
gitRepo *git.Repository gitRepo *git.Repository
) )
var headTarget string
var prInfo *git.CompareInfo var prInfo *git.CompareInfo
if pull.HasMerged { if pull.HasMerged {
prInfo = PrepareMergedViewPullInfo(ctx, issue) prInfo = PrepareMergedViewPullInfo(ctx, issue)
@ -618,7 +617,6 @@ func ViewPullFiles(ctx *context.Context) {
startCommitID = prInfo.MergeBase startCommitID = prInfo.MergeBase
endCommitID = headCommitID endCommitID = headCommitID
headTarget = path.Join(ctx.Repo.Owner.Name, ctx.Repo.Repository.Name)
ctx.Data["Username"] = ctx.Repo.Owner.Name ctx.Data["Username"] = ctx.Repo.Owner.Name
ctx.Data["Reponame"] = ctx.Repo.Repository.Name ctx.Data["Reponame"] = ctx.Repo.Repository.Name
ctx.Data["AfterCommitID"] = endCommitID ctx.Data["AfterCommitID"] = endCommitID
@ -672,8 +670,8 @@ func ViewPullFiles(ctx *context.Context) {
} }
} }
setImageCompareContext(ctx, baseCommit, commit) headTarget := path.Join(ctx.Repo.Owner.Name, ctx.Repo.Repository.Name)
setPathsCompareContext(ctx, baseCommit, commit, headTarget) setCompareContext(ctx, baseCommit, commit, headTarget)
ctx.Data["RequireHighlightJS"] = true ctx.Data["RequireHighlightJS"] = true
ctx.Data["RequireSimpleMDE"] = true ctx.Data["RequireSimpleMDE"] = true

379
services/gitdiff/csv.go Normal file
View File

@ -0,0 +1,379 @@
// Copyright 2021 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package gitdiff
import (
"encoding/csv"
"errors"
"io"
"code.gitea.io/gitea/modules/util"
)
// unmappedColumn marks a column that has no counterpart in the other file.
const unmappedColumn = -1
// maxRowsToInspect is the number of rows buffered per file and the upper bound
// of rows compared when columns are matched by content.
const maxRowsToInspect int = 10
// minRatioToMatch is the share of inspected rows that must hold identical
// content before two columns are mapped to each other; the ratio has to exceed it.
const minRatioToMatch float32 = 0.8
// TableDiffCellType represents the type of a TableDiffCell.
type TableDiffCellType uint8
// TableDiffCellType possible values. The values start at 1, so the zero value
// is none of them.
const (
TableDiffCellEqual TableDiffCellType = iota + 1
TableDiffCellChanged
TableDiffCellAdd
TableDiffCellDel
)
// TableDiffCell represents a cell of a TableDiffRow.
type TableDiffCell struct {
// LeftCell holds the base value for equal/changed cells. For added or deleted
// cells it holds the only value.
LeftCell string
// RightCell holds the head value; it is only set for equal/changed cells.
RightCell string
Type TableDiffCellType
}
// TableDiffRow represents a row of a TableDiffSection.
type TableDiffRow struct {
// RowIdx is the head-side line number when two files are compared (0 for a
// row that only exists in the base file) and a 1-based counter when only one
// file is rendered.
RowIdx int
Cells []*TableDiffCell
}
// TableDiffSection represents a section of a DiffFile.
type TableDiffSection struct {
Rows []*TableDiffRow
}
// csvReader wraps a csv.Reader which buffers the first rows.
type csvReader struct {
reader *csv.Reader
// buffer holds the rows read up front by createCsvReader; entries past the end
// of the file stay nil.
buffer [][]string
// line counts the rows consumed so far. It starts at the buffer size and is
// incremented for every further row read by GetRow.
line int
// eof is set once the wrapped reader has reported io.EOF.
eof bool
}
// createCsvReader wraps reader in a csvReader and eagerly reads up to
// bufferRowCount rows into its buffer. Buffer slots beyond the end of the file
// stay nil. The first read error aborts the construction.
func createCsvReader(reader *csv.Reader, bufferRowCount int) (*csvReader, error) {
	r := &csvReader{
		reader: reader,
		buffer: make([][]string, bufferRowCount),
	}

	for i := range r.buffer {
		if r.eof {
			break
		}
		row, err := r.readNextRow()
		if err != nil {
			return nil, err
		}
		r.buffer[i] = row
	}

	r.line = bufferRowCount
	return r, nil
}
// GetRow returns the row with the given zero-based index. Buffered rows are
// served from the buffer; for later rows the underlying reader is advanced
// until the requested row is reached. Once the end of the file is hit, nil is
// returned without an error.
func (csv *csvReader) GetRow(row int) ([]string, error) {
	if row < len(csv.buffer) {
		return csv.buffer[row], nil
	}

	for !csv.eof {
		fields, err := csv.readNextRow()
		if err != nil {
			return nil, err
		}
		if csv.eof {
			break
		}

		csv.line++
		if csv.line-1 == row {
			return fields, nil
		}
	}

	return nil, nil
}
// readNextRow reads one row from the wrapped reader. Reaching the end of the
// file is not an error: it sets the eof flag and yields whatever the reader
// returned alongside io.EOF. After that every call returns nil, nil.
func (csv *csvReader) readNextRow() ([]string, error) {
	if csv.eof {
		return nil, nil
	}

	row, err := csv.reader.Read()
	switch {
	case err == nil:
		return row, nil
	case err == io.EOF:
		csv.eof = true
		return row, nil
	default:
		return nil, err
	}
}
// CreateCsvDiff creates a tabular diff based on two CSV readers.
//
// A nil reader means the file is not available on that side:
//   - both readers set: the rows referenced by diffFile are compared cell by cell;
//   - only baseReader set: every cell is reported as deleted;
//   - only headReader set: every cell is reported as added;
//   - neither set: there is nothing to render and nil sections are returned
//     instead of dereferencing a nil reader.
func CreateCsvDiff(diffFile *DiffFile, baseReader *csv.Reader, headReader *csv.Reader) ([]*TableDiffSection, error) {
	switch {
	case baseReader != nil && headReader != nil:
		return createCsvDiff(diffFile, baseReader, headReader)
	case baseReader != nil:
		return createCsvDiffSingle(baseReader, TableDiffCellDel)
	case headReader != nil:
		return createCsvDiffSingle(headReader, TableDiffCellAdd)
	default:
		return nil, nil
	}
}
// createCsvDiffSingle renders every row of a single CSV reader into one
// section. All cells carry the given cell type (added or deleted) and keep
// their content in LeftCell. Row indexes start at 1.
func createCsvDiffSingle(reader *csv.Reader, celltype TableDiffCellType) ([]*TableDiffSection, error) {
	var rows []*TableDiffRow

	for rowIdx := 1; ; rowIdx++ {
		record, err := reader.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			return nil, err
		}

		cells := make([]*TableDiffCell, 0, len(record))
		for _, value := range record {
			cells = append(cells, &TableDiffCell{LeftCell: value, Type: celltype})
		}
		rows = append(rows, &TableDiffRow{RowIdx: rowIdx, Cells: cells})
	}

	return []*TableDiffSection{{Rows: rows}}, nil
}
// createCsvDiff builds the tabular diff of two CSV files for the lines
// referenced by the sections of diffFile. Columns of both files are mapped to
// each other first; afterwards one TableDiffRow is created per merged diff line.
// Sections that produce no rows are omitted from the result.
func createCsvDiff(diffFile *DiffFile, baseReader *csv.Reader, headReader *csv.Reader) ([]*TableDiffSection, error) {
a, err := createCsvReader(baseReader, maxRowsToInspect)
if err != nil {
return nil, err
}
b, err := createCsvReader(headReader, maxRowsToInspect)
if err != nil {
return nil, err
}
a2b, b2a := getColumnMapping(a, b)
// Width of the rendered table: the columns of the wider file plus the unmapped
// columns of the other one.
columns := len(a2b) + countUnmappedColumns(b2a)
if len(a2b) < len(b2a) {
columns = len(b2a) + countUnmappedColumns(a2b)
}
// createDiffRow builds the row for base line aline and head line bline. Line
// numbers are 1-based; 0 means the line does not exist on that side. A nil row
// is returned when there is nothing to show.
createDiffRow := func(aline int, bline int) (*TableDiffRow, error) {
cells := make([]*TableDiffCell, columns)
if aline == 0 || bline == 0 {
// The row exists on one side only: all of its cells are deleted or added.
var (
row []string
celltype TableDiffCellType
err error
)
if bline == 0 {
row, err = a.GetRow(aline - 1)
celltype = TableDiffCellDel
} else {
row, err = b.GetRow(bline - 1)
celltype = TableDiffCellAdd
}
if err != nil {
return nil, err
}
if row == nil {
return nil, nil
}
// Only the first len(row) cells are filled; remaining entries of cells stay nil.
for i := 0; i < len(row); i++ {
cells[i] = &TableDiffCell{LeftCell: row[i], Type: celltype}
}
return &TableDiffRow{RowIdx: bline, Cells: cells}, nil
}
arow, err := a.GetRow(aline - 1)
if err != nil {
return nil, err
}
brow, err := b.GetRow(bline - 1)
if err != nil {
return nil, err
}
if len(arow) == 0 && len(brow) == 0 {
return nil, nil
}
// Base columns: deleted when unmapped, otherwise compared with the mapped
// head column. Missing cells are treated as empty strings.
for i := 0; i < len(a2b); i++ {
acell, _ := getCell(arow, i)
if a2b[i] == unmappedColumn {
cells[i] = &TableDiffCell{LeftCell: acell, Type: TableDiffCellDel}
} else {
bcell, _ := getCell(brow, a2b[i])
celltype := TableDiffCellChanged
if acell == bcell {
celltype = TableDiffCellEqual
}
cells[i] = &TableDiffCell{LeftCell: acell, RightCell: bcell, Type: celltype}
}
}
// Head columns without a base counterpart are added cells.
// NOTE(review): these are written to cells[i] using the head column index, the
// same index space the loop above uses for base columns, so an added cell can
// overwrite a base cell at the same position — confirm this is intended.
for i := 0; i < len(b2a); i++ {
if b2a[i] == unmappedColumn {
bcell, _ := getCell(brow, i)
cells[i] = &TableDiffCell{LeftCell: bcell, Type: TableDiffCellAdd}
}
}
return &TableDiffRow{RowIdx: bline, Cells: cells}, nil
}
var sections []*TableDiffSection
for i, section := range diffFile.Sections {
var rows []*TableDiffRow
lines := tryMergeLines(section.Lines)
for j, line := range lines {
// If the very first diff line is not line 1 on both sides, prepend the
// first row of both files so the table starts with it.
if i == 0 && j == 0 && (line[0] != 1 || line[1] != 1) {
diffRow, err := createDiffRow(1, 1)
if err != nil {
return nil, err
}
if diffRow != nil {
rows = append(rows, diffRow)
}
}
diffRow, err := createDiffRow(line[0], line[1])
if err != nil {
return nil, err
}
if diffRow != nil {
rows = append(rows, diffRow)
}
}
if len(rows) > 0 {
sections = append(sections, &TableDiffSection{Rows: rows})
}
}
return sections, nil
}
// getColumnMapping creates a mapping of columns between a and b. The first
// returned slice maps every column of a to its column in b, the second one maps
// the columns of b back to a; columns without a counterpart hold
// unmappedColumn. Columns are matched by identical content in the first row,
// scanning b from left to right without going back, and the remaining columns
// are then matched by content in both directions.
func getColumnMapping(a *csvReader, b *csvReader) ([]int, []int) {
	headerA, _ := a.GetRow(0)
	headerB, _ := b.GetRow(0)

	a2b := make([]int, len(headerA))
	b2a := make([]int, len(headerB))
	for j := range b2a {
		b2a[j] = unmappedColumn
	}

	// next is the first column of b that may still be matched; it only moves
	// forward so that the relative order of mapped columns is kept.
	next := 0
	for i := range a2b {
		a2b[i] = unmappedColumn
		for j := next; j < len(b2a); j++ {
			if headerA[i] != headerB[j] {
				continue
			}
			a2b[i], b2a[j] = j, i
			next = j + 1
			break
		}
	}

	tryMapColumnsByContent(a, a2b, b, b2a)
	tryMapColumnsByContent(b, b2a, a, a2b)

	return a2b, b2a
}
// tryMapColumnsByContent tries to map missing columns by the content of the first lines.
//
// For every unmapped column i of a, the column of b that follows the previously
// mapped column is taken as the candidate. The two columns are mapped to each
// other when the share of buffered rows (the first row is skipped) with
// identical content exceeds minRatioToMatch. a2b and b2a are updated in place.
//
// Columns are skipped when no valid candidate exists: after an unmapped column
// the previous position is unmappedColumn (-1), and the candidate may lie past
// the last column of b. Indexing b2a in those cases would be out of range.
func tryMapColumnsByContent(a *csvReader, a2b []int, b *csvReader, b2a []int) {
	start := 0
	for i := 0; i < len(a2b); i++ {
		candidate := start + 1
		if a2b[i] == unmappedColumn && start >= 0 && candidate < len(b2a) && b2a[start] == unmappedColumn {
			rows := util.Min(maxRowsToInspect, util.Max(0, util.Min(len(a.buffer), len(b.buffer))-1))
			same := 0
			for j := 1; j <= rows; j++ {
				acell, ea := getCell(a.buffer[j], i)
				bcell, eb := getCell(b.buffer[j], candidate)
				if ea == nil && eb == nil && acell == bcell {
					same++
				}
			}
			// Without any row to inspect there is no evidence for a match.
			if rows > 0 && (float32(same)/float32(rows)) > minRatioToMatch {
				a2b[i] = candidate
				b2a[candidate] = i
			}
		}
		start = a2b[i]
	}
}
// getCell returns the cell at the given column of row. If the column lies
// outside of the row (negative or past its end) an empty string and a non-nil
// error are returned.
func getCell(row []string, column int) (string, error) {
	// The lower bound matters because callers use -1 as the "unmapped column"
	// marker; indexing with it would panic.
	if column >= 0 && column < len(row) {
		return row[column], nil
	}
	return "", errors.New("Undefined column")
}
// countUnmappedColumns reports how many entries of mapping are unmappedColumn.
func countUnmappedColumns(mapping []int) int {
	unmapped := 0
	for _, target := range mapping {
		if target == unmappedColumn {
			unmapped++
		}
	}
	return unmapped
}
// tryMergeLines maps the separated line numbers of a git diff. The result is assumed to be ordered.
//
// Every returned pair holds the left (base) and right (head) line number of a
// diff line, section header lines excluded. A line that exists only on the
// right is merged into the earliest entry of the directly preceding run of
// entries that have no right line yet, so a deletion followed by an addition
// becomes one changed line.
func tryMergeLines(lines []*DiffLine) [][2]int {
	pairs := make([][2]int, 0, len(lines))
	for _, line := range lines {
		if line.Type == DiffLineSection {
			continue
		}
		pairs = append(pairs, [2]int{line.LeftIdx, line.RightIdx})
	}

	merged := make([][2]int, 0, len(pairs))
	for _, pair := range pairs {
		n := len(merged)
		if pair[0] == 0 && n > 0 && merged[n-1][1] == 0 {
			// Walk back to the first entry of the trailing run without a right line.
			first := n - 1
			for first > 0 && merged[first-1][1] == 0 {
				first--
			}
			merged[first][1] = pair[1]
			continue
		}
		merged = append(merged, pair)
	}

	return merged
}

View File

@ -0,0 +1,119 @@
// Copyright 2021 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package gitdiff
import (
"encoding/csv"
"strings"
"testing"
csv_module "code.gitea.io/gitea/modules/csv"
"code.gitea.io/gitea/modules/setting"
"github.com/stretchr/testify/assert"
)
// TestCSVDiff parses small patches of a two-column CSV file and checks the cell
// types of the resulting tabular diff. An empty base or head stands for a file
// that does not exist on that side.
func TestCSVDiff(t *testing.T) {
	var cases = []struct {
		diff  string
		base  string
		head  string
		cells [][2]TableDiffCellType
	}{
		// case 0
		{
			diff: `diff --git a/unittest.csv b/unittest.csv
--- a/unittest.csv
+++ b/unittest.csv
@@ -0,0 +1,2 @@
+col1,col2
+a,a`,
			base:  "",
			head:  "col1,col2\na,a",
			cells: [][2]TableDiffCellType{{TableDiffCellAdd, TableDiffCellAdd}, {TableDiffCellAdd, TableDiffCellAdd}},
		},
		// case 1
		{
			diff: `diff --git a/unittest.csv b/unittest.csv
--- a/unittest.csv
+++ b/unittest.csv
@@ -1,2 +1,3 @@
col1,col2
-a,a
+a,a
+b,b`,
			base:  "col1,col2\na,a",
			head:  "col1,col2\na,a\nb,b",
			cells: [][2]TableDiffCellType{{TableDiffCellEqual, TableDiffCellEqual}, {TableDiffCellEqual, TableDiffCellEqual}, {TableDiffCellAdd, TableDiffCellAdd}},
		},
		// case 2
		{
			diff: `diff --git a/unittest.csv b/unittest.csv
--- a/unittest.csv
+++ b/unittest.csv
@@ -1,3 +1,2 @@
col1,col2
-a,a
b,b`,
			base:  "col1,col2\na,a\nb,b",
			head:  "col1,col2\nb,b",
			cells: [][2]TableDiffCellType{{TableDiffCellEqual, TableDiffCellEqual}, {TableDiffCellDel, TableDiffCellDel}, {TableDiffCellEqual, TableDiffCellEqual}},
		},
		// case 3
		{
			diff: `diff --git a/unittest.csv b/unittest.csv
--- a/unittest.csv
+++ b/unittest.csv
@@ -1,2 +1,2 @@
col1,col2
-b,b
+b,c`,
			base:  "col1,col2\nb,b",
			head:  "col1,col2\nb,c",
			cells: [][2]TableDiffCellType{{TableDiffCellEqual, TableDiffCellEqual}, {TableDiffCellEqual, TableDiffCellChanged}},
		},
		// case 4
		{
			diff: `diff --git a/unittest.csv b/unittest.csv
--- a/unittest.csv
+++ b/unittest.csv
@@ -1,2 +0,0 @@
-col1,col2
-b,c`,
			base:  "col1,col2\nb,c",
			head:  "",
			cells: [][2]TableDiffCellType{{TableDiffCellDel, TableDiffCellDel}, {TableDiffCellDel, TableDiffCellDel}},
		},
	}

	for n, c := range cases {
		diff, err := ParsePatch(setting.Git.MaxGitDiffLines, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(c.diff))
		if err != nil {
			// Nothing below can work without a parsed patch.
			t.Fatalf("ParsePatch failed: %s", err)
		}

		var baseReader *csv.Reader
		if len(c.base) > 0 {
			baseReader = csv_module.CreateReaderAndGuessDelimiter([]byte(c.base))
		}
		var headReader *csv.Reader
		if len(c.head) > 0 {
			headReader = csv_module.CreateReaderAndGuessDelimiter([]byte(c.head))
		}

		result, err := CreateCsvDiff(diff.Files[0], baseReader, headReader)
		assert.NoError(t, err)
		// Skip the detailed checks when the shape is wrong, otherwise the
		// indexing below would panic instead of reporting a failure.
		if !assert.Equal(t, 1, len(result), "case %d: should be one section", n) {
			continue
		}

		section := result[0]
		if !assert.Equal(t, len(c.cells), len(section.Rows), "case %d: should be %d rows", n, len(c.cells)) {
			continue
		}

		for i, row := range section.Rows {
			if !assert.Equal(t, 2, len(row.Cells), "case %d: row %d should have two cells", n, i) {
				continue
			}
			for j, cell := range row.Cells {
				assert.Equal(t, c.cells[i][j], cell.Type, "case %d: row %d cell %d should be equal", n, i, j)
			}
		}
	}
}

View File

@ -79,6 +79,8 @@
{{else}} {{else}}
{{$isImage = (call $.IsImageFileInHead $file.Name)}} {{$isImage = (call $.IsImageFileInHead $file.Name)}}
{{end}} {{end}}
{{$isCsv := (call $.IsCsvFile $file)}}
{{$showFileViewToggle := or $isImage $isCsv}}
<a role="button" class="fold-file muted mr-2"> <a role="button" class="fold-file muted mr-2">
{{svg "octicon-chevron-down" 18}} {{svg "octicon-chevron-down" 18}}
</a> </a>
@ -92,6 +94,12 @@
<span class="file mono">{{if $file.IsRenamed}}{{$file.OldName}} &rarr; {{end}}{{$file.Name}}{{if .IsLFSFile}} ({{$.i18n.Tr "repo.stored_lfs"}}){{end}}</span> <span class="file mono">{{if $file.IsRenamed}}{{$file.OldName}} &rarr; {{end}}{{$file.Name}}{{if .IsLFSFile}} ({{$.i18n.Tr "repo.stored_lfs"}}){{end}}</span>
</div> </div>
<div class="diff-file-header-actions df ac"> <div class="diff-file-header-actions df ac">
{{if $showFileViewToggle}}
<div class="ui compact icon buttons">
<span class="ui tiny basic button poping up active file-view-toggle" data-toggle-selector="#diff-source-{{$i}}" data-content="{{$.i18n.Tr "repo.file_view_source"}}" data-position="bottom center" data-variation="tiny inverted">{{svg "octicon-code"}}</span>
<span class="ui tiny basic button poping up file-view-toggle" data-toggle-selector="#diff-rendered-{{$i}}" data-content="{{$.i18n.Tr "repo.file_view_rendered"}}" data-position="bottom center" data-variation="tiny inverted">{{svg "octicon-file"}}</span>
</div>
{{end}}
{{if $file.IsProtected}} {{if $file.IsProtected}}
<span class="ui basic label">{{$.i18n.Tr "repo.diff.protected"}}</span> <span class="ui basic label">{{$.i18n.Tr "repo.diff.protected"}}</span>
{{end}} {{end}}
@ -106,22 +114,31 @@
</h4> </h4>
<div class="diff-file-body ui attached unstackable table segment"> <div class="diff-file-body ui attached unstackable table segment">
{{if ne $file.Type 4}} {{if ne $file.Type 4}}
<div class="file-body file-code has-context-menu{{if not $isImage}} code-diff{{end}}{{if $.IsSplitStyle}} code-diff-split{{else}} code-diff-unified{{end}}{{if $isImage}} py-4{{end}}"> <div id="diff-source-{{$i}}" class="file-body file-code has-context-menu code-diff{{if $.IsSplitStyle}} code-diff-split{{else}} code-diff-unified{{end}}">
<table class="chroma{{if $isImage}} w-100{{end}}"> {{if $file.IsBin}}
<tbody> <div class="diff-file-body binary" style="padding: 5px 10px;">{{$.i18n.Tr "repo.diff.bin_not_shown"}}</div>
{{if $isImage}}
{{template "repo/diff/image_diff" dict "file" . "root" $}}
{{else}} {{else}}
<table class="chroma">
{{if $.IsSplitStyle}} {{if $.IsSplitStyle}}
{{template "repo/diff/section_split" dict "file" . "root" $}} {{template "repo/diff/section_split" dict "file" . "root" $}}
{{else}} {{else}}
{{template "repo/diff/section_unified" dict "file" . "root" $}} {{template "repo/diff/section_unified" dict "file" . "root" $}}
{{end}} {{end}}
</table>
{{end}}
</div>
{{if or $isImage $isCsv}}
<div id="diff-rendered-{{$i}}" class="file-body file-code has-context-menu{{if $.IsSplitStyle}} code-diff-split{{else}} code-diff-unified{{end}} hide">
<table class="chroma w-100">
{{if $isImage}}
{{template "repo/diff/image_diff" dict "file" . "root" $}}
{{else}}
{{template "repo/diff/csv_diff" dict "file" . "root" $}}
{{end}} {{end}}
</tbody>
</table> </table>
</div> </div>
{{end}} {{end}}
{{end}}
</div> </div>
</div> </div>
{{end}} {{end}}

View File

@ -0,0 +1,46 @@
{{/*
	Renders the tabular diff of one CSV file as a single row of the enclosing diff table.
	Expects a dict with "file" (the diff file) and "root" (the page context, which
	supplies CreateCsvDiff, BaseCommit and HeadCommit).
*/}}
<tr>
	<td>
		{{$csvDiff := call .root.CreateCsvDiff .file .root.BaseCommit .root.HeadCommit}}
		{{if $csvDiff.Error}}
			{{/* Show the error text in place of the table. */}}
			<div class="ui center">{{$csvDiff.Error}}</div>
		{{else if $csvDiff.Sections}}
			<table class="data-table">
			{{range $sectionIdx, $section := $csvDiff.Sections}}
				{{/* Every section after the first is marked so it can be visually separated. */}}
				<tbody {{if gt $sectionIdx 0}}class="section"{{end}}>
				{{range $rowIdx, $row := $section.Rows}}
					<tr>
						{{/*
							Cell types as rendered below: 2 = modified (old and new value),
							3 = added, 4 = removed, anything else = unchanged.
							NOTE(review): presumably these numbers mirror the TableDiffCellType constants; confirm against the Go definition.
						*/}}
						{{if and (eq $sectionIdx 0) (eq $rowIdx 0)}}
							{{/* The very first row of the first section is rendered with header cells. */}}
							<th class="line-num">{{$row.RowIdx}}</th>
							{{range $cell := $row.Cells}}
								{{if eq $cell.Type 2}}
									<th class="modified"><span class="removed-code">{{$cell.LeftCell}}</span> <span class="added-code">{{$cell.RightCell}}</span></th>
								{{else if eq $cell.Type 3}}
									<th class="added"><span class="added-code">{{$cell.LeftCell}}</span></th>
								{{else if eq $cell.Type 4}}
									<th class="removed"><span class="removed-code">{{$cell.LeftCell}}</span></th>
								{{else}}
									<th>{{$cell.RightCell}}</th>
								{{end}}
							{{end}}
						{{else}}
							{{/* Data rows: the row number is left blank when RowIdx is zero. */}}
							<td class="line-num">{{if $row.RowIdx}}{{$row.RowIdx}}{{end}}</td>
							{{range $cell := $row.Cells}}
								{{if eq $cell.Type 2}}
									<td class="modified"><span class="removed-code">{{$cell.LeftCell}}</span> <span class="added-code">{{$cell.RightCell}}</span></td>
								{{else if eq $cell.Type 3}}
									<td class="added"><span class="added-code">{{$cell.LeftCell}}</span></td>
								{{else if eq $cell.Type 4}}
									<td class="removed"><span class="removed-code">{{$cell.LeftCell}}</span></td>
								{{else}}
									<td>{{$cell.RightCell}}</td>
								{{end}}
							{{end}}
						{{end}}
					</tr>
				{{end}}
				</tbody>
			{{end}}
			</table>
		{{end}}
	</td>
</tr>

View File

@ -64,7 +64,7 @@
{{end}} {{end}}
</h4> </h4>
<div class="ui attached table unstackable segment"> <div class="ui attached table unstackable segment">
<div class="file-view {{if .IsMarkup}}{{.MarkupType}} markdown{{else if .IsRenderedHTML}}plain-text{{else if .IsTextSource}}code-view{{end}}"> <div class="file-view {{if .IsMarkup}}{{.MarkupType}} {{if ne "csv" .MarkupType}}markdown{{end}}{{else if .IsRenderedHTML}}plain-text{{else if .IsTextSource}}code-view{{end}}">
{{if .IsMarkup}} {{if .IsMarkup}}
{{if .FileContent}}{{.FileContent | Safe}}{{end}} {{if .FileContent}}{{.FileContent | Safe}}{{end}}
{{else if .IsRenderedHTML}} {{else if .IsRenderedHTML}}

View File

@ -2489,6 +2489,18 @@ function initIssueReferenceRepositorySearch() {
}); });
} }
/**
 * Wires up the view toggle buttons (elements with class `file-view-toggle`).
 *
 * Clicking a button makes it the only `active` element among its siblings,
 * then un-hides the element addressed by the button's `data-toggle-selector`
 * attribute and hides every sibling of that element.
 */
function initFileViewToggle() {
  $('.file-view-toggle').on('click', (event) => {
    // For a directly bound handler, currentTarget is the clicked toggle button.
    const $button = $(event.currentTarget);
    $button.addClass('active').siblings().removeClass('active');

    const $panel = $($button.data('toggle-selector'));
    // Hide the siblings first and reveal the target last, so the target
    // always ends up visible.
    $panel.siblings().addClass('hide');
    $panel.removeClass('hide');
  });
}
function initLinkAccountView() { function initLinkAccountView() {
const $lnkUserPage = $('.page-content.user.link-account'); const $lnkUserPage = $('.page-content.user.link-account');
if ($lnkUserPage.length === 0) { if ($lnkUserPage.length === 0) {
@ -2756,6 +2768,7 @@ $(document).ready(async () => {
initTableSort(); initTableSort();
initNotificationsTable(); initNotificationsTable();
initPullRequestMergeInstruction(); initPullRequestMergeInstruction();
initFileViewToggle();
initReleaseEditor(); initReleaseEditor();
initRelease(); initRelease();

View File

@ -473,31 +473,6 @@
box-shadow: inset 0 -1px 0 var(--color-secondary); box-shadow: inset 0 -1px 0 var(--color-secondary);
} }
.csv-data td,
.csv-data th {
padding: 5px;
overflow: hidden;
font-size: 12px;
line-height: 1;
text-align: left;
white-space: nowrap;
}
.csv-data .blob-num {
padding: 10px 8px 9px;
text-align: right;
border: 0;
}
.csv-data tr {
border-top: 0;
}
.csv-data th {
font-weight: 600;
border-top: 0;
}
.ui.list .list, .ui.list .list,
ol.ui.list ol, ol.ui.list ol,
ul.ui.list ul { ul.ui.list ul {

View File

@ -1455,6 +1455,65 @@
} }
} }
/* Styles for tables carrying the `data-table` class, e.g. the table emitted by
   the CSV diff template. */
.data-table {
width: 100%;
tr {
border-top: 0;
}
/* Shared styling for header and body cells.
   NOTE(review): the `!important` on padding presumably overrides padding set
   by the surrounding framework/table styles; confirm before removing it. */
td,
th {
padding: 5px !important;
overflow: hidden;
font-size: 12px;
text-align: left;
white-space: nowrap;
border: 1px solid var(--color-secondary);
}
/* Body cells override the `nowrap` above: `pre-line` keeps line breaks that
   are part of the cell content while collapsing other whitespace. Header
   cells keep `nowrap`. This rule must stay after the shared `td, th` rule. */
td {
white-space: pre-line;
}
/* Header cells: bold text on the box-header background, no top border. */
th {
font-weight: 600;
background: var(--color-box-header);
border-top: 0;
}
/* Diff highlighting for added content, usable on single cells or whole rows. */
td.added,
th.added,
tr.added {
background-color: var(--color-diff-added-row-bg) !important;
}
/* Diff highlighting for removed content, usable on single cells or whole rows. */
td.removed,
th.removed,
tr.removed {
background-color: var(--color-diff-removed-row-bg) !important;
}
/* A `tbody` with class `section` gets a thicker top border to separate it
   from the preceding section. */
tbody.section {
border-top: 2px solid var(--color-secondary);
}
/* Row-number column: kept as narrow as possible (1% width, but at least 50px),
   right-aligned, monospace, without a border, and not selectable so the
   numbers are not included when cell text is selected. */
.line-num {
width: 1%;
min-width: 50px;
font-family: monospace;
line-height: 20px;
color: var(--color-secondary-dark-2);
white-space: nowrap;
vertical-align: top;
cursor: pointer;
user-select: none;
text-align: right;
background: var(--color-body);
border: 0;
}
}
.diff-detail-box { .diff-detail-box {
padding: 7px 0; padding: 7px 0;
background: var(--color-body); background: var(--color-body);