forked from p85947160/gitea
348 lines
8.6 KiB
Go
348 lines
8.6 KiB
Go
package regexp2
|
|
|
|
import (
|
|
"bytes"
|
|
"fmt"
|
|
)
|
|
|
|
// Match is a single regex result match that contains groups and repeated captures
|
|
// -Groups
|
|
// -Capture
|
|
type Match struct {
|
|
Group //embeded group 0
|
|
|
|
regex *Regexp
|
|
otherGroups []Group
|
|
|
|
// input to the match
|
|
textpos int
|
|
textstart int
|
|
|
|
capcount int
|
|
caps []int
|
|
sparseCaps map[int]int
|
|
|
|
// output from the match
|
|
matches [][]int
|
|
matchcount []int
|
|
|
|
// whether we've done any balancing with this match. If we
|
|
// have done balancing, we'll need to do extra work in Tidy().
|
|
balancing bool
|
|
}
|
|
|
|
// Group is an explicit or implit (group 0) matched group within the pattern
|
|
type Group struct {
|
|
Capture // the last capture of this group is embeded for ease of use
|
|
|
|
Name string // group name
|
|
Captures []Capture // captures of this group
|
|
}
|
|
|
|
// Capture is a single capture of text within the larger original string.
type Capture struct {
	// text is the original string, held as runes.
	text []rune

	// Index is the position in the original string where the first
	// character of the captured substring was found.
	Index int

	// Length is the length of the captured substring.
	Length int
}
|
|
|
|
// String returns the captured text as a String
|
|
func (c *Capture) String() string {
|
|
return string(c.text[c.Index : c.Index+c.Length])
|
|
}
|
|
|
|
// Runes returns the captured text as a rune slice
|
|
func (c *Capture) Runes() []rune {
|
|
return c.text[c.Index : c.Index+c.Length]
|
|
}
|
|
|
|
func newMatch(regex *Regexp, capcount int, text []rune, startpos int) *Match {
|
|
m := Match{
|
|
regex: regex,
|
|
matchcount: make([]int, capcount),
|
|
matches: make([][]int, capcount),
|
|
textstart: startpos,
|
|
balancing: false,
|
|
}
|
|
m.Name = "0"
|
|
m.text = text
|
|
m.matches[0] = make([]int, 2)
|
|
return &m
|
|
}
|
|
|
|
func newMatchSparse(regex *Regexp, caps map[int]int, capcount int, text []rune, startpos int) *Match {
|
|
m := newMatch(regex, capcount, text, startpos)
|
|
m.sparseCaps = caps
|
|
return m
|
|
}
|
|
|
|
func (m *Match) reset(text []rune, textstart int) {
|
|
m.text = text
|
|
m.textstart = textstart
|
|
for i := 0; i < len(m.matchcount); i++ {
|
|
m.matchcount[i] = 0
|
|
}
|
|
m.balancing = false
|
|
}
|
|
|
|
func (m *Match) tidy(textpos int) {
|
|
|
|
interval := m.matches[0]
|
|
m.Index = interval[0]
|
|
m.Length = interval[1]
|
|
m.textpos = textpos
|
|
m.capcount = m.matchcount[0]
|
|
//copy our root capture to the list
|
|
m.Group.Captures = []Capture{m.Group.Capture}
|
|
|
|
if m.balancing {
|
|
// The idea here is that we want to compact all of our unbalanced captures. To do that we
|
|
// use j basically as a count of how many unbalanced captures we have at any given time
|
|
// (really j is an index, but j/2 is the count). First we skip past all of the real captures
|
|
// until we find a balance captures. Then we check each subsequent entry. If it's a balance
|
|
// capture (it's negative), we decrement j. If it's a real capture, we increment j and copy
|
|
// it down to the last free position.
|
|
for cap := 0; cap < len(m.matchcount); cap++ {
|
|
limit := m.matchcount[cap] * 2
|
|
matcharray := m.matches[cap]
|
|
|
|
var i, j int
|
|
|
|
for i = 0; i < limit; i++ {
|
|
if matcharray[i] < 0 {
|
|
break
|
|
}
|
|
}
|
|
|
|
for j = i; i < limit; i++ {
|
|
if matcharray[i] < 0 {
|
|
// skip negative values
|
|
j--
|
|
} else {
|
|
// but if we find something positive (an actual capture), copy it back to the last
|
|
// unbalanced position.
|
|
if i != j {
|
|
matcharray[j] = matcharray[i]
|
|
}
|
|
j++
|
|
}
|
|
}
|
|
|
|
m.matchcount[cap] = j / 2
|
|
}
|
|
|
|
m.balancing = false
|
|
}
|
|
}
|
|
|
|
// isMatched tells if a group was matched by capnum
|
|
func (m *Match) isMatched(cap int) bool {
|
|
return cap < len(m.matchcount) && m.matchcount[cap] > 0 && m.matches[cap][m.matchcount[cap]*2-1] != (-3+1)
|
|
}
|
|
|
|
// matchIndex returns the index of the last specified matched group by capnum
|
|
func (m *Match) matchIndex(cap int) int {
|
|
i := m.matches[cap][m.matchcount[cap]*2-2]
|
|
if i >= 0 {
|
|
return i
|
|
}
|
|
|
|
return m.matches[cap][-3-i]
|
|
}
|
|
|
|
// matchLength returns the length of the last specified matched group by capnum
|
|
func (m *Match) matchLength(cap int) int {
|
|
i := m.matches[cap][m.matchcount[cap]*2-1]
|
|
if i >= 0 {
|
|
return i
|
|
}
|
|
|
|
return m.matches[cap][-3-i]
|
|
}
|
|
|
|
// Nonpublic builder: add a capture to the group specified by "c"
|
|
func (m *Match) addMatch(c, start, l int) {
|
|
|
|
if m.matches[c] == nil {
|
|
m.matches[c] = make([]int, 2)
|
|
}
|
|
|
|
capcount := m.matchcount[c]
|
|
|
|
if capcount*2+2 > len(m.matches[c]) {
|
|
oldmatches := m.matches[c]
|
|
newmatches := make([]int, capcount*8)
|
|
copy(newmatches, oldmatches[:capcount*2])
|
|
m.matches[c] = newmatches
|
|
}
|
|
|
|
m.matches[c][capcount*2] = start
|
|
m.matches[c][capcount*2+1] = l
|
|
m.matchcount[c] = capcount + 1
|
|
//log.Printf("addMatch: c=%v, i=%v, l=%v ... matches: %v", c, start, l, m.matches)
|
|
}
|
|
|
|
// Nonpublic builder: Add a capture to balance the specified group. This is used by the
|
|
// balanced match construct. (?<foo-foo2>...)
|
|
//
|
|
// If there were no such thing as backtracking, this would be as simple as calling RemoveMatch(c).
|
|
// However, since we have backtracking, we need to keep track of everything.
|
|
func (m *Match) balanceMatch(c int) {
|
|
m.balancing = true
|
|
|
|
// we'll look at the last capture first
|
|
capcount := m.matchcount[c]
|
|
target := capcount*2 - 2
|
|
|
|
// first see if it is negative, and therefore is a reference to the next available
|
|
// capture group for balancing. If it is, we'll reset target to point to that capture.
|
|
if m.matches[c][target] < 0 {
|
|
target = -3 - m.matches[c][target]
|
|
}
|
|
|
|
// move back to the previous capture
|
|
target -= 2
|
|
|
|
// if the previous capture is a reference, just copy that reference to the end. Otherwise, point to it.
|
|
if target >= 0 && m.matches[c][target] < 0 {
|
|
m.addMatch(c, m.matches[c][target], m.matches[c][target+1])
|
|
} else {
|
|
m.addMatch(c, -3-target, -4-target /* == -3 - (target + 1) */)
|
|
}
|
|
}
|
|
|
|
// Nonpublic builder: removes a group match by capnum
|
|
func (m *Match) removeMatch(c int) {
|
|
m.matchcount[c]--
|
|
}
|
|
|
|
// GroupCount returns the number of groups this match has matched
|
|
func (m *Match) GroupCount() int {
|
|
return len(m.matchcount)
|
|
}
|
|
|
|
// GroupByName returns a group based on the name of the group, or nil if the group name does not exist
|
|
func (m *Match) GroupByName(name string) *Group {
|
|
num := m.regex.GroupNumberFromName(name)
|
|
if num < 0 {
|
|
return nil
|
|
}
|
|
return m.GroupByNumber(num)
|
|
}
|
|
|
|
// GroupByNumber returns a group based on the number of the group, or nil if the group number does not exist
|
|
func (m *Match) GroupByNumber(num int) *Group {
|
|
// check our sparse map
|
|
if m.sparseCaps != nil {
|
|
if newNum, ok := m.sparseCaps[num]; ok {
|
|
num = newNum
|
|
}
|
|
}
|
|
if num >= len(m.matchcount) || num < 0 {
|
|
return nil
|
|
}
|
|
|
|
if num == 0 {
|
|
return &m.Group
|
|
}
|
|
|
|
m.populateOtherGroups()
|
|
|
|
return &m.otherGroups[num-1]
|
|
}
|
|
|
|
// Groups returns all the capture groups, starting with group 0 (the full match)
|
|
func (m *Match) Groups() []Group {
|
|
m.populateOtherGroups()
|
|
g := make([]Group, len(m.otherGroups)+1)
|
|
g[0] = m.Group
|
|
copy(g[1:], m.otherGroups)
|
|
return g
|
|
}
|
|
|
|
func (m *Match) populateOtherGroups() {
|
|
// Construct all the Group objects first time called
|
|
if m.otherGroups == nil {
|
|
m.otherGroups = make([]Group, len(m.matchcount)-1)
|
|
for i := 0; i < len(m.otherGroups); i++ {
|
|
m.otherGroups[i] = newGroup(m.regex.GroupNameFromNumber(i+1), m.text, m.matches[i+1], m.matchcount[i+1])
|
|
}
|
|
}
|
|
}
|
|
|
|
func (m *Match) groupValueAppendToBuf(groupnum int, buf *bytes.Buffer) {
|
|
c := m.matchcount[groupnum]
|
|
if c == 0 {
|
|
return
|
|
}
|
|
|
|
matches := m.matches[groupnum]
|
|
|
|
index := matches[(c-1)*2]
|
|
last := index + matches[(c*2)-1]
|
|
|
|
for ; index < last; index++ {
|
|
buf.WriteRune(m.text[index])
|
|
}
|
|
}
|
|
|
|
func newGroup(name string, text []rune, caps []int, capcount int) Group {
|
|
g := Group{}
|
|
g.text = text
|
|
if capcount > 0 {
|
|
g.Index = caps[(capcount-1)*2]
|
|
g.Length = caps[(capcount*2)-1]
|
|
}
|
|
g.Name = name
|
|
g.Captures = make([]Capture, capcount)
|
|
for i := 0; i < capcount; i++ {
|
|
g.Captures[i] = Capture{
|
|
text: text,
|
|
Index: caps[i*2],
|
|
Length: caps[i*2+1],
|
|
}
|
|
}
|
|
//log.Printf("newGroup! capcount %v, %+v", capcount, g)
|
|
|
|
return g
|
|
}
|
|
|
|
func (m *Match) dump() string {
|
|
buf := &bytes.Buffer{}
|
|
buf.WriteRune('\n')
|
|
if len(m.sparseCaps) > 0 {
|
|
for k, v := range m.sparseCaps {
|
|
fmt.Fprintf(buf, "Slot %v -> %v\n", k, v)
|
|
}
|
|
}
|
|
|
|
for i, g := range m.Groups() {
|
|
fmt.Fprintf(buf, "Group %v (%v), %v caps:\n", i, g.Name, len(g.Captures))
|
|
|
|
for _, c := range g.Captures {
|
|
fmt.Fprintf(buf, " (%v, %v) %v\n", c.Index, c.Length, c.String())
|
|
}
|
|
}
|
|
/*
|
|
for i := 0; i < len(m.matchcount); i++ {
|
|
fmt.Fprintf(buf, "\nGroup %v (%v):\n", i, m.regex.GroupNameFromNumber(i))
|
|
|
|
for j := 0; j < m.matchcount[i]; j++ {
|
|
text := ""
|
|
|
|
if m.matches[i][j*2] >= 0 {
|
|
start := m.matches[i][j*2]
|
|
text = m.text[start : start+m.matches[i][j*2+1]]
|
|
}
|
|
|
|
fmt.Fprintf(buf, " (%v, %v) %v\n", m.matches[i][j*2], m.matches[i][j*2+1], text)
|
|
}
|
|
}
|
|
*/
|
|
return buf.String()
|
|
}
|