Remove x/net/context vendor by using std package (#5202)
* Update dep github.com/markbates/goth * Update dep github.com/blevesearch/bleve * Update dep golang.org/x/oauth2 * Fix github.com/blevesearch/bleve to c74e08f039e56cef576e4336382b2a2d12d9e026 * Update dep golang.org/x/oauth2
This commit is contained in:
parent
b3000ae623
commit
4c1f1f9646
|
@ -90,7 +90,7 @@
|
|||
revision = "3a771d992973f24aa725d07868b467d1ddfceafb"
|
||||
|
||||
[[projects]]
|
||||
digest = "1:67351095005f164e748a5a21899d1403b03878cb2d40a7b0f742376e6eeda974"
|
||||
digest = "1:c10f35be6200b09e26da267ca80f837315093ecaba27e7a223071380efb9dd32"
|
||||
name = "github.com/blevesearch/bleve"
|
||||
packages = [
|
||||
".",
|
||||
|
@ -135,7 +135,7 @@
|
|||
"search/searcher",
|
||||
]
|
||||
pruneopts = "NUT"
|
||||
revision = "ff210fbc6d348ad67aa5754eaea11a463fcddafd"
|
||||
revision = "c74e08f039e56cef576e4336382b2a2d12d9e026"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
|
@ -557,7 +557,7 @@
|
|||
revision = "e3534c89ef969912856dfa39e56b09e58c5f5daf"
|
||||
|
||||
[[projects]]
|
||||
digest = "1:23f75ae90fcc38dac6fad6881006ea7d0f2c78db5f9f81f3df558dc91460e61f"
|
||||
digest = "1:4b992ec853d0ea9bac3dcf09a64af61de1a392e6cb0eef2204c0c92f4ae6b911"
|
||||
name = "github.com/markbates/goth"
|
||||
packages = [
|
||||
".",
|
||||
|
@ -572,8 +572,8 @@
|
|||
"providers/twitter",
|
||||
]
|
||||
pruneopts = "NUT"
|
||||
revision = "f9c6649ab984d6ea71ef1e13b7b1cdffcf4592d3"
|
||||
version = "v1.46.1"
|
||||
revision = "bc6d8ddf751a745f37ca5567dbbfc4157bbf5da9"
|
||||
version = "v1.47.2"
|
||||
|
||||
[[projects]]
|
||||
digest = "1:c9724c929d27a14475a45b17a267dbc60671c0bc2c5c05ed21f011f7b5bc9fb5"
|
||||
|
@ -809,10 +809,11 @@
|
|||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
digest = "1:6d5ed712653ea5321fe3e3475ab2188cf362a4e0d31e9fd3acbd4dfbbca0d680"
|
||||
digest = "1:d0a0bdd2b64d981aa4e6a1ade90431d042cd7fa31b584e33d45e62cbfec43380"
|
||||
name = "golang.org/x/net"
|
||||
packages = [
|
||||
"context",
|
||||
"context/ctxhttp",
|
||||
"html",
|
||||
"html/atom",
|
||||
"html/charset",
|
||||
|
@ -821,14 +822,15 @@
|
|||
revision = "9b4f9f5ad5197c79fd623a3638e70d8b26cef344"
|
||||
|
||||
[[projects]]
|
||||
digest = "1:8159a9cda4b8810aaaeb0d60e2fa68e2fd86d8af4ec8f5059830839e3c8d93d5"
|
||||
branch = "master"
|
||||
digest = "1:274a6321a5a9f185eeb3fab5d7d8397e0e9f57737490d749f562c7e205ffbc2e"
|
||||
name = "golang.org/x/oauth2"
|
||||
packages = [
|
||||
".",
|
||||
"internal",
|
||||
]
|
||||
pruneopts = "NUT"
|
||||
revision = "c10ba270aa0bf8b8c1c986e103859c67a9103061"
|
||||
revision = "c453e0c757598fd055e170a3a359263c91e13153"
|
||||
|
||||
[[projects]]
|
||||
digest = "1:9f303486d623f840492bfeb48eb906a94e9d3fe638a761639b72ce64bf7bfcc3"
|
||||
|
|
10
Gopkg.toml
10
Gopkg.toml
|
@ -14,6 +14,12 @@ ignored = ["google.golang.org/appengine*"]
|
|||
branch = "master"
|
||||
name = "code.gitea.io/sdk"
|
||||
|
||||
[[constraint]]
|
||||
# branch = "master"
|
||||
revision = "c74e08f039e56cef576e4336382b2a2d12d9e026"
|
||||
name = "github.com/blevesearch/bleve"
|
||||
#Not targetting v0.7.0 since standard where use only just after this tag
|
||||
|
||||
[[constraint]]
|
||||
revision = "12dd70caea0268ac0d6c2707d0611ef601e7c64e"
|
||||
name = "golang.org/x/crypto"
|
||||
|
@ -61,7 +67,7 @@ ignored = ["google.golang.org/appengine*"]
|
|||
|
||||
[[constraint]]
|
||||
name = "github.com/markbates/goth"
|
||||
version = "1.46.1"
|
||||
version = "1.47.2"
|
||||
|
||||
[[constraint]]
|
||||
branch = "master"
|
||||
|
@ -105,7 +111,7 @@ ignored = ["google.golang.org/appengine*"]
|
|||
source = "github.com/go-gitea/bolt"
|
||||
|
||||
[[override]]
|
||||
revision = "c10ba270aa0bf8b8c1c986e103859c67a9103061"
|
||||
branch = "master"
|
||||
name = "golang.org/x/oauth2"
|
||||
|
||||
[[constraint]]
|
||||
|
|
|
@ -15,11 +15,12 @@
|
|||
package bleve
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/blevesearch/bleve/document"
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/index/store"
|
||||
"github.com/blevesearch/bleve/mapping"
|
||||
"golang.org/x/net/context"
|
||||
)
|
||||
|
||||
// A Batch groups together multiple Index and Delete
|
||||
|
|
|
@ -100,8 +100,8 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error {
|
|||
// prepare new index snapshot
|
||||
newSnapshot := &IndexSnapshot{
|
||||
parent: s,
|
||||
segment: make([]*SegmentSnapshot, nsegs, nsegs+1),
|
||||
offsets: make([]uint64, nsegs, nsegs+1),
|
||||
segment: make([]*SegmentSnapshot, 0, nsegs+1),
|
||||
offsets: make([]uint64, 0, nsegs+1),
|
||||
internal: make(map[string][]byte, len(s.root.internal)),
|
||||
epoch: s.nextSnapshotEpoch,
|
||||
refs: 1,
|
||||
|
@ -124,24 +124,29 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error {
|
|||
return err
|
||||
}
|
||||
}
|
||||
newSnapshot.segment[i] = &SegmentSnapshot{
|
||||
|
||||
newss := &SegmentSnapshot{
|
||||
id: s.root.segment[i].id,
|
||||
segment: s.root.segment[i].segment,
|
||||
cachedDocs: s.root.segment[i].cachedDocs,
|
||||
}
|
||||
s.root.segment[i].segment.AddRef()
|
||||
|
||||
// apply new obsoletions
|
||||
if s.root.segment[i].deleted == nil {
|
||||
newSnapshot.segment[i].deleted = delta
|
||||
newss.deleted = delta
|
||||
} else {
|
||||
newSnapshot.segment[i].deleted = roaring.Or(s.root.segment[i].deleted, delta)
|
||||
newss.deleted = roaring.Or(s.root.segment[i].deleted, delta)
|
||||
}
|
||||
|
||||
newSnapshot.offsets[i] = running
|
||||
// check for live size before copying
|
||||
if newss.LiveSize() > 0 {
|
||||
newSnapshot.segment = append(newSnapshot.segment, newss)
|
||||
s.root.segment[i].segment.AddRef()
|
||||
newSnapshot.offsets = append(newSnapshot.offsets, running)
|
||||
running += s.root.segment[i].Count()
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
// append new segment, if any, to end of the new index snapshot
|
||||
if next.data != nil {
|
||||
newSegmentSnapshot := &SegmentSnapshot{
|
||||
|
@ -193,6 +198,12 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
|
|||
// prepare new index snapshot
|
||||
currSize := len(s.root.segment)
|
||||
newSize := currSize + 1 - len(nextMerge.old)
|
||||
|
||||
// empty segments deletion
|
||||
if nextMerge.new == nil {
|
||||
newSize--
|
||||
}
|
||||
|
||||
newSnapshot := &IndexSnapshot{
|
||||
parent: s,
|
||||
segment: make([]*SegmentSnapshot, 0, newSize),
|
||||
|
@ -210,7 +221,7 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
|
|||
segmentID := s.root.segment[i].id
|
||||
if segSnapAtMerge, ok := nextMerge.old[segmentID]; ok {
|
||||
// this segment is going away, see if anything else was deleted since we started the merge
|
||||
if s.root.segment[i].deleted != nil {
|
||||
if segSnapAtMerge != nil && s.root.segment[i].deleted != nil {
|
||||
// assume all these deletes are new
|
||||
deletedSince := s.root.segment[i].deleted
|
||||
// if we already knew about some of them, remove
|
||||
|
@ -224,7 +235,13 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
|
|||
newSegmentDeleted.Add(uint32(newDocNum))
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// clean up the old segment map to figure out the
|
||||
// obsolete segments wrt root in meantime, whatever
|
||||
// segments left behind in old map after processing
|
||||
// the root segments would be the obsolete segment set
|
||||
delete(nextMerge.old, segmentID)
|
||||
|
||||
} else if s.root.segment[i].LiveSize() > 0 {
|
||||
// this segment is staying
|
||||
newSnapshot.segment = append(newSnapshot.segment, &SegmentSnapshot{
|
||||
id: s.root.segment[i].id,
|
||||
|
@ -238,6 +255,24 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
|
|||
}
|
||||
}
|
||||
|
||||
// before the newMerge introduction, need to clean the newly
|
||||
// merged segment wrt the current root segments, hence
|
||||
// applying the obsolete segment contents to newly merged segment
|
||||
for segID, ss := range nextMerge.old {
|
||||
obsoleted := ss.DocNumbersLive()
|
||||
if obsoleted != nil {
|
||||
obsoletedIter := obsoleted.Iterator()
|
||||
for obsoletedIter.HasNext() {
|
||||
oldDocNum := obsoletedIter.Next()
|
||||
newDocNum := nextMerge.oldNewDocNums[segID][oldDocNum]
|
||||
newSegmentDeleted.Add(uint32(newDocNum))
|
||||
}
|
||||
}
|
||||
}
|
||||
// In case where all the docs in the newly merged segment getting
|
||||
// deleted by the time we reach here, can skip the introduction.
|
||||
if nextMerge.new != nil &&
|
||||
nextMerge.new.Count() > newSegmentDeleted.GetCardinality() {
|
||||
// put new segment at end
|
||||
newSnapshot.segment = append(newSnapshot.segment, &SegmentSnapshot{
|
||||
id: nextMerge.id,
|
||||
|
@ -246,6 +281,9 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
|
|||
cachedDocs: &cachedDocs{cache: nil},
|
||||
})
|
||||
newSnapshot.offsets = append(newSnapshot.offsets, running)
|
||||
}
|
||||
|
||||
newSnapshot.AddRef() // 1 ref for the nextMerge.notify response
|
||||
|
||||
// swap in new segment
|
||||
rootPrev := s.root
|
||||
|
@ -257,7 +295,8 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
|
|||
_ = rootPrev.DecRef()
|
||||
}
|
||||
|
||||
// notify merger we incorporated this
|
||||
// notify requester that we incorporated this
|
||||
nextMerge.notify <- newSnapshot
|
||||
close(nextMerge.notify)
|
||||
}
|
||||
|
||||
|
|
|
@ -15,6 +15,9 @@
|
|||
package scorch
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
|
||||
"fmt"
|
||||
"os"
|
||||
"sync/atomic"
|
||||
|
@ -28,6 +31,13 @@ import (
|
|||
|
||||
func (s *Scorch) mergerLoop() {
|
||||
var lastEpochMergePlanned uint64
|
||||
mergePlannerOptions, err := s.parseMergePlannerOptions()
|
||||
if err != nil {
|
||||
s.fireAsyncError(fmt.Errorf("mergePlannerOption json parsing err: %v", err))
|
||||
s.asyncTasks.Done()
|
||||
return
|
||||
}
|
||||
|
||||
OUTER:
|
||||
for {
|
||||
select {
|
||||
|
@ -45,7 +55,7 @@ OUTER:
|
|||
startTime := time.Now()
|
||||
|
||||
// lets get started
|
||||
err := s.planMergeAtSnapshot(ourSnapshot)
|
||||
err := s.planMergeAtSnapshot(ourSnapshot, mergePlannerOptions)
|
||||
if err != nil {
|
||||
s.fireAsyncError(fmt.Errorf("merging err: %v", err))
|
||||
_ = ourSnapshot.DecRef()
|
||||
|
@ -58,51 +68,49 @@ OUTER:
|
|||
_ = ourSnapshot.DecRef()
|
||||
|
||||
// tell the persister we're waiting for changes
|
||||
// first make a notification chan
|
||||
notifyUs := make(notificationChan)
|
||||
// first make a epochWatcher chan
|
||||
ew := &epochWatcher{
|
||||
epoch: lastEpochMergePlanned,
|
||||
notifyCh: make(notificationChan, 1),
|
||||
}
|
||||
|
||||
// give it to the persister
|
||||
select {
|
||||
case <-s.closeCh:
|
||||
break OUTER
|
||||
case s.persisterNotifier <- notifyUs:
|
||||
case s.persisterNotifier <- ew:
|
||||
}
|
||||
|
||||
// check again
|
||||
s.rootLock.RLock()
|
||||
ourSnapshot = s.root
|
||||
ourSnapshot.AddRef()
|
||||
s.rootLock.RUnlock()
|
||||
|
||||
if ourSnapshot.epoch != lastEpochMergePlanned {
|
||||
startTime := time.Now()
|
||||
|
||||
// lets get started
|
||||
err := s.planMergeAtSnapshot(ourSnapshot)
|
||||
if err != nil {
|
||||
s.fireAsyncError(fmt.Errorf("merging err: %v", err))
|
||||
_ = ourSnapshot.DecRef()
|
||||
continue OUTER
|
||||
}
|
||||
lastEpochMergePlanned = ourSnapshot.epoch
|
||||
|
||||
s.fireEvent(EventKindMergerProgress, time.Since(startTime))
|
||||
}
|
||||
_ = ourSnapshot.DecRef()
|
||||
|
||||
// now wait for it (but also detect close)
|
||||
// now wait for persister (but also detect close)
|
||||
select {
|
||||
case <-s.closeCh:
|
||||
break OUTER
|
||||
case <-notifyUs:
|
||||
// woken up, next loop should pick up work
|
||||
case <-ew.notifyCh:
|
||||
}
|
||||
}
|
||||
}
|
||||
s.asyncTasks.Done()
|
||||
}
|
||||
|
||||
func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot) error {
|
||||
func (s *Scorch) parseMergePlannerOptions() (*mergeplan.MergePlanOptions,
|
||||
error) {
|
||||
mergePlannerOptions := mergeplan.DefaultMergePlanOptions
|
||||
if v, ok := s.config["scorchMergePlanOptions"]; ok {
|
||||
b, err := json.Marshal(v)
|
||||
if err != nil {
|
||||
return &mergePlannerOptions, err
|
||||
}
|
||||
|
||||
err = json.Unmarshal(b, &mergePlannerOptions)
|
||||
if err != nil {
|
||||
return &mergePlannerOptions, err
|
||||
}
|
||||
}
|
||||
return &mergePlannerOptions, nil
|
||||
}
|
||||
|
||||
func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
|
||||
options *mergeplan.MergePlanOptions) error {
|
||||
// build list of zap segments in this snapshot
|
||||
var onlyZapSnapshots []mergeplan.Segment
|
||||
for _, segmentSnapshot := range ourSnapshot.segment {
|
||||
|
@ -112,7 +120,7 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot) error {
|
|||
}
|
||||
|
||||
// give this list to the planner
|
||||
resultMergePlan, err := mergeplan.Plan(onlyZapSnapshots, nil)
|
||||
resultMergePlan, err := mergeplan.Plan(onlyZapSnapshots, options)
|
||||
if err != nil {
|
||||
return fmt.Errorf("merge planning err: %v", err)
|
||||
}
|
||||
|
@ -122,8 +130,12 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot) error {
|
|||
}
|
||||
|
||||
// process tasks in serial for now
|
||||
var notifications []notificationChan
|
||||
var notifications []chan *IndexSnapshot
|
||||
for _, task := range resultMergePlan.Tasks {
|
||||
if len(task.Segments) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
oldMap := make(map[uint64]*SegmentSnapshot)
|
||||
newSegmentID := atomic.AddUint64(&s.nextSegmentID, 1)
|
||||
segmentsToMerge := make([]*zap.Segment, 0, len(task.Segments))
|
||||
|
@ -132,40 +144,51 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot) error {
|
|||
if segSnapshot, ok := planSegment.(*SegmentSnapshot); ok {
|
||||
oldMap[segSnapshot.id] = segSnapshot
|
||||
if zapSeg, ok := segSnapshot.segment.(*zap.Segment); ok {
|
||||
if segSnapshot.LiveSize() == 0 {
|
||||
oldMap[segSnapshot.id] = nil
|
||||
} else {
|
||||
segmentsToMerge = append(segmentsToMerge, zapSeg)
|
||||
docsToDrop = append(docsToDrop, segSnapshot.deleted)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var oldNewDocNums map[uint64][]uint64
|
||||
var segment segment.Segment
|
||||
if len(segmentsToMerge) > 0 {
|
||||
filename := zapFileName(newSegmentID)
|
||||
s.markIneligibleForRemoval(filename)
|
||||
path := s.path + string(os.PathSeparator) + filename
|
||||
newDocNums, err := zap.Merge(segmentsToMerge, docsToDrop, path, DefaultChunkFactor)
|
||||
newDocNums, err := zap.Merge(segmentsToMerge, docsToDrop, path, 1024)
|
||||
if err != nil {
|
||||
s.unmarkIneligibleForRemoval(filename)
|
||||
return fmt.Errorf("merging failed: %v", err)
|
||||
}
|
||||
segment, err := zap.Open(path)
|
||||
segment, err = zap.Open(path)
|
||||
if err != nil {
|
||||
s.unmarkIneligibleForRemoval(filename)
|
||||
return err
|
||||
}
|
||||
oldNewDocNums = make(map[uint64][]uint64)
|
||||
for i, segNewDocNums := range newDocNums {
|
||||
oldNewDocNums[task.Segments[i].Id()] = segNewDocNums
|
||||
}
|
||||
}
|
||||
|
||||
sm := &segmentMerge{
|
||||
id: newSegmentID,
|
||||
old: oldMap,
|
||||
oldNewDocNums: make(map[uint64][]uint64),
|
||||
oldNewDocNums: oldNewDocNums,
|
||||
new: segment,
|
||||
notify: make(notificationChan),
|
||||
notify: make(chan *IndexSnapshot, 1),
|
||||
}
|
||||
notifications = append(notifications, sm.notify)
|
||||
for i, segNewDocNums := range newDocNums {
|
||||
sm.oldNewDocNums[task.Segments[i].Id()] = segNewDocNums
|
||||
}
|
||||
|
||||
// give it to the introducer
|
||||
select {
|
||||
case <-s.closeCh:
|
||||
_ = segment.Close()
|
||||
return nil
|
||||
case s.merges <- sm:
|
||||
}
|
||||
|
@ -174,7 +197,10 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot) error {
|
|||
select {
|
||||
case <-s.closeCh:
|
||||
return nil
|
||||
case <-notification:
|
||||
case newSnapshot := <-notification:
|
||||
if newSnapshot != nil {
|
||||
_ = newSnapshot.DecRef()
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
|
@ -185,5 +211,72 @@ type segmentMerge struct {
|
|||
old map[uint64]*SegmentSnapshot
|
||||
oldNewDocNums map[uint64][]uint64
|
||||
new segment.Segment
|
||||
notify notificationChan
|
||||
notify chan *IndexSnapshot
|
||||
}
|
||||
|
||||
// perform a merging of the given SegmentBase instances into a new,
|
||||
// persisted segment, and synchronously introduce that new segment
|
||||
// into the root
|
||||
func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot,
|
||||
sbs []*zap.SegmentBase, sbsDrops []*roaring.Bitmap, sbsIndexes []int,
|
||||
chunkFactor uint32) (uint64, *IndexSnapshot, uint64, error) {
|
||||
var br bytes.Buffer
|
||||
|
||||
cr := zap.NewCountHashWriter(&br)
|
||||
|
||||
newDocNums, numDocs, storedIndexOffset, fieldsIndexOffset,
|
||||
docValueOffset, dictLocs, fieldsInv, fieldsMap, err :=
|
||||
zap.MergeToWriter(sbs, sbsDrops, chunkFactor, cr)
|
||||
if err != nil {
|
||||
return 0, nil, 0, err
|
||||
}
|
||||
|
||||
sb, err := zap.InitSegmentBase(br.Bytes(), cr.Sum32(), chunkFactor,
|
||||
fieldsMap, fieldsInv, numDocs, storedIndexOffset, fieldsIndexOffset,
|
||||
docValueOffset, dictLocs)
|
||||
if err != nil {
|
||||
return 0, nil, 0, err
|
||||
}
|
||||
|
||||
newSegmentID := atomic.AddUint64(&s.nextSegmentID, 1)
|
||||
|
||||
filename := zapFileName(newSegmentID)
|
||||
path := s.path + string(os.PathSeparator) + filename
|
||||
err = zap.PersistSegmentBase(sb, path)
|
||||
if err != nil {
|
||||
return 0, nil, 0, err
|
||||
}
|
||||
|
||||
segment, err := zap.Open(path)
|
||||
if err != nil {
|
||||
return 0, nil, 0, err
|
||||
}
|
||||
|
||||
sm := &segmentMerge{
|
||||
id: newSegmentID,
|
||||
old: make(map[uint64]*SegmentSnapshot),
|
||||
oldNewDocNums: make(map[uint64][]uint64),
|
||||
new: segment,
|
||||
notify: make(chan *IndexSnapshot, 1),
|
||||
}
|
||||
|
||||
for i, idx := range sbsIndexes {
|
||||
ss := snapshot.segment[idx]
|
||||
sm.old[ss.id] = ss
|
||||
sm.oldNewDocNums[ss.id] = newDocNums[i]
|
||||
}
|
||||
|
||||
select { // send to introducer
|
||||
case <-s.closeCh:
|
||||
_ = segment.DecRef()
|
||||
return 0, nil, 0, nil // TODO: return ErrInterruptedClosed?
|
||||
case s.merges <- sm:
|
||||
}
|
||||
|
||||
select { // wait for introduction to complete
|
||||
case <-s.closeCh:
|
||||
return 0, nil, 0, nil // TODO: return ErrInterruptedClosed?
|
||||
case newSnapshot := <-sm.notify:
|
||||
return numDocs, newSnapshot, newSegmentID, nil
|
||||
}
|
||||
}
|
||||
|
|
|
@ -186,13 +186,13 @@ func plan(segmentsIn []Segment, o *MergePlanOptions) (*MergePlan, error) {
|
|||
|
||||
// While we’re over budget, keep looping, which might produce
|
||||
// another MergeTask.
|
||||
for len(eligibles) > budgetNumSegments {
|
||||
for len(eligibles) > 0 && (len(eligibles)+len(rv.Tasks)) > budgetNumSegments {
|
||||
// Track a current best roster as we examine and score
|
||||
// potential rosters of merges.
|
||||
var bestRoster []Segment
|
||||
var bestRosterScore float64 // Lower score is better.
|
||||
|
||||
for startIdx := 0; startIdx < len(eligibles)-o.SegmentsPerMergeTask; startIdx++ {
|
||||
for startIdx := 0; startIdx < len(eligibles); startIdx++ {
|
||||
var roster []Segment
|
||||
var rosterLiveSize int64
|
||||
|
||||
|
|
|
@ -34,22 +34,39 @@ import (
|
|||
|
||||
var DefaultChunkFactor uint32 = 1024
|
||||
|
||||
// Arbitrary number, need to make it configurable.
|
||||
// Lower values like 10/making persister really slow
|
||||
// doesn't work well as it is creating more files to
|
||||
// persist for in next persist iteration and spikes the # FDs.
|
||||
// Ideal value should let persister also proceed at
|
||||
// an optimum pace so that the merger can skip
|
||||
// many intermediate snapshots.
|
||||
// This needs to be based on empirical data.
|
||||
// TODO - may need to revisit this approach/value.
|
||||
var epochDistance = uint64(5)
|
||||
|
||||
type notificationChan chan struct{}
|
||||
|
||||
func (s *Scorch) persisterLoop() {
|
||||
defer s.asyncTasks.Done()
|
||||
|
||||
var notifyChs []notificationChan
|
||||
var lastPersistedEpoch uint64
|
||||
var persistWatchers []*epochWatcher
|
||||
var lastPersistedEpoch, lastMergedEpoch uint64
|
||||
var ew *epochWatcher
|
||||
OUTER:
|
||||
for {
|
||||
select {
|
||||
case <-s.closeCh:
|
||||
break OUTER
|
||||
case notifyCh := <-s.persisterNotifier:
|
||||
notifyChs = append(notifyChs, notifyCh)
|
||||
case ew = <-s.persisterNotifier:
|
||||
persistWatchers = append(persistWatchers, ew)
|
||||
default:
|
||||
}
|
||||
if ew != nil && ew.epoch > lastMergedEpoch {
|
||||
lastMergedEpoch = ew.epoch
|
||||
}
|
||||
persistWatchers = s.pausePersisterForMergerCatchUp(lastPersistedEpoch,
|
||||
&lastMergedEpoch, persistWatchers)
|
||||
|
||||
var ourSnapshot *IndexSnapshot
|
||||
var ourPersisted []chan error
|
||||
|
@ -81,10 +98,11 @@ OUTER:
|
|||
}
|
||||
|
||||
lastPersistedEpoch = ourSnapshot.epoch
|
||||
for _, notifyCh := range notifyChs {
|
||||
close(notifyCh)
|
||||
for _, ew := range persistWatchers {
|
||||
close(ew.notifyCh)
|
||||
}
|
||||
notifyChs = nil
|
||||
|
||||
persistWatchers = nil
|
||||
_ = ourSnapshot.DecRef()
|
||||
|
||||
changed := false
|
||||
|
@ -120,27 +138,155 @@ OUTER:
|
|||
break OUTER
|
||||
case <-w.notifyCh:
|
||||
// woken up, next loop should pick up work
|
||||
continue OUTER
|
||||
case ew = <-s.persisterNotifier:
|
||||
// if the watchers are already caught up then let them wait,
|
||||
// else let them continue to do the catch up
|
||||
persistWatchers = append(persistWatchers, ew)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func notifyMergeWatchers(lastPersistedEpoch uint64,
|
||||
persistWatchers []*epochWatcher) []*epochWatcher {
|
||||
var watchersNext []*epochWatcher
|
||||
for _, w := range persistWatchers {
|
||||
if w.epoch < lastPersistedEpoch {
|
||||
close(w.notifyCh)
|
||||
} else {
|
||||
watchersNext = append(watchersNext, w)
|
||||
}
|
||||
}
|
||||
return watchersNext
|
||||
}
|
||||
|
||||
func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64, lastMergedEpoch *uint64,
|
||||
persistWatchers []*epochWatcher) []*epochWatcher {
|
||||
|
||||
// first, let the watchers proceed if they lag behind
|
||||
persistWatchers = notifyMergeWatchers(lastPersistedEpoch, persistWatchers)
|
||||
|
||||
OUTER:
|
||||
// check for slow merger and await until the merger catch up
|
||||
for lastPersistedEpoch > *lastMergedEpoch+epochDistance {
|
||||
|
||||
select {
|
||||
case <-s.closeCh:
|
||||
break OUTER
|
||||
case ew := <-s.persisterNotifier:
|
||||
persistWatchers = append(persistWatchers, ew)
|
||||
*lastMergedEpoch = ew.epoch
|
||||
}
|
||||
|
||||
// let the watchers proceed if they lag behind
|
||||
persistWatchers = notifyMergeWatchers(lastPersistedEpoch, persistWatchers)
|
||||
}
|
||||
|
||||
return persistWatchers
|
||||
}
|
||||
|
||||
func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error {
|
||||
persisted, err := s.persistSnapshotMaybeMerge(snapshot)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if persisted {
|
||||
return nil
|
||||
}
|
||||
|
||||
return s.persistSnapshotDirect(snapshot)
|
||||
}
|
||||
|
||||
// DefaultMinSegmentsForInMemoryMerge represents the default number of
|
||||
// in-memory zap segments that persistSnapshotMaybeMerge() needs to
|
||||
// see in an IndexSnapshot before it decides to merge and persist
|
||||
// those segments
|
||||
var DefaultMinSegmentsForInMemoryMerge = 2
|
||||
|
||||
// persistSnapshotMaybeMerge examines the snapshot and might merge and
|
||||
// persist the in-memory zap segments if there are enough of them
|
||||
func (s *Scorch) persistSnapshotMaybeMerge(snapshot *IndexSnapshot) (
|
||||
bool, error) {
|
||||
// collect the in-memory zap segments (SegmentBase instances)
|
||||
var sbs []*zap.SegmentBase
|
||||
var sbsDrops []*roaring.Bitmap
|
||||
var sbsIndexes []int
|
||||
|
||||
for i, segmentSnapshot := range snapshot.segment {
|
||||
if sb, ok := segmentSnapshot.segment.(*zap.SegmentBase); ok {
|
||||
sbs = append(sbs, sb)
|
||||
sbsDrops = append(sbsDrops, segmentSnapshot.deleted)
|
||||
sbsIndexes = append(sbsIndexes, i)
|
||||
}
|
||||
}
|
||||
|
||||
if len(sbs) < DefaultMinSegmentsForInMemoryMerge {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
_, newSnapshot, newSegmentID, err := s.mergeSegmentBases(
|
||||
snapshot, sbs, sbsDrops, sbsIndexes, DefaultChunkFactor)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
if newSnapshot == nil {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
defer func() {
|
||||
_ = newSnapshot.DecRef()
|
||||
}()
|
||||
|
||||
mergedSegmentIDs := map[uint64]struct{}{}
|
||||
for _, idx := range sbsIndexes {
|
||||
mergedSegmentIDs[snapshot.segment[idx].id] = struct{}{}
|
||||
}
|
||||
|
||||
// construct a snapshot that's logically equivalent to the input
|
||||
// snapshot, but with merged segments replaced by the new segment
|
||||
equiv := &IndexSnapshot{
|
||||
parent: snapshot.parent,
|
||||
segment: make([]*SegmentSnapshot, 0, len(snapshot.segment)),
|
||||
internal: snapshot.internal,
|
||||
epoch: snapshot.epoch,
|
||||
}
|
||||
|
||||
// copy to the equiv the segments that weren't replaced
|
||||
for _, segment := range snapshot.segment {
|
||||
if _, wasMerged := mergedSegmentIDs[segment.id]; !wasMerged {
|
||||
equiv.segment = append(equiv.segment, segment)
|
||||
}
|
||||
}
|
||||
|
||||
// append to the equiv the new segment
|
||||
for _, segment := range newSnapshot.segment {
|
||||
if segment.id == newSegmentID {
|
||||
equiv.segment = append(equiv.segment, &SegmentSnapshot{
|
||||
id: newSegmentID,
|
||||
segment: segment.segment,
|
||||
deleted: nil, // nil since merging handled deletions
|
||||
})
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
err = s.persistSnapshotDirect(equiv)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
return true, nil
|
||||
}
|
||||
|
||||
func (s *Scorch) persistSnapshotDirect(snapshot *IndexSnapshot) (err error) {
|
||||
// start a write transaction
|
||||
tx, err := s.rootBolt.Begin(true)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// defer fsync of the rootbolt
|
||||
// defer rollback on error
|
||||
defer func() {
|
||||
if err == nil {
|
||||
err = s.rootBolt.Sync()
|
||||
}
|
||||
}()
|
||||
// defer commit/rollback transaction
|
||||
defer func() {
|
||||
if err == nil {
|
||||
err = tx.Commit()
|
||||
} else {
|
||||
if err != nil {
|
||||
_ = tx.Rollback()
|
||||
}
|
||||
}()
|
||||
|
@ -172,20 +318,20 @@ func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error {
|
|||
newSegmentPaths := make(map[uint64]string)
|
||||
|
||||
// first ensure that each segment in this snapshot has been persisted
|
||||
for i, segmentSnapshot := range snapshot.segment {
|
||||
snapshotSegmentKey := segment.EncodeUvarintAscending(nil, uint64(i))
|
||||
snapshotSegmentBucket, err2 := snapshotBucket.CreateBucketIfNotExists(snapshotSegmentKey)
|
||||
if err2 != nil {
|
||||
return err2
|
||||
for _, segmentSnapshot := range snapshot.segment {
|
||||
snapshotSegmentKey := segment.EncodeUvarintAscending(nil, segmentSnapshot.id)
|
||||
snapshotSegmentBucket, err := snapshotBucket.CreateBucketIfNotExists(snapshotSegmentKey)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
switch seg := segmentSnapshot.segment.(type) {
|
||||
case *zap.SegmentBase:
|
||||
// need to persist this to disk
|
||||
filename := zapFileName(segmentSnapshot.id)
|
||||
path := s.path + string(os.PathSeparator) + filename
|
||||
err2 := zap.PersistSegmentBase(seg, path)
|
||||
if err2 != nil {
|
||||
return fmt.Errorf("error persisting segment: %v", err2)
|
||||
err = zap.PersistSegmentBase(seg, path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error persisting segment: %v", err)
|
||||
}
|
||||
newSegmentPaths[segmentSnapshot.id] = path
|
||||
err = snapshotSegmentBucket.Put(boltPathKey, []byte(filename))
|
||||
|
@ -218,19 +364,28 @@ func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error {
|
|||
}
|
||||
}
|
||||
|
||||
// only alter the root if we actually persisted a segment
|
||||
// (sometimes its just a new snapshot, possibly with new internal values)
|
||||
// we need to swap in a new root only when we've persisted 1 or
|
||||
// more segments -- whereby the new root would have 1-for-1
|
||||
// replacements of in-memory segments with file-based segments
|
||||
//
|
||||
// other cases like updates to internal values only, and/or when
|
||||
// there are only deletions, are already covered and persisted by
|
||||
// the newly populated boltdb snapshotBucket above
|
||||
if len(newSegmentPaths) > 0 {
|
||||
// now try to open all the new snapshots
|
||||
newSegments := make(map[uint64]segment.Segment)
|
||||
defer func() {
|
||||
for _, s := range newSegments {
|
||||
if s != nil {
|
||||
// cleanup segments that were opened but not
|
||||
// swapped into the new root
|
||||
_ = s.Close()
|
||||
}
|
||||
}
|
||||
}()
|
||||
for segmentID, path := range newSegmentPaths {
|
||||
newSegments[segmentID], err = zap.Open(path)
|
||||
if err != nil {
|
||||
for _, s := range newSegments {
|
||||
if s != nil {
|
||||
_ = s.Close() // cleanup segments that were successfully opened
|
||||
}
|
||||
}
|
||||
return fmt.Errorf("error opening new segment at %s, %v", path, err)
|
||||
}
|
||||
}
|
||||
|
@ -255,6 +410,7 @@ func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error {
|
|||
cachedDocs: segmentSnapshot.cachedDocs,
|
||||
}
|
||||
newIndexSnapshot.segment[i] = newSegmentSnapshot
|
||||
delete(newSegments, segmentSnapshot.id)
|
||||
// update items persisted incase of a new segment snapshot
|
||||
atomic.AddUint64(&s.stats.numItemsPersisted, newSegmentSnapshot.Count())
|
||||
} else {
|
||||
|
@ -266,9 +422,7 @@ func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error {
|
|||
for k, v := range s.root.internal {
|
||||
newIndexSnapshot.internal[k] = v
|
||||
}
|
||||
for _, filename := range filenames {
|
||||
delete(s.ineligibleForRemoval, filename)
|
||||
}
|
||||
|
||||
rootPrev := s.root
|
||||
s.root = newIndexSnapshot
|
||||
s.rootLock.Unlock()
|
||||
|
@ -277,6 +431,24 @@ func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error {
|
|||
}
|
||||
}
|
||||
|
||||
err = tx.Commit()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = s.rootBolt.Sync()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// allow files to become eligible for removal after commit, such
|
||||
// as file segments from snapshots that came from the merger
|
||||
s.rootLock.Lock()
|
||||
for _, filename := range filenames {
|
||||
delete(s.ineligibleForRemoval, filename)
|
||||
}
|
||||
s.rootLock.Unlock()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
|
@ -61,7 +61,7 @@ type Scorch struct {
|
|||
merges chan *segmentMerge
|
||||
introducerNotifier chan *epochWatcher
|
||||
revertToSnapshots chan *snapshotReversion
|
||||
persisterNotifier chan notificationChan
|
||||
persisterNotifier chan *epochWatcher
|
||||
rootBolt *bolt.DB
|
||||
asyncTasks sync.WaitGroup
|
||||
|
||||
|
@ -114,6 +114,25 @@ func (s *Scorch) fireAsyncError(err error) {
|
|||
}
|
||||
|
||||
func (s *Scorch) Open() error {
|
||||
err := s.openBolt()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
s.asyncTasks.Add(1)
|
||||
go s.mainLoop()
|
||||
|
||||
if !s.readOnly && s.path != "" {
|
||||
s.asyncTasks.Add(1)
|
||||
go s.persisterLoop()
|
||||
s.asyncTasks.Add(1)
|
||||
go s.mergerLoop()
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *Scorch) openBolt() error {
|
||||
var ok bool
|
||||
s.path, ok = s.config["path"].(string)
|
||||
if !ok {
|
||||
|
@ -136,6 +155,7 @@ func (s *Scorch) Open() error {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
rootBoltPath := s.path + string(os.PathSeparator) + "root.bolt"
|
||||
var err error
|
||||
if s.path != "" {
|
||||
|
@ -156,7 +176,7 @@ func (s *Scorch) Open() error {
|
|||
s.merges = make(chan *segmentMerge)
|
||||
s.introducerNotifier = make(chan *epochWatcher, 1)
|
||||
s.revertToSnapshots = make(chan *snapshotReversion)
|
||||
s.persisterNotifier = make(chan notificationChan)
|
||||
s.persisterNotifier = make(chan *epochWatcher, 1)
|
||||
|
||||
if !s.readOnly && s.path != "" {
|
||||
err := s.removeOldZapFiles() // Before persister or merger create any new files.
|
||||
|
@ -166,16 +186,6 @@ func (s *Scorch) Open() error {
|
|||
}
|
||||
}
|
||||
|
||||
s.asyncTasks.Add(1)
|
||||
go s.mainLoop()
|
||||
|
||||
if !s.readOnly && s.path != "" {
|
||||
s.asyncTasks.Add(1)
|
||||
go s.persisterLoop()
|
||||
s.asyncTasks.Add(1)
|
||||
go s.mergerLoop()
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -310,17 +320,21 @@ func (s *Scorch) prepareSegment(newSegment segment.Segment, ids []string,
|
|||
introduction.persisted = make(chan error, 1)
|
||||
}
|
||||
|
||||
// get read lock, to optimistically prepare obsoleted info
|
||||
// optimistically prepare obsoletes outside of rootLock
|
||||
s.rootLock.RLock()
|
||||
for _, seg := range s.root.segment {
|
||||
root := s.root
|
||||
root.AddRef()
|
||||
s.rootLock.RUnlock()
|
||||
|
||||
for _, seg := range root.segment {
|
||||
delta, err := seg.segment.DocNumbers(ids)
|
||||
if err != nil {
|
||||
s.rootLock.RUnlock()
|
||||
return err
|
||||
}
|
||||
introduction.obsoletes[seg.id] = delta
|
||||
}
|
||||
s.rootLock.RUnlock()
|
||||
|
||||
_ = root.DecRef()
|
||||
|
||||
s.introductions <- introduction
|
||||
|
||||
|
|
|
@ -95,6 +95,21 @@ func (s *Segment) initializeDict(results []*index.AnalysisResult) {
|
|||
var numTokenFrequencies int
|
||||
var totLocs int
|
||||
|
||||
// initial scan for all fieldID's to sort them
|
||||
for _, result := range results {
|
||||
for _, field := range result.Document.CompositeFields {
|
||||
s.getOrDefineField(field.Name())
|
||||
}
|
||||
for _, field := range result.Document.Fields {
|
||||
s.getOrDefineField(field.Name())
|
||||
}
|
||||
}
|
||||
sort.Strings(s.FieldsInv[1:]) // keep _id as first field
|
||||
s.FieldsMap = make(map[string]uint16, len(s.FieldsInv))
|
||||
for fieldID, fieldName := range s.FieldsInv {
|
||||
s.FieldsMap[fieldName] = uint16(fieldID + 1)
|
||||
}
|
||||
|
||||
processField := func(fieldID uint16, tfs analysis.TokenFrequencies) {
|
||||
for term, tf := range tfs {
|
||||
pidPlus1, exists := s.Dicts[fieldID][term]
|
||||
|
|
|
@ -76,6 +76,8 @@ type DictionaryIterator struct {
|
|||
prefix string
|
||||
end string
|
||||
offset int
|
||||
|
||||
dictEntry index.DictEntry // reused across Next()'s
|
||||
}
|
||||
|
||||
// Next returns the next entry in the dictionary
|
||||
|
@ -95,8 +97,7 @@ func (d *DictionaryIterator) Next() (*index.DictEntry, error) {
|
|||
|
||||
d.offset++
|
||||
postingID := d.d.segment.Dicts[d.d.fieldID][next]
|
||||
return &index.DictEntry{
|
||||
Term: next,
|
||||
Count: d.d.segment.Postings[postingID-1].GetCardinality(),
|
||||
}, nil
|
||||
d.dictEntry.Term = next
|
||||
d.dictEntry.Count = d.d.segment.Postings[postingID-1].GetCardinality()
|
||||
return &d.dictEntry, nil
|
||||
}
|
||||
|
|
|
@ -28,7 +28,7 @@ import (
|
|||
"github.com/golang/snappy"
|
||||
)
|
||||
|
||||
const version uint32 = 2
|
||||
const version uint32 = 3
|
||||
|
||||
const fieldNotUninverted = math.MaxUint64
|
||||
|
||||
|
@ -187,79 +187,42 @@ func persistBase(memSegment *mem.Segment, cr *CountHashWriter, chunkFactor uint3
|
|||
}
|
||||
|
||||
func persistStored(memSegment *mem.Segment, w *CountHashWriter) (uint64, error) {
|
||||
|
||||
var curr int
|
||||
var metaBuf bytes.Buffer
|
||||
var data, compressed []byte
|
||||
|
||||
metaEncoder := govarint.NewU64Base128Encoder(&metaBuf)
|
||||
|
||||
docNumOffsets := make(map[int]uint64, len(memSegment.Stored))
|
||||
|
||||
for docNum, storedValues := range memSegment.Stored {
|
||||
if docNum != 0 {
|
||||
// reset buffer if necessary
|
||||
curr = 0
|
||||
metaBuf.Reset()
|
||||
data = data[:0]
|
||||
compressed = compressed[:0]
|
||||
curr = 0
|
||||
}
|
||||
|
||||
metaEncoder := govarint.NewU64Base128Encoder(&metaBuf)
|
||||
|
||||
st := memSegment.StoredTypes[docNum]
|
||||
sp := memSegment.StoredPos[docNum]
|
||||
|
||||
// encode fields in order
|
||||
for fieldID := range memSegment.FieldsInv {
|
||||
if storedFieldValues, ok := storedValues[uint16(fieldID)]; ok {
|
||||
// has stored values for this field
|
||||
num := len(storedFieldValues)
|
||||
|
||||
stf := st[uint16(fieldID)]
|
||||
spf := sp[uint16(fieldID)]
|
||||
|
||||
// process each value
|
||||
for i := 0; i < num; i++ {
|
||||
// encode field
|
||||
_, err2 := metaEncoder.PutU64(uint64(fieldID))
|
||||
if err2 != nil {
|
||||
return 0, err2
|
||||
}
|
||||
// encode type
|
||||
_, err2 = metaEncoder.PutU64(uint64(stf[i]))
|
||||
if err2 != nil {
|
||||
return 0, err2
|
||||
}
|
||||
// encode start offset
|
||||
_, err2 = metaEncoder.PutU64(uint64(curr))
|
||||
if err2 != nil {
|
||||
return 0, err2
|
||||
}
|
||||
// end len
|
||||
_, err2 = metaEncoder.PutU64(uint64(len(storedFieldValues[i])))
|
||||
if err2 != nil {
|
||||
return 0, err2
|
||||
}
|
||||
// encode number of array pos
|
||||
_, err2 = metaEncoder.PutU64(uint64(len(spf[i])))
|
||||
if err2 != nil {
|
||||
return 0, err2
|
||||
}
|
||||
// encode all array positions
|
||||
for _, pos := range spf[i] {
|
||||
_, err2 = metaEncoder.PutU64(pos)
|
||||
var err2 error
|
||||
curr, data, err2 = persistStoredFieldValues(fieldID,
|
||||
storedFieldValues, stf, spf, curr, metaEncoder, data)
|
||||
if err2 != nil {
|
||||
return 0, err2
|
||||
}
|
||||
}
|
||||
// append data
|
||||
data = append(data, storedFieldValues[i]...)
|
||||
// update curr
|
||||
curr += len(storedFieldValues[i])
|
||||
}
|
||||
}
|
||||
}
|
||||
metaEncoder.Close()
|
||||
|
||||
metaEncoder.Close()
|
||||
metaBytes := metaBuf.Bytes()
|
||||
|
||||
// compress the data
|
||||
|
@ -299,6 +262,51 @@ func persistStored(memSegment *mem.Segment, w *CountHashWriter) (uint64, error)
|
|||
return rv, nil
|
||||
}
|
||||
|
||||
func persistStoredFieldValues(fieldID int,
|
||||
storedFieldValues [][]byte, stf []byte, spf [][]uint64,
|
||||
curr int, metaEncoder *govarint.Base128Encoder, data []byte) (
|
||||
int, []byte, error) {
|
||||
for i := 0; i < len(storedFieldValues); i++ {
|
||||
// encode field
|
||||
_, err := metaEncoder.PutU64(uint64(fieldID))
|
||||
if err != nil {
|
||||
return 0, nil, err
|
||||
}
|
||||
// encode type
|
||||
_, err = metaEncoder.PutU64(uint64(stf[i]))
|
||||
if err != nil {
|
||||
return 0, nil, err
|
||||
}
|
||||
// encode start offset
|
||||
_, err = metaEncoder.PutU64(uint64(curr))
|
||||
if err != nil {
|
||||
return 0, nil, err
|
||||
}
|
||||
// end len
|
||||
_, err = metaEncoder.PutU64(uint64(len(storedFieldValues[i])))
|
||||
if err != nil {
|
||||
return 0, nil, err
|
||||
}
|
||||
// encode number of array pos
|
||||
_, err = metaEncoder.PutU64(uint64(len(spf[i])))
|
||||
if err != nil {
|
||||
return 0, nil, err
|
||||
}
|
||||
// encode all array positions
|
||||
for _, pos := range spf[i] {
|
||||
_, err = metaEncoder.PutU64(pos)
|
||||
if err != nil {
|
||||
return 0, nil, err
|
||||
}
|
||||
}
|
||||
|
||||
data = append(data, storedFieldValues[i]...)
|
||||
curr += len(storedFieldValues[i])
|
||||
}
|
||||
|
||||
return curr, data, nil
|
||||
}
|
||||
|
||||
func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFactor uint32) ([]uint64, []uint64, error) {
|
||||
var freqOffsets, locOfffsets []uint64
|
||||
tfEncoder := newChunkedIntCoder(uint64(chunkFactor), uint64(len(memSegment.Stored)-1))
|
||||
|
@ -580,7 +588,7 @@ func persistDocValues(memSegment *mem.Segment, w *CountHashWriter,
|
|||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// resetting encoder for the next field
|
||||
// reseting encoder for the next field
|
||||
fdvEncoder.Reset()
|
||||
}
|
||||
|
||||
|
@ -625,12 +633,21 @@ func NewSegmentBase(memSegment *mem.Segment, chunkFactor uint32) (*SegmentBase,
|
|||
return nil, err
|
||||
}
|
||||
|
||||
return InitSegmentBase(br.Bytes(), cr.Sum32(), chunkFactor,
|
||||
memSegment.FieldsMap, memSegment.FieldsInv, numDocs,
|
||||
storedIndexOffset, fieldsIndexOffset, docValueOffset, dictLocs)
|
||||
}
|
||||
|
||||
func InitSegmentBase(mem []byte, memCRC uint32, chunkFactor uint32,
|
||||
fieldsMap map[string]uint16, fieldsInv []string, numDocs uint64,
|
||||
storedIndexOffset uint64, fieldsIndexOffset uint64, docValueOffset uint64,
|
||||
dictLocs []uint64) (*SegmentBase, error) {
|
||||
sb := &SegmentBase{
|
||||
mem: br.Bytes(),
|
||||
memCRC: cr.Sum32(),
|
||||
mem: mem,
|
||||
memCRC: memCRC,
|
||||
chunkFactor: chunkFactor,
|
||||
fieldsMap: memSegment.FieldsMap,
|
||||
fieldsInv: memSegment.FieldsInv,
|
||||
fieldsMap: fieldsMap,
|
||||
fieldsInv: fieldsInv,
|
||||
numDocs: numDocs,
|
||||
storedIndexOffset: storedIndexOffset,
|
||||
fieldsIndexOffset: fieldsIndexOffset,
|
||||
|
@ -639,7 +656,7 @@ func NewSegmentBase(memSegment *mem.Segment, chunkFactor uint32) (*SegmentBase,
|
|||
fieldDvIterMap: make(map[uint16]*docValueIterator),
|
||||
}
|
||||
|
||||
err = sb.loadDvIterators()
|
||||
err := sb.loadDvIterators()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
|
|
@ -39,7 +39,7 @@ type chunkedContentCoder struct {
|
|||
// MetaData represents the data information inside a
|
||||
// chunk.
|
||||
type MetaData struct {
|
||||
DocID uint64 // docid of the data inside the chunk
|
||||
DocNum uint64 // docNum of the data inside the chunk
|
||||
DocDvLoc uint64 // starting offset for a given docid
|
||||
DocDvLen uint64 // length of data inside the chunk for the given docid
|
||||
}
|
||||
|
@ -52,7 +52,7 @@ func newChunkedContentCoder(chunkSize uint64,
|
|||
rv := &chunkedContentCoder{
|
||||
chunkSize: chunkSize,
|
||||
chunkLens: make([]uint64, total),
|
||||
chunkMeta: []MetaData{},
|
||||
chunkMeta: make([]MetaData, 0, total),
|
||||
}
|
||||
|
||||
return rv
|
||||
|
@ -68,7 +68,7 @@ func (c *chunkedContentCoder) Reset() {
|
|||
for i := range c.chunkLens {
|
||||
c.chunkLens[i] = 0
|
||||
}
|
||||
c.chunkMeta = []MetaData{}
|
||||
c.chunkMeta = c.chunkMeta[:0]
|
||||
}
|
||||
|
||||
// Close indicates you are done calling Add() this allows
|
||||
|
@ -88,7 +88,7 @@ func (c *chunkedContentCoder) flushContents() error {
|
|||
|
||||
// write out the metaData slice
|
||||
for _, meta := range c.chunkMeta {
|
||||
_, err := writeUvarints(&c.chunkMetaBuf, meta.DocID, meta.DocDvLoc, meta.DocDvLen)
|
||||
_, err := writeUvarints(&c.chunkMetaBuf, meta.DocNum, meta.DocDvLoc, meta.DocDvLen)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -118,7 +118,7 @@ func (c *chunkedContentCoder) Add(docNum uint64, vals []byte) error {
|
|||
// clearing the chunk specific meta for next chunk
|
||||
c.chunkBuf.Reset()
|
||||
c.chunkMetaBuf.Reset()
|
||||
c.chunkMeta = []MetaData{}
|
||||
c.chunkMeta = c.chunkMeta[:0]
|
||||
c.currChunk = chunk
|
||||
}
|
||||
|
||||
|
@ -130,7 +130,7 @@ func (c *chunkedContentCoder) Add(docNum uint64, vals []byte) error {
|
|||
}
|
||||
|
||||
c.chunkMeta = append(c.chunkMeta, MetaData{
|
||||
DocID: docNum,
|
||||
DocNum: docNum,
|
||||
DocDvLoc: uint64(dvOffset),
|
||||
DocDvLen: uint64(dvSize),
|
||||
})
|
||||
|
|
|
@ -34,32 +34,47 @@ type Dictionary struct {
|
|||
|
||||
// PostingsList returns the postings list for the specified term
|
||||
func (d *Dictionary) PostingsList(term string, except *roaring.Bitmap) (segment.PostingsList, error) {
|
||||
return d.postingsList([]byte(term), except)
|
||||
return d.postingsList([]byte(term), except, nil)
|
||||
}
|
||||
|
||||
func (d *Dictionary) postingsList(term []byte, except *roaring.Bitmap) (*PostingsList, error) {
|
||||
rv := &PostingsList{
|
||||
sb: d.sb,
|
||||
term: term,
|
||||
except: except,
|
||||
func (d *Dictionary) postingsList(term []byte, except *roaring.Bitmap, rv *PostingsList) (*PostingsList, error) {
|
||||
if d.fst == nil {
|
||||
return d.postingsListInit(rv, except), nil
|
||||
}
|
||||
|
||||
if d.fst != nil {
|
||||
postingsOffset, exists, err := d.fst.Get(term)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("vellum err: %v", err)
|
||||
}
|
||||
if exists {
|
||||
err = rv.read(postingsOffset, d)
|
||||
if !exists {
|
||||
return d.postingsListInit(rv, except), nil
|
||||
}
|
||||
|
||||
return d.postingsListFromOffset(postingsOffset, except, rv)
|
||||
}
|
||||
|
||||
func (d *Dictionary) postingsListFromOffset(postingsOffset uint64, except *roaring.Bitmap, rv *PostingsList) (*PostingsList, error) {
|
||||
rv = d.postingsListInit(rv, except)
|
||||
|
||||
err := rv.read(postingsOffset, d)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
func (d *Dictionary) postingsListInit(rv *PostingsList, except *roaring.Bitmap) *PostingsList {
|
||||
if rv == nil {
|
||||
rv = &PostingsList{}
|
||||
} else {
|
||||
*rv = PostingsList{} // clear the struct
|
||||
}
|
||||
rv.sb = d.sb
|
||||
rv.except = except
|
||||
return rv
|
||||
}
|
||||
|
||||
// Iterator returns an iterator for this dictionary
|
||||
func (d *Dictionary) Iterator() segment.DictionaryIterator {
|
||||
rv := &DictionaryIterator{
|
||||
|
|
|
@ -99,7 +99,7 @@ func (s *SegmentBase) loadFieldDocValueIterator(field string,
|
|||
func (di *docValueIterator) loadDvChunk(chunkNumber,
|
||||
localDocNum uint64, s *SegmentBase) error {
|
||||
// advance to the chunk where the docValues
|
||||
// reside for the given docID
|
||||
// reside for the given docNum
|
||||
destChunkDataLoc := di.dvDataLoc
|
||||
for i := 0; i < int(chunkNumber); i++ {
|
||||
destChunkDataLoc += di.chunkLens[i]
|
||||
|
@ -116,7 +116,7 @@ func (di *docValueIterator) loadDvChunk(chunkNumber,
|
|||
offset := uint64(0)
|
||||
di.curChunkHeader = make([]MetaData, int(numDocs))
|
||||
for i := 0; i < int(numDocs); i++ {
|
||||
di.curChunkHeader[i].DocID, read = binary.Uvarint(s.mem[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64])
|
||||
di.curChunkHeader[i].DocNum, read = binary.Uvarint(s.mem[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64])
|
||||
offset += uint64(read)
|
||||
di.curChunkHeader[i].DocDvLoc, read = binary.Uvarint(s.mem[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64])
|
||||
offset += uint64(read)
|
||||
|
@ -131,10 +131,10 @@ func (di *docValueIterator) loadDvChunk(chunkNumber,
|
|||
return nil
|
||||
}
|
||||
|
||||
func (di *docValueIterator) visitDocValues(docID uint64,
|
||||
func (di *docValueIterator) visitDocValues(docNum uint64,
|
||||
visitor index.DocumentFieldTermVisitor) error {
|
||||
// binary search the term locations for the docID
|
||||
start, length := di.getDocValueLocs(docID)
|
||||
// binary search the term locations for the docNum
|
||||
start, length := di.getDocValueLocs(docNum)
|
||||
if start == math.MaxUint64 || length == math.MaxUint64 {
|
||||
return nil
|
||||
}
|
||||
|
@ -144,7 +144,7 @@ func (di *docValueIterator) visitDocValues(docID uint64,
|
|||
return err
|
||||
}
|
||||
|
||||
// pick the terms for the given docID
|
||||
// pick the terms for the given docNum
|
||||
uncompressed = uncompressed[start : start+length]
|
||||
for {
|
||||
i := bytes.Index(uncompressed, termSeparatorSplitSlice)
|
||||
|
@ -159,11 +159,11 @@ func (di *docValueIterator) visitDocValues(docID uint64,
|
|||
return nil
|
||||
}
|
||||
|
||||
func (di *docValueIterator) getDocValueLocs(docID uint64) (uint64, uint64) {
|
||||
func (di *docValueIterator) getDocValueLocs(docNum uint64) (uint64, uint64) {
|
||||
i := sort.Search(len(di.curChunkHeader), func(i int) bool {
|
||||
return di.curChunkHeader[i].DocID >= docID
|
||||
return di.curChunkHeader[i].DocNum >= docNum
|
||||
})
|
||||
if i < len(di.curChunkHeader) && di.curChunkHeader[i].DocID == docID {
|
||||
if i < len(di.curChunkHeader) && di.curChunkHeader[i].DocNum == docNum {
|
||||
return di.curChunkHeader[i].DocDvLoc, di.curChunkHeader[i].DocDvLen
|
||||
}
|
||||
return math.MaxUint64, math.MaxUint64
|
||||
|
|
124
vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/enumerator.go
generated
vendored
Normal file
124
vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/enumerator.go
generated
vendored
Normal file
|
@ -0,0 +1,124 @@
|
|||
// Copyright (c) 2018 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package zap
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
|
||||
"github.com/couchbase/vellum"
|
||||
)
|
||||
|
||||
// enumerator provides an ordered traversal of multiple vellum
|
||||
// iterators. Like JOIN of iterators, the enumerator produces a
|
||||
// sequence of (key, iteratorIndex, value) tuples, sorted by key ASC,
|
||||
// then iteratorIndex ASC, where the same key might be seen or
|
||||
// repeated across multiple child iterators.
|
||||
type enumerator struct {
|
||||
itrs []vellum.Iterator
|
||||
currKs [][]byte
|
||||
currVs []uint64
|
||||
|
||||
lowK []byte
|
||||
lowIdxs []int
|
||||
lowCurr int
|
||||
}
|
||||
|
||||
// newEnumerator returns a new enumerator over the vellum Iterators
|
||||
func newEnumerator(itrs []vellum.Iterator) (*enumerator, error) {
|
||||
rv := &enumerator{
|
||||
itrs: itrs,
|
||||
currKs: make([][]byte, len(itrs)),
|
||||
currVs: make([]uint64, len(itrs)),
|
||||
lowIdxs: make([]int, 0, len(itrs)),
|
||||
}
|
||||
for i, itr := range rv.itrs {
|
||||
rv.currKs[i], rv.currVs[i] = itr.Current()
|
||||
}
|
||||
rv.updateMatches()
|
||||
if rv.lowK == nil {
|
||||
return rv, vellum.ErrIteratorDone
|
||||
}
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
// updateMatches maintains the low key matches based on the currKs
|
||||
func (m *enumerator) updateMatches() {
|
||||
m.lowK = nil
|
||||
m.lowIdxs = m.lowIdxs[:0]
|
||||
m.lowCurr = 0
|
||||
|
||||
for i, key := range m.currKs {
|
||||
if key == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
cmp := bytes.Compare(key, m.lowK)
|
||||
if cmp < 0 || m.lowK == nil {
|
||||
// reached a new low
|
||||
m.lowK = key
|
||||
m.lowIdxs = m.lowIdxs[:0]
|
||||
m.lowIdxs = append(m.lowIdxs, i)
|
||||
} else if cmp == 0 {
|
||||
m.lowIdxs = append(m.lowIdxs, i)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Current returns the enumerator's current key, iterator-index, and
|
||||
// value. If the enumerator is not pointing at a valid value (because
|
||||
// Next returned an error previously), Current will return nil,0,0.
|
||||
func (m *enumerator) Current() ([]byte, int, uint64) {
|
||||
var i int
|
||||
var v uint64
|
||||
if m.lowCurr < len(m.lowIdxs) {
|
||||
i = m.lowIdxs[m.lowCurr]
|
||||
v = m.currVs[i]
|
||||
}
|
||||
return m.lowK, i, v
|
||||
}
|
||||
|
||||
// Next advances the enumerator to the next key/iterator/value result,
|
||||
// else vellum.ErrIteratorDone is returned.
|
||||
func (m *enumerator) Next() error {
|
||||
m.lowCurr += 1
|
||||
if m.lowCurr >= len(m.lowIdxs) {
|
||||
// move all the current low iterators forwards
|
||||
for _, vi := range m.lowIdxs {
|
||||
err := m.itrs[vi].Next()
|
||||
if err != nil && err != vellum.ErrIteratorDone {
|
||||
return err
|
||||
}
|
||||
m.currKs[vi], m.currVs[vi] = m.itrs[vi].Current()
|
||||
}
|
||||
m.updateMatches()
|
||||
}
|
||||
if m.lowK == nil {
|
||||
return vellum.ErrIteratorDone
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Close all the underlying Iterators. The first error, if any, will
|
||||
// be returned.
|
||||
func (m *enumerator) Close() error {
|
||||
var rv error
|
||||
for _, itr := range m.itrs {
|
||||
err := itr.Close()
|
||||
if rv == nil {
|
||||
rv = err
|
||||
}
|
||||
}
|
||||
return rv
|
||||
}
|
|
@ -30,6 +30,8 @@ type chunkedIntCoder struct {
|
|||
encoder *govarint.Base128Encoder
|
||||
chunkLens []uint64
|
||||
currChunk uint64
|
||||
|
||||
buf []byte
|
||||
}
|
||||
|
||||
// newChunkedIntCoder returns a new chunk int coder which packs data into
|
||||
|
@ -67,12 +69,8 @@ func (c *chunkedIntCoder) Add(docNum uint64, vals ...uint64) error {
|
|||
// starting a new chunk
|
||||
if c.encoder != nil {
|
||||
// close out last
|
||||
c.encoder.Close()
|
||||
encodingBytes := c.chunkBuf.Bytes()
|
||||
c.chunkLens[c.currChunk] = uint64(len(encodingBytes))
|
||||
c.final = append(c.final, encodingBytes...)
|
||||
c.Close()
|
||||
c.chunkBuf.Reset()
|
||||
c.encoder = govarint.NewU64Base128Encoder(&c.chunkBuf)
|
||||
}
|
||||
c.currChunk = chunk
|
||||
}
|
||||
|
@ -98,26 +96,25 @@ func (c *chunkedIntCoder) Close() {
|
|||
|
||||
// Write commits all the encoded chunked integers to the provided writer.
|
||||
func (c *chunkedIntCoder) Write(w io.Writer) (int, error) {
|
||||
var tw int
|
||||
buf := make([]byte, binary.MaxVarintLen64)
|
||||
// write out the number of chunks
|
||||
bufNeeded := binary.MaxVarintLen64 * (1 + len(c.chunkLens))
|
||||
if len(c.buf) < bufNeeded {
|
||||
c.buf = make([]byte, bufNeeded)
|
||||
}
|
||||
buf := c.buf
|
||||
|
||||
// write out the number of chunks & each chunkLen
|
||||
n := binary.PutUvarint(buf, uint64(len(c.chunkLens)))
|
||||
nw, err := w.Write(buf[:n])
|
||||
tw += nw
|
||||
if err != nil {
|
||||
return tw, err
|
||||
}
|
||||
// write out the chunk lens
|
||||
for _, chunkLen := range c.chunkLens {
|
||||
n := binary.PutUvarint(buf, uint64(chunkLen))
|
||||
nw, err = w.Write(buf[:n])
|
||||
tw += nw
|
||||
n += binary.PutUvarint(buf[n:], uint64(chunkLen))
|
||||
}
|
||||
|
||||
tw, err := w.Write(buf[:n])
|
||||
if err != nil {
|
||||
return tw, err
|
||||
}
|
||||
}
|
||||
|
||||
// write out the data
|
||||
nw, err = w.Write(c.final)
|
||||
nw, err := w.Write(c.final)
|
||||
tw += nw
|
||||
if err != nil {
|
||||
return tw, err
|
||||
|
|
|
@ -21,6 +21,7 @@ import (
|
|||
"fmt"
|
||||
"math"
|
||||
"os"
|
||||
"sort"
|
||||
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
"github.com/Smerity/govarint"
|
||||
|
@ -28,6 +29,8 @@ import (
|
|||
"github.com/golang/snappy"
|
||||
)
|
||||
|
||||
const docDropped = math.MaxUint64 // sentinel docNum to represent a deleted doc
|
||||
|
||||
// Merge takes a slice of zap segments and bit masks describing which
|
||||
// documents may be dropped, and creates a new segment containing the
|
||||
// remaining data. This new segment is built at the specified path,
|
||||
|
@ -46,47 +49,26 @@ func Merge(segments []*Segment, drops []*roaring.Bitmap, path string,
|
|||
_ = os.Remove(path)
|
||||
}
|
||||
|
||||
segmentBases := make([]*SegmentBase, len(segments))
|
||||
for segmenti, segment := range segments {
|
||||
segmentBases[segmenti] = &segment.SegmentBase
|
||||
}
|
||||
|
||||
// buffer the output
|
||||
br := bufio.NewWriter(f)
|
||||
|
||||
// wrap it for counting (tracking offsets)
|
||||
cr := NewCountHashWriter(br)
|
||||
|
||||
fieldsInv := mergeFields(segments)
|
||||
fieldsMap := mapFields(fieldsInv)
|
||||
|
||||
var newDocNums [][]uint64
|
||||
var storedIndexOffset uint64
|
||||
fieldDvLocsOffset := uint64(fieldNotUninverted)
|
||||
var dictLocs []uint64
|
||||
|
||||
newSegDocCount := computeNewDocCount(segments, drops)
|
||||
if newSegDocCount > 0 {
|
||||
storedIndexOffset, newDocNums, err = mergeStoredAndRemap(segments, drops,
|
||||
fieldsMap, fieldsInv, newSegDocCount, cr)
|
||||
newDocNums, numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset, _, _, _, err :=
|
||||
MergeToWriter(segmentBases, drops, chunkFactor, cr)
|
||||
if err != nil {
|
||||
cleanup()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
dictLocs, fieldDvLocsOffset, err = persistMergedRest(segments, drops, fieldsInv, fieldsMap,
|
||||
newDocNums, newSegDocCount, chunkFactor, cr)
|
||||
if err != nil {
|
||||
cleanup()
|
||||
return nil, err
|
||||
}
|
||||
} else {
|
||||
dictLocs = make([]uint64, len(fieldsInv))
|
||||
}
|
||||
|
||||
fieldsIndexOffset, err := persistFields(fieldsInv, cr, dictLocs)
|
||||
if err != nil {
|
||||
cleanup()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
err = persistFooter(newSegDocCount, storedIndexOffset,
|
||||
fieldsIndexOffset, fieldDvLocsOffset, chunkFactor, cr.Sum32(), cr)
|
||||
err = persistFooter(numDocs, storedIndexOffset, fieldsIndexOffset,
|
||||
docValueOffset, chunkFactor, cr.Sum32(), cr)
|
||||
if err != nil {
|
||||
cleanup()
|
||||
return nil, err
|
||||
|
@ -113,21 +95,59 @@ func Merge(segments []*Segment, drops []*roaring.Bitmap, path string,
|
|||
return newDocNums, nil
|
||||
}
|
||||
|
||||
// mapFields takes the fieldsInv list and builds the map
|
||||
func MergeToWriter(segments []*SegmentBase, drops []*roaring.Bitmap,
|
||||
chunkFactor uint32, cr *CountHashWriter) (
|
||||
newDocNums [][]uint64,
|
||||
numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset uint64,
|
||||
dictLocs []uint64, fieldsInv []string, fieldsMap map[string]uint16,
|
||||
err error) {
|
||||
docValueOffset = uint64(fieldNotUninverted)
|
||||
|
||||
var fieldsSame bool
|
||||
fieldsSame, fieldsInv = mergeFields(segments)
|
||||
fieldsMap = mapFields(fieldsInv)
|
||||
|
||||
numDocs = computeNewDocCount(segments, drops)
|
||||
if numDocs > 0 {
|
||||
storedIndexOffset, newDocNums, err = mergeStoredAndRemap(segments, drops,
|
||||
fieldsMap, fieldsInv, fieldsSame, numDocs, cr)
|
||||
if err != nil {
|
||||
return nil, 0, 0, 0, 0, nil, nil, nil, err
|
||||
}
|
||||
|
||||
dictLocs, docValueOffset, err = persistMergedRest(segments, drops, fieldsInv, fieldsMap,
|
||||
newDocNums, numDocs, chunkFactor, cr)
|
||||
if err != nil {
|
||||
return nil, 0, 0, 0, 0, nil, nil, nil, err
|
||||
}
|
||||
} else {
|
||||
dictLocs = make([]uint64, len(fieldsInv))
|
||||
}
|
||||
|
||||
fieldsIndexOffset, err = persistFields(fieldsInv, cr, dictLocs)
|
||||
if err != nil {
|
||||
return nil, 0, 0, 0, 0, nil, nil, nil, err
|
||||
}
|
||||
|
||||
return newDocNums, numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset, dictLocs, fieldsInv, fieldsMap, nil
|
||||
}
|
||||
|
||||
// mapFields takes the fieldsInv list and returns a map of fieldName
|
||||
// to fieldID+1
|
||||
func mapFields(fields []string) map[string]uint16 {
|
||||
rv := make(map[string]uint16, len(fields))
|
||||
for i, fieldName := range fields {
|
||||
rv[fieldName] = uint16(i)
|
||||
rv[fieldName] = uint16(i) + 1
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
// computeNewDocCount determines how many documents will be in the newly
|
||||
// merged segment when obsoleted docs are dropped
|
||||
func computeNewDocCount(segments []*Segment, drops []*roaring.Bitmap) uint64 {
|
||||
func computeNewDocCount(segments []*SegmentBase, drops []*roaring.Bitmap) uint64 {
|
||||
var newDocCount uint64
|
||||
for segI, segment := range segments {
|
||||
newDocCount += segment.NumDocs()
|
||||
newDocCount += segment.numDocs
|
||||
if drops[segI] != nil {
|
||||
newDocCount -= drops[segI].GetCardinality()
|
||||
}
|
||||
|
@ -135,8 +155,8 @@ func computeNewDocCount(segments []*Segment, drops []*roaring.Bitmap) uint64 {
|
|||
return newDocCount
|
||||
}
|
||||
|
||||
func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap,
|
||||
fieldsInv []string, fieldsMap map[string]uint16, newDocNums [][]uint64,
|
||||
func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap,
|
||||
fieldsInv []string, fieldsMap map[string]uint16, newDocNumsIn [][]uint64,
|
||||
newSegDocCount uint64, chunkFactor uint32,
|
||||
w *CountHashWriter) ([]uint64, uint64, error) {
|
||||
|
||||
|
@ -144,9 +164,14 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap,
|
|||
var bufMaxVarintLen64 []byte = make([]byte, binary.MaxVarintLen64)
|
||||
var bufLoc []uint64
|
||||
|
||||
var postings *PostingsList
|
||||
var postItr *PostingsIterator
|
||||
|
||||
rv := make([]uint64, len(fieldsInv))
|
||||
fieldDvLocs := make([]uint64, len(fieldsInv))
|
||||
fieldDvLocsOffset := uint64(fieldNotUninverted)
|
||||
|
||||
tfEncoder := newChunkedIntCoder(uint64(chunkFactor), newSegDocCount-1)
|
||||
locEncoder := newChunkedIntCoder(uint64(chunkFactor), newSegDocCount-1)
|
||||
|
||||
// docTermMap is keyed by docNum, where the array impl provides
|
||||
// better memory usage behavior than a sparse-friendlier hashmap
|
||||
|
@ -166,36 +191,31 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap,
|
|||
return nil, 0, err
|
||||
}
|
||||
|
||||
// collect FST iterators from all segments for this field
|
||||
// collect FST iterators from all active segments for this field
|
||||
var newDocNums [][]uint64
|
||||
var drops []*roaring.Bitmap
|
||||
var dicts []*Dictionary
|
||||
var itrs []vellum.Iterator
|
||||
for _, segment := range segments {
|
||||
|
||||
for segmentI, segment := range segments {
|
||||
dict, err2 := segment.dictionary(fieldName)
|
||||
if err2 != nil {
|
||||
return nil, 0, err2
|
||||
}
|
||||
dicts = append(dicts, dict)
|
||||
|
||||
if dict != nil && dict.fst != nil {
|
||||
itr, err2 := dict.fst.Iterator(nil, nil)
|
||||
if err2 != nil && err2 != vellum.ErrIteratorDone {
|
||||
return nil, 0, err2
|
||||
}
|
||||
if itr != nil {
|
||||
newDocNums = append(newDocNums, newDocNumsIn[segmentI])
|
||||
drops = append(drops, dropsIn[segmentI])
|
||||
dicts = append(dicts, dict)
|
||||
itrs = append(itrs, itr)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// create merging iterator
|
||||
mergeItr, err := vellum.NewMergeIterator(itrs, func(postingOffsets []uint64) uint64 {
|
||||
// we don't actually use the merged value
|
||||
return 0
|
||||
})
|
||||
|
||||
tfEncoder := newChunkedIntCoder(uint64(chunkFactor), newSegDocCount-1)
|
||||
locEncoder := newChunkedIntCoder(uint64(chunkFactor), newSegDocCount-1)
|
||||
|
||||
if uint64(cap(docTermMap)) < newSegDocCount {
|
||||
docTermMap = make([][]byte, newSegDocCount)
|
||||
} else {
|
||||
|
@ -205,30 +225,103 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap,
|
|||
}
|
||||
}
|
||||
|
||||
for err == nil {
|
||||
term, _ := mergeItr.Current()
|
||||
var prevTerm []byte
|
||||
|
||||
newRoaring := roaring.NewBitmap()
|
||||
newRoaringLocs := roaring.NewBitmap()
|
||||
|
||||
finishTerm := func(term []byte) error {
|
||||
if term == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
tfEncoder.Close()
|
||||
locEncoder.Close()
|
||||
|
||||
if newRoaring.GetCardinality() > 0 {
|
||||
// this field/term actually has hits in the new segment, lets write it down
|
||||
freqOffset := uint64(w.Count())
|
||||
_, err := tfEncoder.Write(w)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
locOffset := uint64(w.Count())
|
||||
_, err = locEncoder.Write(w)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
postingLocOffset := uint64(w.Count())
|
||||
_, err = writeRoaringWithLen(newRoaringLocs, w, &bufReuse, bufMaxVarintLen64)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
postingOffset := uint64(w.Count())
|
||||
|
||||
// write out the start of the term info
|
||||
n := binary.PutUvarint(bufMaxVarintLen64, freqOffset)
|
||||
_, err = w.Write(bufMaxVarintLen64[:n])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// write out the start of the loc info
|
||||
n = binary.PutUvarint(bufMaxVarintLen64, locOffset)
|
||||
_, err = w.Write(bufMaxVarintLen64[:n])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// write out the start of the posting locs
|
||||
n = binary.PutUvarint(bufMaxVarintLen64, postingLocOffset)
|
||||
_, err = w.Write(bufMaxVarintLen64[:n])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
_, err = writeRoaringWithLen(newRoaring, w, &bufReuse, bufMaxVarintLen64)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = newVellum.Insert(term, postingOffset)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
newRoaring = roaring.NewBitmap()
|
||||
newRoaringLocs = roaring.NewBitmap()
|
||||
|
||||
tfEncoder.Reset()
|
||||
locEncoder.Reset()
|
||||
|
||||
// now go back and get posting list for this term
|
||||
// but pass in the deleted docs for that segment
|
||||
for dictI, dict := range dicts {
|
||||
if dict == nil {
|
||||
continue
|
||||
return nil
|
||||
}
|
||||
postings, err2 := dict.postingsList(term, drops[dictI])
|
||||
|
||||
enumerator, err := newEnumerator(itrs)
|
||||
|
||||
for err == nil {
|
||||
term, itrI, postingsOffset := enumerator.Current()
|
||||
|
||||
if !bytes.Equal(prevTerm, term) {
|
||||
// if the term changed, write out the info collected
|
||||
// for the previous term
|
||||
err2 := finishTerm(prevTerm)
|
||||
if err2 != nil {
|
||||
return nil, 0, err2
|
||||
}
|
||||
}
|
||||
|
||||
var err2 error
|
||||
postings, err2 = dicts[itrI].postingsListFromOffset(
|
||||
postingsOffset, drops[itrI], postings)
|
||||
if err2 != nil {
|
||||
return nil, 0, err2
|
||||
}
|
||||
|
||||
postItr := postings.Iterator()
|
||||
newDocNumsI := newDocNums[itrI]
|
||||
|
||||
postItr = postings.iterator(postItr)
|
||||
next, err2 := postItr.Next()
|
||||
for next != nil && err2 == nil {
|
||||
hitNewDocNum := newDocNums[dictI][next.Number()]
|
||||
hitNewDocNum := newDocNumsI[next.Number()]
|
||||
if hitNewDocNum == docDropped {
|
||||
return nil, 0, fmt.Errorf("see hit with dropped doc num")
|
||||
}
|
||||
|
@ -248,7 +341,7 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap,
|
|||
bufLoc = make([]uint64, 0, 5+len(loc.ArrayPositions()))
|
||||
}
|
||||
args := bufLoc[0:5]
|
||||
args[0] = uint64(fieldsMap[loc.Field()])
|
||||
args[0] = uint64(fieldsMap[loc.Field()] - 1)
|
||||
args[1] = loc.Pos()
|
||||
args[2] = loc.Start()
|
||||
args[3] = loc.End()
|
||||
|
@ -269,67 +362,21 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap,
|
|||
if err2 != nil {
|
||||
return nil, 0, err2
|
||||
}
|
||||
}
|
||||
|
||||
tfEncoder.Close()
|
||||
locEncoder.Close()
|
||||
prevTerm = prevTerm[:0] // copy to prevTerm in case Next() reuses term mem
|
||||
prevTerm = append(prevTerm, term...)
|
||||
|
||||
if newRoaring.GetCardinality() > 0 {
|
||||
// this field/term actually has hits in the new segment, lets write it down
|
||||
freqOffset := uint64(w.Count())
|
||||
_, err = tfEncoder.Write(w)
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
locOffset := uint64(w.Count())
|
||||
_, err = locEncoder.Write(w)
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
postingLocOffset := uint64(w.Count())
|
||||
_, err = writeRoaringWithLen(newRoaringLocs, w, &bufReuse, bufMaxVarintLen64)
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
postingOffset := uint64(w.Count())
|
||||
// write out the start of the term info
|
||||
buf := bufMaxVarintLen64
|
||||
n := binary.PutUvarint(buf, freqOffset)
|
||||
_, err = w.Write(buf[:n])
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
|
||||
// write out the start of the loc info
|
||||
n = binary.PutUvarint(buf, locOffset)
|
||||
_, err = w.Write(buf[:n])
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
|
||||
// write out the start of the loc posting list
|
||||
n = binary.PutUvarint(buf, postingLocOffset)
|
||||
_, err = w.Write(buf[:n])
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
_, err = writeRoaringWithLen(newRoaring, w, &bufReuse, bufMaxVarintLen64)
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
|
||||
err = newVellum.Insert(term, postingOffset)
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
}
|
||||
|
||||
err = mergeItr.Next()
|
||||
err = enumerator.Next()
|
||||
}
|
||||
if err != nil && err != vellum.ErrIteratorDone {
|
||||
return nil, 0, err
|
||||
}
|
||||
|
||||
err = finishTerm(prevTerm)
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
|
||||
dictOffset := uint64(w.Count())
|
||||
|
||||
err = newVellum.Close()
|
||||
|
@ -378,7 +425,7 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap,
|
|||
}
|
||||
}
|
||||
|
||||
fieldDvLocsOffset = uint64(w.Count())
|
||||
fieldDvLocsOffset := uint64(w.Count())
|
||||
|
||||
buf := bufMaxVarintLen64
|
||||
for _, offset := range fieldDvLocs {
|
||||
|
@ -392,10 +439,8 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap,
|
|||
return rv, fieldDvLocsOffset, nil
|
||||
}
|
||||
|
||||
const docDropped = math.MaxUint64
|
||||
|
||||
func mergeStoredAndRemap(segments []*Segment, drops []*roaring.Bitmap,
|
||||
fieldsMap map[string]uint16, fieldsInv []string, newSegDocCount uint64,
|
||||
func mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap,
|
||||
fieldsMap map[string]uint16, fieldsInv []string, fieldsSame bool, newSegDocCount uint64,
|
||||
w *CountHashWriter) (uint64, [][]uint64, error) {
|
||||
var rv [][]uint64 // The remapped or newDocNums for each segment.
|
||||
|
||||
|
@ -417,10 +462,30 @@ func mergeStoredAndRemap(segments []*Segment, drops []*roaring.Bitmap,
|
|||
for segI, segment := range segments {
|
||||
segNewDocNums := make([]uint64, segment.numDocs)
|
||||
|
||||
dropsI := drops[segI]
|
||||
|
||||
// optimize when the field mapping is the same across all
|
||||
// segments and there are no deletions, via byte-copying
|
||||
// of stored docs bytes directly to the writer
|
||||
if fieldsSame && (dropsI == nil || dropsI.GetCardinality() == 0) {
|
||||
err := segment.copyStoredDocs(newDocNum, docNumOffsets, w)
|
||||
if err != nil {
|
||||
return 0, nil, err
|
||||
}
|
||||
|
||||
for i := uint64(0); i < segment.numDocs; i++ {
|
||||
segNewDocNums[i] = newDocNum
|
||||
newDocNum++
|
||||
}
|
||||
rv = append(rv, segNewDocNums)
|
||||
|
||||
continue
|
||||
}
|
||||
|
||||
// for each doc num
|
||||
for docNum := uint64(0); docNum < segment.numDocs; docNum++ {
|
||||
// TODO: roaring's API limits docNums to 32-bits?
|
||||
if drops[segI] != nil && drops[segI].Contains(uint32(docNum)) {
|
||||
if dropsI != nil && dropsI.Contains(uint32(docNum)) {
|
||||
segNewDocNums[docNum] = docDropped
|
||||
continue
|
||||
}
|
||||
|
@ -439,7 +504,7 @@ func mergeStoredAndRemap(segments []*Segment, drops []*roaring.Bitmap,
|
|||
poss[i] = poss[i][:0]
|
||||
}
|
||||
err := segment.VisitDocument(docNum, func(field string, typ byte, value []byte, pos []uint64) bool {
|
||||
fieldID := int(fieldsMap[field])
|
||||
fieldID := int(fieldsMap[field]) - 1
|
||||
vals[fieldID] = append(vals[fieldID], value)
|
||||
typs[fieldID] = append(typs[fieldID], typ)
|
||||
poss[fieldID] = append(poss[fieldID], pos)
|
||||
|
@ -453,48 +518,15 @@ func mergeStoredAndRemap(segments []*Segment, drops []*roaring.Bitmap,
|
|||
for fieldID := range fieldsInv {
|
||||
storedFieldValues := vals[int(fieldID)]
|
||||
|
||||
// has stored values for this field
|
||||
num := len(storedFieldValues)
|
||||
stf := typs[int(fieldID)]
|
||||
spf := poss[int(fieldID)]
|
||||
|
||||
// process each value
|
||||
for i := 0; i < num; i++ {
|
||||
// encode field
|
||||
_, err2 := metaEncoder.PutU64(uint64(fieldID))
|
||||
var err2 error
|
||||
curr, data, err2 = persistStoredFieldValues(fieldID,
|
||||
storedFieldValues, stf, spf, curr, metaEncoder, data)
|
||||
if err2 != nil {
|
||||
return 0, nil, err2
|
||||
}
|
||||
// encode type
|
||||
_, err2 = metaEncoder.PutU64(uint64(typs[int(fieldID)][i]))
|
||||
if err2 != nil {
|
||||
return 0, nil, err2
|
||||
}
|
||||
// encode start offset
|
||||
_, err2 = metaEncoder.PutU64(uint64(curr))
|
||||
if err2 != nil {
|
||||
return 0, nil, err2
|
||||
}
|
||||
// end len
|
||||
_, err2 = metaEncoder.PutU64(uint64(len(storedFieldValues[i])))
|
||||
if err2 != nil {
|
||||
return 0, nil, err2
|
||||
}
|
||||
// encode number of array pos
|
||||
_, err2 = metaEncoder.PutU64(uint64(len(poss[int(fieldID)][i])))
|
||||
if err2 != nil {
|
||||
return 0, nil, err2
|
||||
}
|
||||
// encode all array positions
|
||||
for j := 0; j < len(poss[int(fieldID)][i]); j++ {
|
||||
_, err2 = metaEncoder.PutU64(poss[int(fieldID)][i][j])
|
||||
if err2 != nil {
|
||||
return 0, nil, err2
|
||||
}
|
||||
}
|
||||
// append data
|
||||
data = append(data, storedFieldValues[i]...)
|
||||
// update curr
|
||||
curr += len(storedFieldValues[i])
|
||||
}
|
||||
}
|
||||
|
||||
metaEncoder.Close()
|
||||
|
@ -528,36 +560,87 @@ func mergeStoredAndRemap(segments []*Segment, drops []*roaring.Bitmap,
|
|||
}
|
||||
|
||||
// return value is the start of the stored index
|
||||
offset := uint64(w.Count())
|
||||
storedIndexOffset := uint64(w.Count())
|
||||
|
||||
// now write out the stored doc index
|
||||
for docNum := range docNumOffsets {
|
||||
err := binary.Write(w, binary.BigEndian, docNumOffsets[docNum])
|
||||
for _, docNumOffset := range docNumOffsets {
|
||||
err := binary.Write(w, binary.BigEndian, docNumOffset)
|
||||
if err != nil {
|
||||
return 0, nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return offset, rv, nil
|
||||
return storedIndexOffset, rv, nil
|
||||
}
|
||||
|
||||
// mergeFields builds a unified list of fields used across all the input segments
|
||||
func mergeFields(segments []*Segment) []string {
|
||||
fieldsMap := map[string]struct{}{}
|
||||
// copyStoredDocs writes out a segment's stored doc info, optimized by
|
||||
// using a single Write() call for the entire set of bytes. The
|
||||
// newDocNumOffsets is filled with the new offsets for each doc.
|
||||
func (s *SegmentBase) copyStoredDocs(newDocNum uint64, newDocNumOffsets []uint64,
|
||||
w *CountHashWriter) error {
|
||||
if s.numDocs <= 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
indexOffset0, storedOffset0, _, _, _ :=
|
||||
s.getDocStoredOffsets(0) // the segment's first doc
|
||||
|
||||
indexOffsetN, storedOffsetN, readN, metaLenN, dataLenN :=
|
||||
s.getDocStoredOffsets(s.numDocs - 1) // the segment's last doc
|
||||
|
||||
storedOffset0New := uint64(w.Count())
|
||||
|
||||
storedBytes := s.mem[storedOffset0 : storedOffsetN+readN+metaLenN+dataLenN]
|
||||
_, err := w.Write(storedBytes)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// remap the storedOffset's for the docs into new offsets relative
|
||||
// to storedOffset0New, filling the given docNumOffsetsOut array
|
||||
for indexOffset := indexOffset0; indexOffset <= indexOffsetN; indexOffset += 8 {
|
||||
storedOffset := binary.BigEndian.Uint64(s.mem[indexOffset : indexOffset+8])
|
||||
storedOffsetNew := storedOffset - storedOffset0 + storedOffset0New
|
||||
newDocNumOffsets[newDocNum] = storedOffsetNew
|
||||
newDocNum += 1
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// mergeFields builds a unified list of fields used across all the
|
||||
// input segments, and computes whether the fields are the same across
|
||||
// segments (which depends on fields to be sorted in the same way
|
||||
// across segments)
|
||||
func mergeFields(segments []*SegmentBase) (bool, []string) {
|
||||
fieldsSame := true
|
||||
|
||||
var segment0Fields []string
|
||||
if len(segments) > 0 {
|
||||
segment0Fields = segments[0].Fields()
|
||||
}
|
||||
|
||||
fieldsExist := map[string]struct{}{}
|
||||
for _, segment := range segments {
|
||||
fields := segment.Fields()
|
||||
for _, field := range fields {
|
||||
fieldsMap[field] = struct{}{}
|
||||
for fieldi, field := range fields {
|
||||
fieldsExist[field] = struct{}{}
|
||||
if len(segment0Fields) != len(fields) || segment0Fields[fieldi] != field {
|
||||
fieldsSame = false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
rv := make([]string, 0, len(fieldsMap))
|
||||
rv := make([]string, 0, len(fieldsExist))
|
||||
// ensure _id stays first
|
||||
rv = append(rv, "_id")
|
||||
for k := range fieldsMap {
|
||||
for k := range fieldsExist {
|
||||
if k != "_id" {
|
||||
rv = append(rv, k)
|
||||
}
|
||||
}
|
||||
return rv
|
||||
|
||||
sort.Strings(rv[1:]) // leave _id as first
|
||||
|
||||
return fieldsSame, rv
|
||||
}
|
||||
|
|
|
@ -28,21 +28,27 @@ import (
|
|||
// PostingsList is an in-memory represenation of a postings list
|
||||
type PostingsList struct {
|
||||
sb *SegmentBase
|
||||
term []byte
|
||||
postingsOffset uint64
|
||||
freqOffset uint64
|
||||
locOffset uint64
|
||||
locBitmap *roaring.Bitmap
|
||||
postings *roaring.Bitmap
|
||||
except *roaring.Bitmap
|
||||
postingKey []byte
|
||||
}
|
||||
|
||||
// Iterator returns an iterator for this postings list
|
||||
func (p *PostingsList) Iterator() segment.PostingsIterator {
|
||||
rv := &PostingsIterator{
|
||||
postings: p,
|
||||
return p.iterator(nil)
|
||||
}
|
||||
|
||||
func (p *PostingsList) iterator(rv *PostingsIterator) *PostingsIterator {
|
||||
if rv == nil {
|
||||
rv = &PostingsIterator{}
|
||||
} else {
|
||||
*rv = PostingsIterator{} // clear the struct
|
||||
}
|
||||
rv.postings = p
|
||||
|
||||
if p.postings != nil {
|
||||
// prepare the freq chunk details
|
||||
var n uint64
|
||||
|
|
|
@ -17,15 +17,27 @@ package zap
|
|||
import "encoding/binary"
|
||||
|
||||
func (s *SegmentBase) getDocStoredMetaAndCompressed(docNum uint64) ([]byte, []byte) {
|
||||
docStoredStartAddr := s.storedIndexOffset + (8 * docNum)
|
||||
docStoredStart := binary.BigEndian.Uint64(s.mem[docStoredStartAddr : docStoredStartAddr+8])
|
||||
var n uint64
|
||||
metaLen, read := binary.Uvarint(s.mem[docStoredStart : docStoredStart+binary.MaxVarintLen64])
|
||||
n += uint64(read)
|
||||
var dataLen uint64
|
||||
dataLen, read = binary.Uvarint(s.mem[docStoredStart+n : docStoredStart+n+binary.MaxVarintLen64])
|
||||
n += uint64(read)
|
||||
meta := s.mem[docStoredStart+n : docStoredStart+n+metaLen]
|
||||
data := s.mem[docStoredStart+n+metaLen : docStoredStart+n+metaLen+dataLen]
|
||||
_, storedOffset, n, metaLen, dataLen := s.getDocStoredOffsets(docNum)
|
||||
|
||||
meta := s.mem[storedOffset+n : storedOffset+n+metaLen]
|
||||
data := s.mem[storedOffset+n+metaLen : storedOffset+n+metaLen+dataLen]
|
||||
|
||||
return meta, data
|
||||
}
|
||||
|
||||
func (s *SegmentBase) getDocStoredOffsets(docNum uint64) (
|
||||
uint64, uint64, uint64, uint64, uint64) {
|
||||
indexOffset := s.storedIndexOffset + (8 * docNum)
|
||||
|
||||
storedOffset := binary.BigEndian.Uint64(s.mem[indexOffset : indexOffset+8])
|
||||
|
||||
var n uint64
|
||||
|
||||
metaLen, read := binary.Uvarint(s.mem[storedOffset : storedOffset+binary.MaxVarintLen64])
|
||||
n += uint64(read)
|
||||
|
||||
dataLen, read := binary.Uvarint(s.mem[storedOffset+n : storedOffset+n+binary.MaxVarintLen64])
|
||||
n += uint64(read)
|
||||
|
||||
return indexOffset, storedOffset, n, metaLen, dataLen
|
||||
}
|
||||
|
|
|
@ -343,8 +343,9 @@ func (s *SegmentBase) DocNumbers(ids []string) (*roaring.Bitmap, error) {
|
|||
return nil, err
|
||||
}
|
||||
|
||||
var postings *PostingsList
|
||||
for _, id := range ids {
|
||||
postings, err := idDict.postingsList([]byte(id), nil)
|
||||
postings, err = idDict.postingsList([]byte(id), nil, postings)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
|
|
@ -31,10 +31,9 @@ func (r *RollbackPoint) GetInternal(key []byte) []byte {
|
|||
return r.meta[string(key)]
|
||||
}
|
||||
|
||||
// RollbackPoints returns an array of rollback points available
|
||||
// for the application to make a decision on where to rollback
|
||||
// to. A nil return value indicates that there are no available
|
||||
// rollback points.
|
||||
// RollbackPoints returns an array of rollback points available for
|
||||
// the application to rollback to, with more recent rollback points
|
||||
// (higher epochs) coming first.
|
||||
func (s *Scorch) RollbackPoints() ([]*RollbackPoint, error) {
|
||||
if s.rootBolt == nil {
|
||||
return nil, fmt.Errorf("RollbackPoints: root is nil")
|
||||
|
@ -54,7 +53,7 @@ func (s *Scorch) RollbackPoints() ([]*RollbackPoint, error) {
|
|||
|
||||
snapshots := tx.Bucket(boltSnapshotsBucket)
|
||||
if snapshots == nil {
|
||||
return nil, fmt.Errorf("RollbackPoints: no snapshots available")
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
rollbackPoints := []*RollbackPoint{}
|
||||
|
@ -150,10 +149,7 @@ func (s *Scorch) Rollback(to *RollbackPoint) error {
|
|||
|
||||
revert.snapshot = indexSnapshot
|
||||
revert.applied = make(chan error)
|
||||
|
||||
if !s.unsafeBatch {
|
||||
revert.persisted = make(chan error)
|
||||
}
|
||||
|
||||
return nil
|
||||
})
|
||||
|
@ -173,9 +169,5 @@ func (s *Scorch) Rollback(to *RollbackPoint) error {
|
|||
return fmt.Errorf("Rollback: failed with err: %v", err)
|
||||
}
|
||||
|
||||
if revert.persisted != nil {
|
||||
err = <-revert.persisted
|
||||
}
|
||||
|
||||
return err
|
||||
return <-revert.persisted
|
||||
}
|
||||
|
|
|
@ -837,6 +837,11 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) {
|
|||
docBackIndexRowErr = err
|
||||
return
|
||||
}
|
||||
defer func() {
|
||||
if cerr := kvreader.Close(); err == nil && cerr != nil {
|
||||
docBackIndexRowErr = cerr
|
||||
}
|
||||
}()
|
||||
|
||||
for docID, doc := range batch.IndexOps {
|
||||
backIndexRow, err := backIndexRowForDoc(kvreader, index.IndexInternalID(docID))
|
||||
|
@ -847,12 +852,6 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) {
|
|||
|
||||
docBackIndexRowCh <- &docBackIndexRow{docID, doc, backIndexRow}
|
||||
}
|
||||
|
||||
err = kvreader.Close()
|
||||
if err != nil {
|
||||
docBackIndexRowErr = err
|
||||
return
|
||||
}
|
||||
}()
|
||||
|
||||
// wait for analysis result
|
||||
|
|
|
@ -15,12 +15,11 @@
|
|||
package bleve
|
||||
|
||||
import (
|
||||
"context"
|
||||
"sort"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"golang.org/x/net/context"
|
||||
|
||||
"github.com/blevesearch/bleve/document"
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/index/store"
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
package bleve
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
|
@ -22,8 +23,6 @@ import (
|
|||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"golang.org/x/net/context"
|
||||
|
||||
"github.com/blevesearch/bleve/document"
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/index/store"
|
||||
|
|
|
@ -15,11 +15,10 @@
|
|||
package search
|
||||
|
||||
import (
|
||||
"context"
|
||||
"time"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
|
||||
"golang.org/x/net/context"
|
||||
)
|
||||
|
||||
type Collector interface {
|
||||
|
|
|
@ -15,11 +15,11 @@
|
|||
package collector
|
||||
|
||||
import (
|
||||
"context"
|
||||
"time"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/search"
|
||||
"golang.org/x/net/context"
|
||||
)
|
||||
|
||||
type collectorStore interface {
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
package goth
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net/http"
|
||||
|
||||
"golang.org/x/net/context"
|
||||
"golang.org/x/oauth2"
|
||||
)
|
||||
|
||||
|
|
|
@ -4,17 +4,18 @@ package facebook
|
|||
|
||||
import (
|
||||
"bytes"
|
||||
"crypto/hmac"
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
|
||||
"crypto/hmac"
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
"github.com/markbates/goth"
|
||||
"golang.org/x/oauth2"
|
||||
)
|
||||
|
@ -22,7 +23,7 @@ import (
|
|||
const (
|
||||
authURL string = "https://www.facebook.com/dialog/oauth"
|
||||
tokenURL string = "https://graph.facebook.com/oauth/access_token"
|
||||
endpointProfile string = "https://graph.facebook.com/me?fields=email,first_name,last_name,link,about,id,name,picture,location"
|
||||
endpointProfile string = "https://graph.facebook.com/me?fields="
|
||||
)
|
||||
|
||||
// New creates a new Facebook provider, and sets up important connection details.
|
||||
|
@ -68,9 +69,9 @@ func (p *Provider) Debug(debug bool) {}
|
|||
|
||||
// BeginAuth asks Facebook for an authentication end-point.
|
||||
func (p *Provider) BeginAuth(state string) (goth.Session, error) {
|
||||
url := p.config.AuthCodeURL(state)
|
||||
authUrl := p.config.AuthCodeURL(state)
|
||||
session := &Session{
|
||||
AuthURL: url,
|
||||
AuthURL: authUrl,
|
||||
}
|
||||
return session, nil
|
||||
}
|
||||
|
@ -96,7 +97,15 @@ func (p *Provider) FetchUser(session goth.Session) (goth.User, error) {
|
|||
hash.Write([]byte(sess.AccessToken))
|
||||
appsecretProof := hex.EncodeToString(hash.Sum(nil))
|
||||
|
||||
response, err := p.Client().Get(endpointProfile + "&access_token=" + url.QueryEscape(sess.AccessToken) + "&appsecret_proof=" + appsecretProof)
|
||||
reqUrl := fmt.Sprint(
|
||||
endpointProfile,
|
||||
strings.Join(p.config.Scopes, ","),
|
||||
"&access_token=",
|
||||
url.QueryEscape(sess.AccessToken),
|
||||
"&appsecret_proof=",
|
||||
appsecretProof,
|
||||
)
|
||||
response, err := p.Client().Get(reqUrl)
|
||||
if err != nil {
|
||||
return user, err
|
||||
}
|
||||
|
@ -168,17 +177,31 @@ func newConfig(provider *Provider, scopes []string) *oauth2.Config {
|
|||
},
|
||||
Scopes: []string{
|
||||
"email",
|
||||
"first_name",
|
||||
"last_name",
|
||||
"link",
|
||||
"about",
|
||||
"id",
|
||||
"name",
|
||||
"picture",
|
||||
"location",
|
||||
},
|
||||
}
|
||||
|
||||
defaultScopes := map[string]struct{}{
|
||||
"email": {},
|
||||
// creates possibility to invoke field method like 'picture.type(large)'
|
||||
var found bool
|
||||
for _, sc := range scopes {
|
||||
sc := sc
|
||||
for i, defScope := range c.Scopes {
|
||||
if defScope == strings.Split(sc, ".")[0] {
|
||||
c.Scopes[i] = sc
|
||||
found = true
|
||||
}
|
||||
|
||||
for _, scope := range scopes {
|
||||
if _, exists := defaultScopes[scope]; !exists {
|
||||
c.Scopes = append(c.Scopes, scope)
|
||||
}
|
||||
if !found {
|
||||
c.Scopes = append(c.Scopes, sc)
|
||||
}
|
||||
found = false
|
||||
}
|
||||
|
||||
return c
|
||||
|
|
|
@ -0,0 +1,74 @@
|
|||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build go1.7
|
||||
|
||||
// Package ctxhttp provides helper functions for performing context-aware HTTP requests.
|
||||
package ctxhttp // import "golang.org/x/net/context/ctxhttp"
|
||||
|
||||
import (
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/net/context"
|
||||
)
|
||||
|
||||
// Do sends an HTTP request with the provided http.Client and returns
|
||||
// an HTTP response.
|
||||
//
|
||||
// If the client is nil, http.DefaultClient is used.
|
||||
//
|
||||
// The provided ctx must be non-nil. If it is canceled or times out,
|
||||
// ctx.Err() will be returned.
|
||||
func Do(ctx context.Context, client *http.Client, req *http.Request) (*http.Response, error) {
|
||||
if client == nil {
|
||||
client = http.DefaultClient
|
||||
}
|
||||
resp, err := client.Do(req.WithContext(ctx))
|
||||
// If we got an error, and the context has been canceled,
|
||||
// the context's error is probably more useful.
|
||||
if err != nil {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
err = ctx.Err()
|
||||
default:
|
||||
}
|
||||
}
|
||||
return resp, err
|
||||
}
|
||||
|
||||
// Get issues a GET request via the Do function.
|
||||
func Get(ctx context.Context, client *http.Client, url string) (*http.Response, error) {
|
||||
req, err := http.NewRequest("GET", url, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return Do(ctx, client, req)
|
||||
}
|
||||
|
||||
// Head issues a HEAD request via the Do function.
|
||||
func Head(ctx context.Context, client *http.Client, url string) (*http.Response, error) {
|
||||
req, err := http.NewRequest("HEAD", url, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return Do(ctx, client, req)
|
||||
}
|
||||
|
||||
// Post issues a POST request via the Do function.
|
||||
func Post(ctx context.Context, client *http.Client, url string, bodyType string, body io.Reader) (*http.Response, error) {
|
||||
req, err := http.NewRequest("POST", url, body)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
req.Header.Set("Content-Type", bodyType)
|
||||
return Do(ctx, client, req)
|
||||
}
|
||||
|
||||
// PostForm issues a POST request via the Do function.
|
||||
func PostForm(ctx context.Context, client *http.Client, url string, data url.Values) (*http.Response, error) {
|
||||
return Post(ctx, client, url, "application/x-www-form-urlencoded", strings.NewReader(data.Encode()))
|
||||
}
|
|
@ -0,0 +1,147 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build !go1.7
|
||||
|
||||
package ctxhttp // import "golang.org/x/net/context/ctxhttp"
|
||||
|
||||
import (
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/net/context"
|
||||
)
|
||||
|
||||
func nop() {}
|
||||
|
||||
var (
|
||||
testHookContextDoneBeforeHeaders = nop
|
||||
testHookDoReturned = nop
|
||||
testHookDidBodyClose = nop
|
||||
)
|
||||
|
||||
// Do sends an HTTP request with the provided http.Client and returns an HTTP response.
|
||||
// If the client is nil, http.DefaultClient is used.
|
||||
// If the context is canceled or times out, ctx.Err() will be returned.
|
||||
func Do(ctx context.Context, client *http.Client, req *http.Request) (*http.Response, error) {
|
||||
if client == nil {
|
||||
client = http.DefaultClient
|
||||
}
|
||||
|
||||
// TODO(djd): Respect any existing value of req.Cancel.
|
||||
cancel := make(chan struct{})
|
||||
req.Cancel = cancel
|
||||
|
||||
type responseAndError struct {
|
||||
resp *http.Response
|
||||
err error
|
||||
}
|
||||
result := make(chan responseAndError, 1)
|
||||
|
||||
// Make local copies of test hooks closed over by goroutines below.
|
||||
// Prevents data races in tests.
|
||||
testHookDoReturned := testHookDoReturned
|
||||
testHookDidBodyClose := testHookDidBodyClose
|
||||
|
||||
go func() {
|
||||
resp, err := client.Do(req)
|
||||
testHookDoReturned()
|
||||
result <- responseAndError{resp, err}
|
||||
}()
|
||||
|
||||
var resp *http.Response
|
||||
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
testHookContextDoneBeforeHeaders()
|
||||
close(cancel)
|
||||
// Clean up after the goroutine calling client.Do:
|
||||
go func() {
|
||||
if r := <-result; r.resp != nil {
|
||||
testHookDidBodyClose()
|
||||
r.resp.Body.Close()
|
||||
}
|
||||
}()
|
||||
return nil, ctx.Err()
|
||||
case r := <-result:
|
||||
var err error
|
||||
resp, err = r.resp, r.err
|
||||
if err != nil {
|
||||
return resp, err
|
||||
}
|
||||
}
|
||||
|
||||
c := make(chan struct{})
|
||||
go func() {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
close(cancel)
|
||||
case <-c:
|
||||
// The response's Body is closed.
|
||||
}
|
||||
}()
|
||||
resp.Body = ¬ifyingReader{resp.Body, c}
|
||||
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
// Get issues a GET request via the Do function.
|
||||
func Get(ctx context.Context, client *http.Client, url string) (*http.Response, error) {
|
||||
req, err := http.NewRequest("GET", url, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return Do(ctx, client, req)
|
||||
}
|
||||
|
||||
// Head issues a HEAD request via the Do function.
|
||||
func Head(ctx context.Context, client *http.Client, url string) (*http.Response, error) {
|
||||
req, err := http.NewRequest("HEAD", url, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return Do(ctx, client, req)
|
||||
}
|
||||
|
||||
// Post issues a POST request via the Do function.
|
||||
func Post(ctx context.Context, client *http.Client, url string, bodyType string, body io.Reader) (*http.Response, error) {
|
||||
req, err := http.NewRequest("POST", url, body)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
req.Header.Set("Content-Type", bodyType)
|
||||
return Do(ctx, client, req)
|
||||
}
|
||||
|
||||
// PostForm issues a POST request via the Do function.
|
||||
func PostForm(ctx context.Context, client *http.Client, url string, data url.Values) (*http.Response, error) {
|
||||
return Post(ctx, client, url, "application/x-www-form-urlencoded", strings.NewReader(data.Encode()))
|
||||
}
|
||||
|
||||
// notifyingReader is an io.ReadCloser that closes the notify channel after
|
||||
// Close is called or a Read fails on the underlying ReadCloser.
|
||||
type notifyingReader struct {
|
||||
io.ReadCloser
|
||||
notify chan<- struct{}
|
||||
}
|
||||
|
||||
func (r *notifyingReader) Read(p []byte) (int, error) {
|
||||
n, err := r.ReadCloser.Read(p)
|
||||
if err != nil && r.notify != nil {
|
||||
close(r.notify)
|
||||
r.notify = nil
|
||||
}
|
||||
return n, err
|
||||
}
|
||||
|
||||
func (r *notifyingReader) Close() error {
|
||||
err := r.ReadCloser.Close()
|
||||
if r.notify != nil {
|
||||
close(r.notify)
|
||||
r.notify = nil
|
||||
}
|
||||
return err
|
||||
}
|
|
@ -1,4 +1,4 @@
|
|||
Copyright (c) 2009 The oauth2 Authors. All rights reserved.
|
||||
Copyright (c) 2009 The Go Authors. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
|
|
@ -1,25 +0,0 @@
|
|||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build appengine
|
||||
|
||||
// App Engine hooks.
|
||||
|
||||
package oauth2
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
|
||||
"golang.org/x/net/context"
|
||||
"golang.org/x/oauth2/internal"
|
||||
"google.golang.org/appengine/urlfetch"
|
||||
)
|
||||
|
||||
func init() {
|
||||
internal.RegisterContextClientFunc(contextClientAppEngine)
|
||||
}
|
||||
|
||||
func contextClientAppEngine(ctx context.Context) (*http.Client, error) {
|
||||
return urlfetch.Client(ctx), nil
|
||||
}
|
|
@ -0,0 +1,13 @@
|
|||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build appengine
|
||||
|
||||
package internal
|
||||
|
||||
import "google.golang.org/appengine/urlfetch"
|
||||
|
||||
func init() {
|
||||
appengineClientHook = urlfetch.Client
|
||||
}
|
|
@ -0,0 +1,6 @@
|
|||
// Copyright 2017 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package internal contains support packages for oauth2 package.
|
||||
package internal
|
|
@ -2,18 +2,14 @@
|
|||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package internal contains support packages for oauth2 package.
|
||||
package internal
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"crypto/rsa"
|
||||
"crypto/x509"
|
||||
"encoding/pem"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// ParseKey converts the binary contents of a private key file
|
||||
|
@ -30,7 +26,7 @@ func ParseKey(key []byte) (*rsa.PrivateKey, error) {
|
|||
if err != nil {
|
||||
parsedKey, err = x509.ParsePKCS1PrivateKey(key)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("private key should be a PEM or plain PKSC1 or PKCS8; parse error: %v", err)
|
||||
return nil, fmt.Errorf("private key should be a PEM or plain PKCS1 or PKCS8; parse error: %v", err)
|
||||
}
|
||||
}
|
||||
parsed, ok := parsedKey.(*rsa.PrivateKey)
|
||||
|
@ -39,38 +35,3 @@ func ParseKey(key []byte) (*rsa.PrivateKey, error) {
|
|||
}
|
||||
return parsed, nil
|
||||
}
|
||||
|
||||
func ParseINI(ini io.Reader) (map[string]map[string]string, error) {
|
||||
result := map[string]map[string]string{
|
||||
"": map[string]string{}, // root section
|
||||
}
|
||||
scanner := bufio.NewScanner(ini)
|
||||
currentSection := ""
|
||||
for scanner.Scan() {
|
||||
line := strings.TrimSpace(scanner.Text())
|
||||
if strings.HasPrefix(line, ";") {
|
||||
// comment.
|
||||
continue
|
||||
}
|
||||
if strings.HasPrefix(line, "[") && strings.HasSuffix(line, "]") {
|
||||
currentSection = strings.TrimSpace(line[1 : len(line)-1])
|
||||
result[currentSection] = map[string]string{}
|
||||
continue
|
||||
}
|
||||
parts := strings.SplitN(line, "=", 2)
|
||||
if len(parts) == 2 && parts[0] != "" {
|
||||
result[currentSection][strings.TrimSpace(parts[0])] = strings.TrimSpace(parts[1])
|
||||
}
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
return nil, fmt.Errorf("error scanning ini: %v", err)
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func CondVal(v string) []string {
|
||||
if v == "" {
|
||||
return nil
|
||||
}
|
||||
return []string{v}
|
||||
}
|
||||
|
|
|
@ -2,11 +2,12 @@
|
|||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package internal contains support packages for oauth2 package.
|
||||
package internal
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
|
@ -17,10 +18,10 @@ import (
|
|||
"strings"
|
||||
"time"
|
||||
|
||||
"golang.org/x/net/context"
|
||||
"golang.org/x/net/context/ctxhttp"
|
||||
)
|
||||
|
||||
// Token represents the crendentials used to authorize
|
||||
// Token represents the credentials used to authorize
|
||||
// the requests to access protected resources on the OAuth 2.0
|
||||
// provider's backend.
|
||||
//
|
||||
|
@ -91,6 +92,7 @@ func (e *expirationTime) UnmarshalJSON(b []byte) error {
|
|||
|
||||
var brokenAuthHeaderProviders = []string{
|
||||
"https://accounts.google.com/",
|
||||
"https://api.codeswholesale.com/oauth/token",
|
||||
"https://api.dropbox.com/",
|
||||
"https://api.dropboxapi.com/",
|
||||
"https://api.instagram.com/",
|
||||
|
@ -99,10 +101,16 @@ var brokenAuthHeaderProviders = []string{
|
|||
"https://api.pushbullet.com/",
|
||||
"https://api.soundcloud.com/",
|
||||
"https://api.twitch.tv/",
|
||||
"https://id.twitch.tv/",
|
||||
"https://app.box.com/",
|
||||
"https://api.box.com/",
|
||||
"https://connect.stripe.com/",
|
||||
"https://login.mailchimp.com/",
|
||||
"https://login.microsoftonline.com/",
|
||||
"https://login.salesforce.com/",
|
||||
"https://login.windows.net",
|
||||
"https://login.live.com/",
|
||||
"https://login.live-int.com/",
|
||||
"https://oauth.sandbox.trainingpeaks.com/",
|
||||
"https://oauth.trainingpeaks.com/",
|
||||
"https://oauth.vk.com/",
|
||||
|
@ -117,6 +125,24 @@ var brokenAuthHeaderProviders = []string{
|
|||
"https://www.strava.com/oauth/",
|
||||
"https://www.wunderlist.com/oauth/",
|
||||
"https://api.patreon.com/",
|
||||
"https://sandbox.codeswholesale.com/oauth/token",
|
||||
"https://api.sipgate.com/v1/authorization/oauth",
|
||||
"https://api.medium.com/v1/tokens",
|
||||
"https://log.finalsurge.com/oauth/token",
|
||||
"https://multisport.todaysplan.com.au/rest/oauth/access_token",
|
||||
"https://whats.todaysplan.com.au/rest/oauth/access_token",
|
||||
"https://stackoverflow.com/oauth/access_token",
|
||||
"https://account.health.nokia.com",
|
||||
"https://accounts.zoho.com",
|
||||
}
|
||||
|
||||
// brokenAuthHeaderDomains lists broken providers that issue dynamic endpoints.
|
||||
var brokenAuthHeaderDomains = []string{
|
||||
".auth0.com",
|
||||
".force.com",
|
||||
".myshopify.com",
|
||||
".okta.com",
|
||||
".oktapreview.com",
|
||||
}
|
||||
|
||||
func RegisterBrokenAuthHeaderProvider(tokenURL string) {
|
||||
|
@ -139,6 +165,14 @@ func providerAuthHeaderWorks(tokenURL string) bool {
|
|||
}
|
||||
}
|
||||
|
||||
if u, err := url.Parse(tokenURL); err == nil {
|
||||
for _, s := range brokenAuthHeaderDomains {
|
||||
if strings.HasSuffix(u.Host, s) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Assume the provider implements the spec properly
|
||||
// otherwise. We can add more exceptions as they're
|
||||
// discovered. We will _not_ be adding configurable hooks
|
||||
|
@ -147,24 +181,24 @@ func providerAuthHeaderWorks(tokenURL string) bool {
|
|||
}
|
||||
|
||||
func RetrieveToken(ctx context.Context, clientID, clientSecret, tokenURL string, v url.Values) (*Token, error) {
|
||||
hc, err := ContextClient(ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
v.Set("client_id", clientID)
|
||||
bustedAuth := !providerAuthHeaderWorks(tokenURL)
|
||||
if bustedAuth && clientSecret != "" {
|
||||
if bustedAuth {
|
||||
if clientID != "" {
|
||||
v.Set("client_id", clientID)
|
||||
}
|
||||
if clientSecret != "" {
|
||||
v.Set("client_secret", clientSecret)
|
||||
}
|
||||
}
|
||||
req, err := http.NewRequest("POST", tokenURL, strings.NewReader(v.Encode()))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
|
||||
if !bustedAuth {
|
||||
req.SetBasicAuth(clientID, clientSecret)
|
||||
req.SetBasicAuth(url.QueryEscape(clientID), url.QueryEscape(clientSecret))
|
||||
}
|
||||
r, err := hc.Do(req)
|
||||
r, err := ctxhttp.Do(ctx, ContextClient(ctx), req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
@ -174,7 +208,10 @@ func RetrieveToken(ctx context.Context, clientID, clientSecret, tokenURL string,
|
|||
return nil, fmt.Errorf("oauth2: cannot fetch token: %v", err)
|
||||
}
|
||||
if code := r.StatusCode; code < 200 || code > 299 {
|
||||
return nil, fmt.Errorf("oauth2: cannot fetch token: %v\nResponse: %s", r.Status, body)
|
||||
return nil, &RetrieveError{
|
||||
Response: r,
|
||||
Body: body,
|
||||
}
|
||||
}
|
||||
|
||||
var token *Token
|
||||
|
@ -221,5 +258,17 @@ func RetrieveToken(ctx context.Context, clientID, clientSecret, tokenURL string,
|
|||
if token.RefreshToken == "" {
|
||||
token.RefreshToken = v.Get("refresh_token")
|
||||
}
|
||||
if token.AccessToken == "" {
|
||||
return token, errors.New("oauth2: server response missing access_token")
|
||||
}
|
||||
return token, nil
|
||||
}
|
||||
|
||||
type RetrieveError struct {
|
||||
Response *http.Response
|
||||
Body []byte
|
||||
}
|
||||
|
||||
func (r *RetrieveError) Error() string {
|
||||
return fmt.Sprintf("oauth2: cannot fetch token: %v\nResponse: %s", r.Response.Status, r.Body)
|
||||
}
|
||||
|
|
|
@ -2,13 +2,11 @@
|
|||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package internal contains support packages for oauth2 package.
|
||||
package internal
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/http"
|
||||
|
||||
"golang.org/x/net/context"
|
||||
)
|
||||
|
||||
// HTTPClient is the context key to use with golang.org/x/net/context's
|
||||
|
@ -20,50 +18,16 @@ var HTTPClient ContextKey
|
|||
// because nobody else can create a ContextKey, being unexported.
|
||||
type ContextKey struct{}
|
||||
|
||||
// ContextClientFunc is a func which tries to return an *http.Client
|
||||
// given a Context value. If it returns an error, the search stops
|
||||
// with that error. If it returns (nil, nil), the search continues
|
||||
// down the list of registered funcs.
|
||||
type ContextClientFunc func(context.Context) (*http.Client, error)
|
||||
var appengineClientHook func(context.Context) *http.Client
|
||||
|
||||
var contextClientFuncs []ContextClientFunc
|
||||
|
||||
func RegisterContextClientFunc(fn ContextClientFunc) {
|
||||
contextClientFuncs = append(contextClientFuncs, fn)
|
||||
}
|
||||
|
||||
func ContextClient(ctx context.Context) (*http.Client, error) {
|
||||
func ContextClient(ctx context.Context) *http.Client {
|
||||
if ctx != nil {
|
||||
if hc, ok := ctx.Value(HTTPClient).(*http.Client); ok {
|
||||
return hc, nil
|
||||
return hc
|
||||
}
|
||||
}
|
||||
for _, fn := range contextClientFuncs {
|
||||
c, err := fn(ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
if appengineClientHook != nil {
|
||||
return appengineClientHook(ctx)
|
||||
}
|
||||
if c != nil {
|
||||
return c, nil
|
||||
}
|
||||
}
|
||||
return http.DefaultClient, nil
|
||||
}
|
||||
|
||||
func ContextTransport(ctx context.Context) http.RoundTripper {
|
||||
hc, err := ContextClient(ctx)
|
||||
// This is a rare error case (somebody using nil on App Engine).
|
||||
if err != nil {
|
||||
return ErrorTransport{err}
|
||||
}
|
||||
return hc.Transport
|
||||
}
|
||||
|
||||
// ErrorTransport returns the specified error on RoundTrip.
|
||||
// This RoundTripper should be used in rare error cases where
|
||||
// error handling can be postponed to response handling time.
|
||||
type ErrorTransport struct{ Err error }
|
||||
|
||||
func (t ErrorTransport) RoundTrip(*http.Request) (*http.Response, error) {
|
||||
return nil, t.Err
|
||||
return http.DefaultClient
|
||||
}
|
||||
|
|
|
@ -3,19 +3,20 @@
|
|||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package oauth2 provides support for making
|
||||
// OAuth2 authorized and authenticated HTTP requests.
|
||||
// OAuth2 authorized and authenticated HTTP requests,
|
||||
// as specified in RFC 6749.
|
||||
// It can additionally grant authorization with Bearer JWT.
|
||||
package oauth2 // import "golang.org/x/oauth2"
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"errors"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"golang.org/x/net/context"
|
||||
"golang.org/x/oauth2/internal"
|
||||
)
|
||||
|
||||
|
@ -117,21 +118,30 @@ func SetAuthURLParam(key, value string) AuthCodeOption {
|
|||
// that asks for permissions for the required scopes explicitly.
|
||||
//
|
||||
// State is a token to protect the user from CSRF attacks. You must
|
||||
// always provide a non-zero string and validate that it matches the
|
||||
// always provide a non-empty string and validate that it matches the
|
||||
// the state query parameter on your redirect callback.
|
||||
// See http://tools.ietf.org/html/rfc6749#section-10.12 for more info.
|
||||
//
|
||||
// Opts may include AccessTypeOnline or AccessTypeOffline, as well
|
||||
// as ApprovalForce.
|
||||
// It can also be used to pass the PKCE challange.
|
||||
// See https://www.oauth.com/oauth2-servers/pkce/ for more info.
|
||||
func (c *Config) AuthCodeURL(state string, opts ...AuthCodeOption) string {
|
||||
var buf bytes.Buffer
|
||||
buf.WriteString(c.Endpoint.AuthURL)
|
||||
v := url.Values{
|
||||
"response_type": {"code"},
|
||||
"client_id": {c.ClientID},
|
||||
"redirect_uri": internal.CondVal(c.RedirectURL),
|
||||
"scope": internal.CondVal(strings.Join(c.Scopes, " ")),
|
||||
"state": internal.CondVal(state),
|
||||
}
|
||||
if c.RedirectURL != "" {
|
||||
v.Set("redirect_uri", c.RedirectURL)
|
||||
}
|
||||
if len(c.Scopes) > 0 {
|
||||
v.Set("scope", strings.Join(c.Scopes, " "))
|
||||
}
|
||||
if state != "" {
|
||||
// TODO(light): Docs say never to omit state; don't allow empty.
|
||||
v.Set("state", state)
|
||||
}
|
||||
for _, opt := range opts {
|
||||
opt.setValue(v)
|
||||
|
@ -157,12 +167,15 @@ func (c *Config) AuthCodeURL(state string, opts ...AuthCodeOption) string {
|
|||
// The HTTP client to use is derived from the context.
|
||||
// If nil, http.DefaultClient is used.
|
||||
func (c *Config) PasswordCredentialsToken(ctx context.Context, username, password string) (*Token, error) {
|
||||
return retrieveToken(ctx, c, url.Values{
|
||||
v := url.Values{
|
||||
"grant_type": {"password"},
|
||||
"username": {username},
|
||||
"password": {password},
|
||||
"scope": internal.CondVal(strings.Join(c.Scopes, " ")),
|
||||
})
|
||||
}
|
||||
if len(c.Scopes) > 0 {
|
||||
v.Set("scope", strings.Join(c.Scopes, " "))
|
||||
}
|
||||
return retrieveToken(ctx, c, v)
|
||||
}
|
||||
|
||||
// Exchange converts an authorization code into a token.
|
||||
|
@ -175,13 +188,21 @@ func (c *Config) PasswordCredentialsToken(ctx context.Context, username, passwor
|
|||
//
|
||||
// The code will be in the *http.Request.FormValue("code"). Before
|
||||
// calling Exchange, be sure to validate FormValue("state").
|
||||
func (c *Config) Exchange(ctx context.Context, code string) (*Token, error) {
|
||||
return retrieveToken(ctx, c, url.Values{
|
||||
//
|
||||
// Opts may include the PKCE verifier code if previously used in AuthCodeURL.
|
||||
// See https://www.oauth.com/oauth2-servers/pkce/ for more info.
|
||||
func (c *Config) Exchange(ctx context.Context, code string, opts ...AuthCodeOption) (*Token, error) {
|
||||
v := url.Values{
|
||||
"grant_type": {"authorization_code"},
|
||||
"code": {code},
|
||||
"redirect_uri": internal.CondVal(c.RedirectURL),
|
||||
"scope": internal.CondVal(strings.Join(c.Scopes, " ")),
|
||||
})
|
||||
}
|
||||
if c.RedirectURL != "" {
|
||||
v.Set("redirect_uri", c.RedirectURL)
|
||||
}
|
||||
for _, opt := range opts {
|
||||
opt.setValue(v)
|
||||
}
|
||||
return retrieveToken(ctx, c, v)
|
||||
}
|
||||
|
||||
// Client returns an HTTP client using the provided token.
|
||||
|
@ -292,20 +313,20 @@ var HTTPClient internal.ContextKey
|
|||
// NewClient creates an *http.Client from a Context and TokenSource.
|
||||
// The returned client is not valid beyond the lifetime of the context.
|
||||
//
|
||||
// Note that if a custom *http.Client is provided via the Context it
|
||||
// is used only for token acquisition and is not used to configure the
|
||||
// *http.Client returned from NewClient.
|
||||
//
|
||||
// As a special case, if src is nil, a non-OAuth2 client is returned
|
||||
// using the provided context. This exists to support related OAuth2
|
||||
// packages.
|
||||
func NewClient(ctx context.Context, src TokenSource) *http.Client {
|
||||
if src == nil {
|
||||
c, err := internal.ContextClient(ctx)
|
||||
if err != nil {
|
||||
return &http.Client{Transport: internal.ErrorTransport{Err: err}}
|
||||
}
|
||||
return c
|
||||
return internal.ContextClient(ctx)
|
||||
}
|
||||
return &http.Client{
|
||||
Transport: &Transport{
|
||||
Base: internal.ContextTransport(ctx),
|
||||
Base: internal.ContextClient(ctx).Transport,
|
||||
Source: ReuseTokenSource(nil, src),
|
||||
},
|
||||
}
|
||||
|
|
|
@ -5,13 +5,14 @@
|
|||
package oauth2
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"golang.org/x/net/context"
|
||||
"golang.org/x/oauth2/internal"
|
||||
)
|
||||
|
||||
|
@ -20,7 +21,7 @@ import (
|
|||
// expirations due to client-server time mismatches.
|
||||
const expiryDelta = 10 * time.Second
|
||||
|
||||
// Token represents the crendentials used to authorize
|
||||
// Token represents the credentials used to authorize
|
||||
// the requests to access protected resources on the OAuth 2.0
|
||||
// provider's backend.
|
||||
//
|
||||
|
@ -123,7 +124,7 @@ func (t *Token) expired() bool {
|
|||
if t.Expiry.IsZero() {
|
||||
return false
|
||||
}
|
||||
return t.Expiry.Add(-expiryDelta).Before(time.Now())
|
||||
return t.Expiry.Round(0).Add(-expiryDelta).Before(time.Now())
|
||||
}
|
||||
|
||||
// Valid reports whether t is non-nil, has an AccessToken, and is not expired.
|
||||
|
@ -152,7 +153,23 @@ func tokenFromInternal(t *internal.Token) *Token {
|
|||
func retrieveToken(ctx context.Context, c *Config, v url.Values) (*Token, error) {
|
||||
tk, err := internal.RetrieveToken(ctx, c.ClientID, c.ClientSecret, c.Endpoint.TokenURL, v)
|
||||
if err != nil {
|
||||
if rErr, ok := err.(*internal.RetrieveError); ok {
|
||||
return nil, (*RetrieveError)(rErr)
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
return tokenFromInternal(tk), nil
|
||||
}
|
||||
|
||||
// RetrieveError is the error returned when the token endpoint returns a
|
||||
// non-2XX HTTP status code.
|
||||
type RetrieveError struct {
|
||||
Response *http.Response
|
||||
// Body is the body that was consumed by reading Response.Body.
|
||||
// It may be truncated.
|
||||
Body []byte
|
||||
}
|
||||
|
||||
func (r *RetrieveError) Error() string {
|
||||
return fmt.Sprintf("oauth2: cannot fetch token: %v\nResponse: %s", r.Response.Status, r.Body)
|
||||
}
|
||||
|
|
|
@ -31,9 +31,17 @@ type Transport struct {
|
|||
}
|
||||
|
||||
// RoundTrip authorizes and authenticates the request with an
|
||||
// access token. If no token exists or token is expired,
|
||||
// tries to refresh/fetch a new token.
|
||||
// access token from Transport's Source.
|
||||
func (t *Transport) RoundTrip(req *http.Request) (*http.Response, error) {
|
||||
reqBodyClosed := false
|
||||
if req.Body != nil {
|
||||
defer func() {
|
||||
if !reqBodyClosed {
|
||||
req.Body.Close()
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
if t.Source == nil {
|
||||
return nil, errors.New("oauth2: Transport's Source is nil")
|
||||
}
|
||||
|
@ -46,6 +54,10 @@ func (t *Transport) RoundTrip(req *http.Request) (*http.Response, error) {
|
|||
token.SetAuthHeader(req2)
|
||||
t.setModReq(req, req2)
|
||||
res, err := t.base().RoundTrip(req2)
|
||||
|
||||
// req.Body is assumed to have been closed by the base RoundTripper.
|
||||
reqBodyClosed = true
|
||||
|
||||
if err != nil {
|
||||
t.setModReq(req, nil)
|
||||
return nil, err
|
||||
|
|
Loading…
Reference in New Issue