From 2fd928b918392fef280caa3103071f8b74c4f4c0 Mon Sep 17 00:00:00 2001 From: Matt Joiner Date: Fri, 12 Nov 2021 12:37:40 +1100 Subject: [PATCH] Don't use non-directory webseed URLs for multi-file torrents --- peer-impl.go | 8 ++++++++ peerconn.go | 40 +++++++++++++++++++++++++--------------- torrent.go | 1 - webseed-peer.go | 17 +++++++++++++---- webseed/client.go | 26 ++++++++++++++++++++++---- 5 files changed, 68 insertions(+), 24 deletions(-) diff --git a/peer-impl.go b/peer-impl.go index b5cf028e..f7140377 100644 --- a/peer-impl.go +++ b/peer-impl.go @@ -1,6 +1,7 @@ package torrent import ( + "github.com/RoaringBitmap/roaring" "github.com/anacrolix/torrent/metainfo" ) @@ -25,4 +26,11 @@ type peerImpl interface { drop() String() string connStatusString() string + + // All if the peer should have everything, known if we know that for a fact. For example, we can + // guess at how many pieces are in a torrent, and assume they have all pieces based on them + // having sent haves for everything, but we don't know for sure. But if they send a have-all + // message, then it's clear that they do. + peerHasAllPieces() (all, known bool) + peerPieces() *roaring.Bitmap } diff --git a/peerconn.go b/peerconn.go index 4b69463a..9a3fb0fc 100644 --- a/peerconn.go +++ b/peerconn.go @@ -111,11 +111,6 @@ type Peer struct { peerRequests map[Request]*peerRequestState PeerPrefersEncryption bool // as indicated by 'e' field in extension handshake PeerListenPort int - // The pieces the peer has claimed to have. - _peerPieces roaring.Bitmap - // The peer has everything. This can occur due to a special message, when - // we may not even know the number of pieces in the torrent yet. - peerSentHaveAll bool // The highest possible number of pieces the torrent could have based on // communication with the peer. Generally only useful until we have the // torrent info. @@ -154,6 +149,12 @@ type PeerConn struct { uploadTimer *time.Timer pex pexConnState + + // The pieces the peer has claimed to have. + _peerPieces roaring.Bitmap + // The peer has everything. This can occur due to a special message, when + // we may not even know the number of pieces in the torrent yet. + peerSentHaveAll bool } func (cn *PeerConn) connStatusString() string { @@ -233,7 +234,7 @@ func (cn *Peer) cumInterest() time.Duration { return ret } -func (cn *Peer) peerHasAllPieces() (all bool, known bool) { +func (cn *PeerConn) peerHasAllPieces() (all bool, known bool) { if cn.peerSentHaveAll { return true, true } @@ -261,8 +262,8 @@ func (cn *Peer) bestPeerNumPieces() pieceIndex { } func (cn *Peer) completedString() string { - have := pieceIndex(cn._peerPieces.GetCardinality()) - if cn.peerSentHaveAll { + have := pieceIndex(cn.peerPieces().GetCardinality()) + if all, _ := cn.peerHasAllPieces(); all { have = cn.bestPeerNumPieces() } return fmt.Sprintf("%d/%d", have, cn.bestPeerNumPieces()) @@ -279,6 +280,10 @@ func (cn *PeerConn) setNumPieces(num pieceIndex) { cn.peerPiecesChanged() } +func (cn *PeerConn) peerPieces() *roaring.Bitmap { + return &cn._peerPieces +} + func eventAgeString(t time.Time) string { if t.IsZero() { return "never" @@ -428,8 +433,13 @@ func (cn *PeerConn) onClose() { } } +// Peer definitely has a piece, for purposes of requesting. So it's not sufficient that we think +// they do (known=true). func (cn *Peer) peerHasPiece(piece pieceIndex) bool { - return cn.peerSentHaveAll || cn._peerPieces.Contains(bitmap.BitIndex(piece)) + if all, known := cn.peerHasAllPieces(); all && known { + return true + } + return cn.peerPieces().ContainsInt(piece) } // 64KiB, but temporarily less to work around an issue with WebRTC. TODO: Update when @@ -789,7 +799,7 @@ func (cn *PeerConn) peerSentBitfield(bf []bool) error { return nil } -func (cn *Peer) onPeerHasAllPieces() { +func (cn *PeerConn) onPeerHasAllPieces() { t := cn.t if t.haveInfo() { npp, pc := cn.newPeerPieces(), t.numPieces() @@ -1509,13 +1519,13 @@ func (cn *Peer) netGoodPiecesDirtied() int64 { } func (c *Peer) peerHasWantedPieces() bool { - if c.peerSentHaveAll { + if all, _ := c.peerHasAllPieces(); all { return !c.t.haveAllPieces() } if !c.t.haveInfo() { - return !c._peerPieces.IsEmpty() + return !c.peerPieces().IsEmpty() } - return c._peerPieces.Intersects(&c.t._pendingPieces) + return c.peerPieces().Intersects(&c.t._pendingPieces) } func (c *Peer) deleteRequest(r RequestIndex) bool { @@ -1646,8 +1656,8 @@ func (cn *PeerConn) PeerPieces() *roaring.Bitmap { // Returns a new Bitmap that includes bits for all pieces the peer could have based on their claims. func (cn *Peer) newPeerPieces() *roaring.Bitmap { // TODO: Can we use copy on write? - ret := cn._peerPieces.Clone() - if cn.peerSentHaveAll { + ret := cn.peerPieces().Clone() + if all, _ := cn.peerHasAllPieces(); all { if cn.t.haveInfo() { ret.AddRange(0, bitmap.BitRange(cn.t.numPieces())) } else { diff --git a/torrent.go b/torrent.go index c8025aae..80ad8e4b 100644 --- a/torrent.go +++ b/torrent.go @@ -2227,7 +2227,6 @@ func (t *Torrent) addWebSeed(url string) { ws.onGotInfo(t.info) } t.webSeeds[url] = &ws.peer - ws.peer.onPeerHasAllPieces() } func (t *Torrent) peerIsActive(p *Peer) (active bool) { diff --git a/webseed-peer.go b/webseed-peer.go index 71cdfcb4..94adabe9 100644 --- a/webseed-peer.go +++ b/webseed-peer.go @@ -8,11 +8,10 @@ import ( "strings" "sync" + "github.com/RoaringBitmap/roaring" "github.com/anacrolix/log" - "github.com/anacrolix/torrent/common" "github.com/anacrolix/torrent/metainfo" pp "github.com/anacrolix/torrent/peer_protocol" - "github.com/anacrolix/torrent/segments" "github.com/anacrolix/torrent/webseed" ) @@ -36,8 +35,7 @@ func (ws *webseedPeer) String() string { } func (ws *webseedPeer) onGotInfo(info *metainfo.Info) { - ws.client.FileIndex = segments.NewIndex(common.LengthIterFromUpvertedFiles(info.UpvertedFiles())) - ws.client.Info = info + ws.client.SetInfo(info) } func (ws *webseedPeer) writeInterested(interested bool) bool { @@ -165,3 +163,14 @@ func (ws *webseedPeer) requestResultHandler(r Request, webseedRequest webseed.Re func (me *webseedPeer) isLowOnRequests() bool { return me.peer.actualRequestState.Requests.GetCardinality() < uint64(me.maxRequests) } + +func (me *webseedPeer) peerPieces() *roaring.Bitmap { + return &me.client.Pieces +} + +func (cn *webseedPeer) peerHasAllPieces() (all, known bool) { + if !cn.peer.t.haveInfo() { + return true, false + } + return cn.client.Pieces.GetCardinality() == uint64(cn.peer.t.numPieces()), true +} diff --git a/webseed/client.go b/webseed/client.go index cc17b339..3a03fb1b 100644 --- a/webseed/client.go +++ b/webseed/client.go @@ -6,7 +6,10 @@ import ( "fmt" "io" "net/http" + "strings" + "github.com/RoaringBitmap/roaring" + "github.com/anacrolix/torrent/common" "github.com/anacrolix/torrent/metainfo" "github.com/anacrolix/torrent/segments" ) @@ -36,8 +39,23 @@ func (r Request) Cancel() { type Client struct { HttpClient *http.Client Url string - FileIndex segments.Index - Info *metainfo.Info + fileIndex segments.Index + info *metainfo.Info + // The pieces we can request with the Url. We're more likely to ban/block at the file-level + // given that's how requests are mapped to webseeds, but the torrent.Client works at the piece + // level. We can map our file-level adjustments to the pieces here. + Pieces roaring.Bitmap +} + +func (me *Client) SetInfo(info *metainfo.Info) { + if !strings.HasSuffix(me.Url, "/") && info.IsDir() { + // In my experience, this is a non-conforming webseed. For example the + // http://ia600500.us.archive.org/1/items URLs in archive.org torrents. + return + } + me.fileIndex = segments.NewIndex(common.LengthIterFromUpvertedFiles(info.UpvertedFiles())) + me.info = info + me.Pieces.AddRange(0, uint64(info.NumPieces())) } type RequestResult struct { @@ -48,8 +66,8 @@ type RequestResult struct { func (ws *Client) NewRequest(r RequestSpec) Request { ctx, cancel := context.WithCancel(context.Background()) var requestParts []requestPart - if !ws.FileIndex.Locate(r, func(i int, e segments.Extent) bool { - req, err := NewRequest(ws.Url, i, ws.Info, e.Start, e.Length) + if !ws.fileIndex.Locate(r, func(i int, e segments.Extent) bool { + req, err := NewRequest(ws.Url, i, ws.info, e.Start, e.Length) if err != nil { panic(err) }