From a79c3bd5d30d99fb33ab7c5334c7d017884d4403 Mon Sep 17 00:00:00 2001 From: afjoseph <7126721+afjoseph@users.noreply.github.com> Date: Fri, 22 Apr 2022 04:23:43 +0200 Subject: [PATCH] fixup! [webseed] Add a custom URL encoder for webseeds --- client.go | 2 +- spec.go | 3 +- torrent.go | 18 ++++++------ webseed/client.go | 6 ++-- webseed/request.go | 54 ++++++++++++++++++++++-------------- webseed/request_test.go | 61 +++++++++++++++++++++++++++++++++++------ 6 files changed, 101 insertions(+), 43 deletions(-) diff --git a/client.go b/client.go index 7d763c13..df746ecd 100644 --- a/client.go +++ b/client.go @@ -1326,7 +1326,7 @@ func (t *Torrent) MergeSpec(spec *TorrentSpec) error { defer cl.unlock() t.initialPieceCheckDisabled = spec.DisableInitialPieceCheck for _, url := range spec.Webseeds { - t.addWebSeed(url, spec.EncodeWebSeedUrl) + t.addWebSeed(url, spec.DefaultWebseedEscapePath) } for _, peerAddr := range spec.PeerAddrs { t.addPeer(PeerInfo{ diff --git a/spec.go b/spec.go index df5b06f9..6d290e8d 100644 --- a/spec.go +++ b/spec.go @@ -6,6 +6,7 @@ import ( "github.com/anacrolix/torrent/metainfo" pp "github.com/anacrolix/torrent/peer_protocol" "github.com/anacrolix/torrent/storage" + "github.com/anacrolix/torrent/webseed" ) // Specifies a new torrent for adding to a client, or additions to an existing Torrent. There are @@ -39,7 +40,7 @@ type TorrentSpec struct { // Custom encoder for webseed URLs // Leave nil to use the default (url.QueryEscape) - EncodeWebSeedUrl func(string) string + DefaultWebseedEscapePath webseed.PathEscaper } func TorrentSpecFromMagnetUri(uri string) (spec *TorrentSpec, err error) { diff --git a/torrent.go b/torrent.go index 7810e9d1..a08da702 100644 --- a/torrent.go +++ b/torrent.go @@ -2350,28 +2350,30 @@ func (t *Torrent) callbacks() *Callbacks { return &t.cl.config.Callbacks } -type AddWebSeedsOptions struct { +type AddWebseedsOpt func() *AddWebseedOpts + +type AddWebseedOpts struct { // Custom encoder for webseed URLs // Leave nil to use the default (url.QueryEscape) - EncodeUrl func(string) string + PathEscaper webseed.PathEscaper } // Add web seeds to the torrent. -// If AddWebSeedsOptions is not nil, only the first one is used. -func (t *Torrent) AddWebSeeds(urls []string, opts ...AddWebSeedsOptions) { +// If opt is not nil, only the first one is used. +func (t *Torrent) AddWebSeeds(urls []string, opt ...AddWebseedsOpt) { t.cl.lock() defer t.cl.unlock() for _, u := range urls { - if opts == nil { + if opt == nil { t.addWebSeed(u, nil) } else { - t.addWebSeed(u, opts[0].EncodeUrl) + t.addWebSeed(u, opt[0]().PathEscaper) } } } -func (t *Torrent) addWebSeed(url string, encodeUrl func(string) string) { +func (t *Torrent) addWebSeed(url string, pathEscaper webseed.PathEscaper) { if t.cl.config.DisableWebseeds { return } @@ -2408,7 +2410,7 @@ func (t *Torrent) addWebSeed(url string, encodeUrl func(string) string) { r: r, } }, - EncodeUrl: encodeUrl, + PathEscaper: pathEscaper, }, activeRequests: make(map[Request]webseed.Request, maxRequests), maxRequests: maxRequests, diff --git a/webseed/client.go b/webseed/client.go index ce8e3a42..d218cf0e 100644 --- a/webseed/client.go +++ b/webseed/client.go @@ -57,7 +57,7 @@ type Client struct { // private in the future, if Client ever starts removing pieces. Pieces roaring.Bitmap ResponseBodyWrapper ResponseBodyWrapper - EncodeUrl func(string) string + PathEscaper PathEscaper } type ResponseBodyWrapper func(io.Reader) io.Reader @@ -82,9 +82,9 @@ func (ws *Client) NewRequest(r RequestSpec) Request { ctx, cancel := context.WithCancel(context.Background()) var requestParts []requestPart if !ws.fileIndex.Locate(r, func(i int, e segments.Extent) bool { - req, err := NewRequestWithCustomUrlEncoding( + req, err := NewRequestWithOpts( ws.Url, i, ws.info, e.Start, e.Length, - ws.EncodeUrl, + ws.PathEscaper, ) if err != nil { panic(err) diff --git a/webseed/request.go b/webseed/request.go index 3d4b4931..460448d0 100644 --- a/webseed/request.go +++ b/webseed/request.go @@ -10,58 +10,70 @@ import ( "github.com/anacrolix/torrent/metainfo" ) +type PathEscaper func(pathComps []string) string + // Escapes path name components suitable for appending to a webseed URL. This works for converting // S3 object keys to URLs too. +// // Contrary to the name, this actually does a QueryEscape, rather than a // PathEscape. This works better with most S3 providers. You can use -// EscapePathWithCustomEncoding for a custom encoding. +// EscapePathWithOpts for a custom encoding. func EscapePath(pathComps []string) string { return escapePath(pathComps, nil) } -func escapePath(pathComps []string, encodeUrl func(string) string) string { - if encodeUrl == nil { - encodeUrl = url.QueryEscape - } - return path.Join( - func() (ret []string) { - for _, comp := range pathComps { - ret = append(ret, encodeUrl(comp)) - } - return - }()..., - ) +func EscapePathWithCustomEscaper(pathComps []string, pathEscaper PathEscaper) string { + return escapePath(pathComps, pathEscaper) } -func trailingPath(infoName string, fileComps []string, encodeUrl func(string) string) string { - return escapePath(append([]string{infoName}, fileComps...), encodeUrl) +func escapePath(pathComps []string, pathEscaper PathEscaper) string { + if pathEscaper != nil { + return pathEscaper(pathComps) + } + + var ret []string + for _, comp := range pathComps { + ret = append(ret, url.QueryEscape(comp)) + } + return path.Join(ret...) +} + +func trailingPath( + infoName string, + fileComps []string, + pathEscaper PathEscaper, +) string { + return escapePath(append([]string{infoName}, fileComps...), pathEscaper) } // Creates a request per BEP 19. -func NewRequest(url_ string, fileIndex int, info *metainfo.Info, offset, length int64) (*http.Request, error) { +func NewRequest( + url_ string, + fileIndex int, info *metainfo.Info, + offset, length int64) (*http.Request, error) { return newRequest(url_, fileIndex, info, offset, length, nil) } -func NewRequestWithCustomUrlEncoding( +func NewRequestWithOpts( url_ string, fileIndex int, info *metainfo.Info, offset, length int64, - encodeUrl func(string) string, + pathEscaper PathEscaper, ) (*http.Request, error) { - return newRequest(url_, fileIndex, info, offset, length, encodeUrl) + return newRequest(url_, fileIndex, info, offset, length, pathEscaper) } func newRequest( url_ string, fileIndex int, info *metainfo.Info, offset, length int64, - encodeUrl func(string) string, + pathEscaper PathEscaper, ) (*http.Request, error) { fileInfo := info.UpvertedFiles()[fileIndex] if strings.HasSuffix(url_, "/") { // BEP specifies that we append the file path. We need to escape each component of the path // for things like spaces and '#'. - url_ += trailingPath(info.Name, fileInfo.Path, encodeUrl) + url_ += escapePath(append([]string{info.Name}, fileInfo.Path...), pathEscaper) } req, err := http.NewRequest(http.MethodGet, url_, nil) if err != nil { diff --git a/webseed/request_test.go b/webseed/request_test.go index b59e7784..d39e20e9 100644 --- a/webseed/request_test.go +++ b/webseed/request_test.go @@ -2,28 +2,71 @@ package webseed import ( "net/url" + "path" "testing" qt "github.com/frankban/quicktest" ) -func TestTrailingPath(t *testing.T) { +func TestEscapePath(t *testing.T) { c := qt.New(t) - test := func(parts []string, result string) { - unescaped, err := url.QueryUnescape(trailingPath(parts[0], parts[1:], url.QueryEscape)) + test := func( + parts []string, result string, + escaper PathEscaper, + unescaper func(string) (string, error), + ) { + unescaped, err := unescaper(escapePath(parts, escaper)) if !c.Check(err, qt.IsNil) { return } c.Check(unescaped, qt.Equals, result) } - test([]string{"a_b-c", "d + e.f"}, "a_b-c/d + e.f") - test([]string{"a_1-b_c2", "d 3. (e, f).g"}, + + // Test with nil escapers (always uses url.QueryEscape) + // ------ + test( + []string{"a_b-c", "d + e.f"}, + "a_b-c/d + e.f", + nil, + url.QueryUnescape, + ) + test( + []string{"a_1-b_c2", "d 3. (e, f).g"}, "a_1-b_c2/d 3. (e, f).g", + nil, + url.QueryUnescape, + ) + + // Test with custom escapers + // ------ + test( + []string{"a_b-c", "d + e.f"}, + "a_b-c/d + e.f", + func(s []string) string { + var ret []string + for _, comp := range s { + ret = append(ret, url.PathEscape(comp)) + } + return path.Join(ret...) + }, + url.PathUnescape, + ) + test( + []string{"a_1-b_c2", "d 3. (e, f).g"}, + "a_1-b_c2/d 3. (e, f).g", + func(s []string) string { + var ret []string + for _, comp := range s { + ret = append(ret, url.PathEscape(comp)) + } + return path.Join(ret...) + }, + url.PathUnescape, ) } -func TestTrailingPathForEmptyInfoName(t *testing.T) { - qt.Check(t, trailingPath("", []string{`ノ┬─┬ノ ︵ ( \o°o)\`}, url.QueryEscape), qt.Equals, "%E3%83%8E%E2%94%AC%E2%94%80%E2%94%AC%E3%83%8E+%EF%B8%B5+%28+%5Co%C2%B0o%29%5C") - qt.Check(t, trailingPath("", []string{"hello", "world"}, url.QueryEscape), qt.Equals, "hello/world") - qt.Check(t, trailingPath("war", []string{"and", "peace"}, url.QueryEscape), qt.Equals, "war/and/peace") +func TestEscapePathForEmptyInfoName(t *testing.T) { + qt.Check(t, escapePath([]string{`ノ┬─┬ノ ︵ ( \o°o)\`}, nil), qt.Equals, "%E3%83%8E%E2%94%AC%E2%94%80%E2%94%AC%E3%83%8E+%EF%B8%B5+%28+%5Co%C2%B0o%29%5C") + qt.Check(t, escapePath([]string{"hello", "world"}, nil), qt.Equals, "hello/world") + qt.Check(t, escapePath([]string{"war", "and", "peace"}, nil), qt.Equals, "war/and/peace") }