From 22428da0e3a220b08ab9025e7f185195b0fec84c Mon Sep 17 00:00:00 2001 From: Matt Joiner Date: Wed, 23 Sep 2015 18:25:22 +1000 Subject: [PATCH] Add packed IP list This saves a lot of memory by allowing the IP blocklist to be mmap()ed in. In production with the latest level1 blocklist it's 35MB per process. --- client.go | 32 +++++++++- dht/dht.go | 8 +-- doc.go | 5 +- iplist/cmd/pack-blocklist/main.go | 27 ++++++++ iplist/iplist.go | 38 +++++++---- iplist/iplist_test.go | 76 ++++++++++++++-------- iplist/packed.go | 103 ++++++++++++++++++++++++++++++ iplist/packed_test.go | 35 ++++++++++ 8 files changed, 276 insertions(+), 48 deletions(-) create mode 100644 iplist/cmd/pack-blocklist/main.go create mode 100644 iplist/packed.go create mode 100644 iplist/packed_test.go diff --git a/client.go b/client.go index 5dc2992f..559bcca9 100644 --- a/client.go +++ b/client.go @@ -29,6 +29,7 @@ import ( "github.com/anacrolix/sync" "github.com/anacrolix/utp" "github.com/bradfitz/iter" + "github.com/edsrzf/mmap-go" "github.com/anacrolix/torrent/bencode" "github.com/anacrolix/torrent/data" @@ -139,7 +140,7 @@ type Client struct { listeners []net.Listener utpSock *utp.Socket dHT *dht.Server - ipBlockList *iplist.IPList + ipBlockList iplist.Ranger bannedTorrents map[InfoHash]struct{} config Config pruneTimer *time.Timer @@ -158,13 +159,13 @@ type Client struct { torrents map[InfoHash]*torrent } -func (me *Client) IPBlockList() *iplist.IPList { +func (me *Client) IPBlockList() iplist.Ranger { me.mu.Lock() defer me.mu.Unlock() return me.ipBlockList } -func (me *Client) SetIPBlockList(list *iplist.IPList) { +func (me *Client) SetIPBlockList(list iplist.Ranger) { me.mu.Lock() defer me.mu.Unlock() me.ipBlockList = list @@ -382,10 +383,35 @@ func (cl *Client) prioritizePiece(t *torrent, piece int, priority piecePriority) } } +func loadPackedBlocklist(filename string) (ret iplist.Ranger, err error) { + f, err := os.Open(filename) + if os.IsNotExist(err) { + err = nil + return + } + if err 
!= nil { + return + } + defer f.Close() + mm, err := mmap.Map(f, mmap.RDONLY, 0) + if err != nil { + return + } + ret = iplist.NewFromPacked(mm) + return +} + func (cl *Client) setEnvBlocklist() (err error) { filename := os.Getenv("TORRENT_BLOCKLIST_FILE") defaultBlocklist := filename == "" if defaultBlocklist { + cl.ipBlockList, err = loadPackedBlocklist(filepath.Join(cl.configDir(), "packed-blocklist")) + if err != nil { + return + } + if cl.ipBlockList != nil { + return + } filename = filepath.Join(cl.configDir(), "blocklist") } f, err := os.Open(filename) diff --git a/dht/dht.go b/dht/dht.go index ecae42fb..d7b6cc17 100644 --- a/dht/dht.go +++ b/dht/dht.go @@ -50,7 +50,7 @@ type Server struct { nodes map[string]*node // Keyed by dHTAddr.String(). mu sync.Mutex closed chan struct{} - ipBlockList *iplist.IPList + ipBlockList iplist.Ranger badNodes *boom.BloomFilter numConfirmedAnnounces int @@ -70,7 +70,7 @@ type ServerConfig struct { NoSecurity bool // Initial IP blocklist to use. Applied before serving and bootstrapping // begins. - IPBlocklist *iplist.IPList + IPBlocklist iplist.Ranger // Used to secure the server's ID. Defaults to the Conn's LocalAddr(). PublicIP net.IP } @@ -595,13 +595,13 @@ func (s *Server) setDefaults() (err error) { } // Packets to and from any address matching a range in the list are dropped. -func (s *Server) SetIPBlockList(list *iplist.IPList) { +func (s *Server) SetIPBlockList(list iplist.Ranger) { s.mu.Lock() defer s.mu.Unlock() s.ipBlockList = list } -func (s *Server) IPBlocklist() *iplist.IPList { +func (s *Server) IPBlocklist() iplist.Ranger { return s.ipBlockList } diff --git a/doc.go b/doc.go index a15349e5..5545d343 100644 --- a/doc.go +++ b/doc.go @@ -20,8 +20,9 @@ A Client has a configurable ConfigDir that defaults to $HOME/.config/torrent. Torrent metainfo files are cached at $CONFIGDIR/torrents/$infohash.torrent. Infohashes in $CONFIGDIR/banned_infohashes cannot be added to the Client. 
A P2P Plaintext Format blocklist is loaded from a file at the location specified -by the environment variable TORRENT_BLOCKLIST_FILE if set, otherwise from -$CONFIGDIR/blocklist. +by the environment variable TORRENT_BLOCKLIST_FILE if set, otherwise from +$CONFIGDIR/blocklist. If $CONFIGDIR/packed-blocklist exists, this is memory- +mapped as a packed IP blocklist, saving considerable memory. */ package torrent diff --git a/iplist/cmd/pack-blocklist/main.go b/iplist/cmd/pack-blocklist/main.go new file mode 100644 index 00000000..ba6931d5 --- /dev/null +++ b/iplist/cmd/pack-blocklist/main.go @@ -0,0 +1,27 @@ +// Takes P2P blocklist text format in stdin, and outputs the packed format +// from the iplist package. +package main + +import ( + "bufio" + "os" + + "github.com/anacrolix/missinggo" + "github.com/anacrolix/missinggo/args" + + "github.com/anacrolix/torrent/iplist" +) + +func main() { + args.Parse() + l, err := iplist.NewFromReader(os.Stdin) + if err != nil { + missinggo.Fatal(err) + } + wb := bufio.NewWriter(os.Stdout) + defer wb.Flush() + err = l.WritePacked(wb) + if err != nil { + missinggo.Fatal(err) + } +} diff --git a/iplist/iplist.go b/iplist/iplist.go index 18d6b0a3..cd97b080 100644 --- a/iplist/iplist.go +++ b/iplist/iplist.go @@ -12,6 +12,14 @@ import ( "sort" ) +// An abstraction of IP list implementations. +type Ranger interface { + // Return a Range containing the IP. + Lookup(net.IP) *Range + // If your ranges hurt, use this. + NumRanges() int +} + type IPList struct { ranges []Range } @@ -62,30 +70,38 @@ func (me *IPList) Lookup(ip net.IP) (r *Range) { } if v4 == nil && v6 == nil { return &Range{ - Description: fmt.Sprintf("unsupported IP: %s", ip), + Description: "bad IP", } } return nil } -// Return the range the given IP is in. Returns nil if no range is found. -func (me *IPList) lookup(ip net.IP) (r *Range) { +// Return a range that contains ip, or nil. 
+func lookup(f func(i int) Range, n int, ip net.IP) *Range { // Find the index of the first range for which the following range exceeds // it. - i := sort.Search(len(me.ranges), func(i int) bool { - if i+1 >= len(me.ranges) { + i := sort.Search(n, func(i int) bool { + if i+1 >= n { return true } - return bytes.Compare(ip, me.ranges[i+1].First) < 0 + r := f(i + 1) + return bytes.Compare(ip, r.First) < 0 }) - if i == len(me.ranges) { - return + if i == n { + return nil } - r = &me.ranges[i] + r := f(i) if bytes.Compare(ip, r.First) < 0 || bytes.Compare(ip, r.Last) > 0 { - r = nil + return nil } - return + return &r +} + +// Return the range the given IP is in. Returns nil if no range is found. +func (me *IPList) lookup(ip net.IP) (r *Range) { + return lookup(func(i int) Range { + return me.ranges[i] + }, len(me.ranges), ip) } func minifyIP(ip *net.IP) { diff --git a/iplist/iplist_test.go b/iplist/iplist_test.go index b65f13bb..2b24e7e6 100644 --- a/iplist/iplist_test.go +++ b/iplist/iplist_test.go @@ -2,6 +2,7 @@ package iplist import ( "bufio" + "bytes" "fmt" "net" "strings" @@ -9,14 +10,36 @@ import ( "github.com/anacrolix/missinggo" "github.com/bradfitz/iter" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) -var sample = ` +var ( + // Note the shared description "eff". The overlapping ranges at 1.2.8.2 + // will cause problems. Don't overlap your ranges. 
+ sample = ` # List distributed by iblocklist.com a:1.2.4.0-1.2.4.255 b:1.2.8.0-1.2.8.255 -something:more detail:86.59.95.195-86.59.95.195` +eff:1.2.8.2-1.2.8.2 +something:more detail:86.59.95.195-86.59.95.195 +eff:127.0.0.0-127.0.0.1` + packedSample []byte +) + +func init() { + var buf bytes.Buffer + list, err := NewFromReader(strings.NewReader(sample)) + if err != nil { + panic(err) + } + err = list.WritePacked(&buf) + if err != nil { + panic(err) + } + packedSample = buf.Bytes() +} func TestIPv4RangeLen(t *testing.T) { ranges, _ := sampleRanges(t) @@ -73,30 +96,18 @@ func connRemoteAddrIP(network, laddr string, dialHost string) net.IP { } func TestBadIP(t *testing.T) { - iplist := New(nil) - if iplist.Lookup(net.IP(make([]byte, 4))) != nil { - t.FailNow() - } - if iplist.Lookup(net.IP(make([]byte, 16))) != nil { - t.FailNow() - } - if iplist.Lookup(nil) == nil { - t.FailNow() - } - if iplist.Lookup(net.IP(make([]byte, 5))) == nil { - t.FailNow() + for _, iplist := range []Ranger{ + New(nil), + NewFromPacked([]byte("\x00\x00\x00\x00\x00\x00\x00\x00")), + } { + assert.Nil(t, iplist.Lookup(net.IP(make([]byte, 4))), "%v", iplist) + assert.Nil(t, iplist.Lookup(net.IP(make([]byte, 16)))) + assert.Equal(t, iplist.Lookup(nil).Description, "bad IP") + assert.NotNil(t, iplist.Lookup(net.IP(make([]byte, 5)))) } } -func TestSimple(t *testing.T) { - ranges, err := sampleRanges(t) - if err != nil { - t.Fatal(err) - } - if len(ranges) != 3 { - t.Fatalf("expected 3 ranges but got %d", len(ranges)) - } - iplist := New(ranges) +func testLookuperSimple(t *testing.T, iplist Ranger) { for _, _case := range []struct { IP string Hit bool @@ -107,8 +118,9 @@ func TestSimple(t *testing.T) { {"1.2.4.255", true, "a"}, // Try to roll over to the next octet on the parse. Note the final // octet is overbounds. In the next case. 
- {"1.2.7.256", true, "unsupported IP: "}, - {"1.2.8.254", true, "b"}, + {"1.2.7.256", true, "bad IP"}, + {"1.2.8.1", true, "b"}, + {"1.2.8.2", true, "eff"}, } { ip := net.ParseIP(_case.IP) r := iplist.Lookup(ip) @@ -121,8 +133,16 @@ func TestSimple(t *testing.T) { if r == nil { t.Fatalf("expected hit for %q", _case.IP) } - if r.Description != _case.Desc { - t.Fatalf("%q != %q", r.Description, _case.Desc) - } + assert.Equal(t, _case.Desc, r.Description, "%T", iplist) } } + +func TestSimple(t *testing.T) { + ranges, err := sampleRanges(t) + require.NoError(t, err) + require.Len(t, ranges, 5) + iplist := New(ranges) + testLookuperSimple(t, iplist) + packed := NewFromPacked(packedSample) + testLookuperSimple(t, packed) +} diff --git a/iplist/packed.go b/iplist/packed.go new file mode 100644 index 00000000..152c680a --- /dev/null +++ b/iplist/packed.go @@ -0,0 +1,103 @@ +package iplist + +import ( + "encoding/binary" + "io" + "net" +) + +// The packed format is an 8 byte integer of the number of ranges. Then 20 +// bytes per range, consisting of 4 byte packed IP being the lower bound IP of +// the range, then 4 bytes of the upper, inclusive bound, 8 bytes for the +// offset of the description from the end of the packed ranges, and 4 bytes +// for the length of the description. After these packed ranges, are the +// concatenated descriptions. + +const ( + packedRangesOffset = 8 + packedRangeLen = 20 +) + +func (me *IPList) WritePacked(w io.Writer) (err error) { + descOffsets := make(map[string]int64, len(me.ranges)) + descs := make([]string, 0, len(me.ranges)) + var nextOffset int64 + // This is a little monadic, no? 
+ write := func(b []byte, expectedLen int) { + if err != nil { + return + } + var n int + n, err = w.Write(b) + if err != nil { + return + } + if n != expectedLen { + panic(n) + } + } + var b [8]byte + binary.LittleEndian.PutUint64(b[:], uint64(len(me.ranges))) + write(b[:], 8) + for _, r := range me.ranges { + write(r.First.To4(), 4) + write(r.Last.To4(), 4) + descOff, ok := descOffsets[r.Description] + if !ok { + descOff = nextOffset + descOffsets[r.Description] = descOff + descs = append(descs, r.Description) + nextOffset += int64(len(r.Description)) + } + binary.LittleEndian.PutUint64(b[:], uint64(descOff)) + write(b[:], 8) + binary.LittleEndian.PutUint32(b[:], uint32(len(r.Description))) + write(b[:4], 4) + } + for _, d := range descs { + write([]byte(d), len(d)) + } + return +} + +func NewFromPacked(b []byte) PackedIPList { + return PackedIPList(b) +} + +type PackedIPList []byte + +var _ Ranger = PackedIPList{} + +func (me PackedIPList) len() int { + return int(binary.LittleEndian.Uint64(me[:8])) +} + +func (me PackedIPList) NumRanges() int { + return me.len() +} + +func (me PackedIPList) getRange(i int) (ret Range) { + rOff := packedRangesOffset + packedRangeLen*i + first := me[rOff : rOff+4] + last := me[rOff+4 : rOff+8] + descOff := int(binary.LittleEndian.Uint64(me[rOff+8:])) + descLen := int(binary.LittleEndian.Uint32(me[rOff+16:])) + descOff += packedRangesOffset + packedRangeLen*me.len() + ret = Range{net.IP(first), net.IP(last), string(me[descOff : descOff+descLen])} + return +} + +func (me PackedIPList) Lookup(ip net.IP) (r *Range) { + ip4 := ip.To4() + if ip4 == nil { + // If the IP list was built successfully, then it only contained IPv4 + // ranges. Therefore no IPv6 ranges are blocked. 
+ if ip.To16() == nil { + r = &Range{ + Description: "bad IP", + } + } + return + } + return lookup(me.getRange, me.len(), ip4) +} diff --git a/iplist/packed_test.go b/iplist/packed_test.go new file mode 100644 index 00000000..d6235dbb --- /dev/null +++ b/iplist/packed_test.go @@ -0,0 +1,35 @@ +package iplist + +import ( + "bytes" + "strings" + "testing" + + "github.com/stretchr/testify/require" +) + +// The active ingredients in the sample P2P blocklist file contents `sample`, +// for reference: +// +// a:1.2.4.0-1.2.4.255 +// b:1.2.8.0-1.2.8.255 +// eff:1.2.8.2-1.2.8.2 +// something:more detail:86.59.95.195-86.59.95.195 +// eff:127.0.0.0-127.0.0.1 + +func TestWritePacked(t *testing.T) { + l, err := NewFromReader(strings.NewReader(sample)) + require.NoError(t, err) + var buf bytes.Buffer + err = l.WritePacked(&buf) + require.NoError(t, err) + require.Equal(t, + "\x05\x00\x00\x00\x00\x00\x00\x00"+ + "\x01\x02\x04\x00\x01\x02\x04\xff"+"\x00\x00\x00\x00\x00\x00\x00\x00"+"\x01\x00\x00\x00"+ + "\x01\x02\x08\x00\x01\x02\x08\xff"+"\x01\x00\x00\x00\x00\x00\x00\x00"+"\x01\x00\x00\x00"+ + "\x01\x02\x08\x02\x01\x02\x08\x02"+"\x02\x00\x00\x00\x00\x00\x00\x00"+"\x03\x00\x00\x00"+ + "\x56\x3b\x5f\xc3\x56\x3b\x5f\xc3"+"\x05\x00\x00\x00\x00\x00\x00\x00"+"\x15\x00\x00\x00"+ + "\x7f\x00\x00\x00\x7f\x00\x00\x01"+"\x02\x00\x00\x00\x00\x00\x00\x00"+"\x03\x00\x00\x00"+ + "abeffsomething:more detail", + buf.String()) +}