Add packed IP list

This saves a lot of memory by allowing the IP blocklist to be mmap()ed in. In production with the latest level1 blocklist it's 35MB per process.
This commit is contained in:
Matt Joiner 2015-09-23 18:25:22 +10:00
parent 87158e594a
commit 22428da0e3
8 changed files with 276 additions and 48 deletions

View File

@ -29,6 +29,7 @@ import (
"github.com/anacrolix/sync"
"github.com/anacrolix/utp"
"github.com/bradfitz/iter"
"github.com/edsrzf/mmap-go"
"github.com/anacrolix/torrent/bencode"
"github.com/anacrolix/torrent/data"
@ -139,7 +140,7 @@ type Client struct {
listeners []net.Listener
utpSock *utp.Socket
dHT *dht.Server
ipBlockList *iplist.IPList
ipBlockList iplist.Ranger
bannedTorrents map[InfoHash]struct{}
config Config
pruneTimer *time.Timer
@ -158,13 +159,13 @@ type Client struct {
torrents map[InfoHash]*torrent
}
func (me *Client) IPBlockList() *iplist.IPList {
func (me *Client) IPBlockList() iplist.Ranger {
me.mu.Lock()
defer me.mu.Unlock()
return me.ipBlockList
}
func (me *Client) SetIPBlockList(list *iplist.IPList) {
func (me *Client) SetIPBlockList(list iplist.Ranger) {
me.mu.Lock()
defer me.mu.Unlock()
me.ipBlockList = list
@ -382,10 +383,35 @@ func (cl *Client) prioritizePiece(t *torrent, piece int, priority piecePriority)
}
}
// loadPackedBlocklist memory-maps the packed blocklist file at filename
// read-only and wraps the mapping as an iplist.Ranger. A missing file is not
// treated as an error: both results are nil in that case. Any other failure
// to open or map the file is returned with a nil Ranger.
func loadPackedBlocklist(filename string) (ret iplist.Ranger, err error) {
	file, openErr := os.Open(filename)
	switch {
	case os.IsNotExist(openErr):
		// No packed blocklist present; the caller decides what to fall back to.
		return nil, nil
	case openErr != nil:
		return nil, openErr
	}
	defer file.Close()
	mapped, mapErr := mmap.Map(file, mmap.RDONLY, 0)
	if mapErr != nil {
		return nil, mapErr
	}
	return iplist.NewFromPacked(mapped), nil
}
func (cl *Client) setEnvBlocklist() (err error) {
filename := os.Getenv("TORRENT_BLOCKLIST_FILE")
defaultBlocklist := filename == ""
if defaultBlocklist {
cl.ipBlockList, err = loadPackedBlocklist(filepath.Join(cl.configDir(), "packed-blocklist"))
if err != nil {
return
}
if cl.ipBlockList != nil {
return
}
filename = filepath.Join(cl.configDir(), "blocklist")
}
f, err := os.Open(filename)

View File

@ -50,7 +50,7 @@ type Server struct {
nodes map[string]*node // Keyed by dHTAddr.String().
mu sync.Mutex
closed chan struct{}
ipBlockList *iplist.IPList
ipBlockList iplist.Ranger
badNodes *boom.BloomFilter
numConfirmedAnnounces int
@ -70,7 +70,7 @@ type ServerConfig struct {
NoSecurity bool
// Initial IP blocklist to use. Applied before serving and bootstrapping
// begins.
IPBlocklist *iplist.IPList
IPBlocklist iplist.Ranger
// Used to secure the server's ID. Defaults to the Conn's LocalAddr().
PublicIP net.IP
}
@ -595,13 +595,13 @@ func (s *Server) setDefaults() (err error) {
}
// Packets to and from any address matching a range in the list are dropped.
func (s *Server) SetIPBlockList(list *iplist.IPList) {
func (s *Server) SetIPBlockList(list iplist.Ranger) {
s.mu.Lock()
defer s.mu.Unlock()
s.ipBlockList = list
}
func (s *Server) IPBlocklist() *iplist.IPList {
func (s *Server) IPBlocklist() iplist.Ranger {
return s.ipBlockList
}

5
doc.go
View File

@ -20,8 +20,9 @@ A Client has a configurable ConfigDir that defaults to $HOME/.config/torrent.
Torrent metainfo files are cached at $CONFIGDIR/torrents/$infohash.torrent.
Infohashes in $CONFIGDIR/banned_infohashes cannot be added to the Client. A
P2P Plaintext Format blocklist is loaded from a file at the location specified
by the environment variable TORRENT_BLOCKLIST_FILE if set, otherwise from
$CONFIGDIR/blocklist.
by the environment variable TORRENT_BLOCKLIST_FILE if set, otherwise from
$CONFIGDIR/blocklist. If the variable is not set and
$CONFIGDIR/packed-blocklist exists, that file is memory-mapped as a packed IP
blocklist instead, saving considerable memory.
*/
package torrent

View File

@ -0,0 +1,27 @@
// Takes P2P blocklist text format in stdin, and outputs the packed format
// from the iplist package.
package main
import (
"bufio"
"os"
"github.com/anacrolix/missinggo"
"github.com/anacrolix/missinggo/args"
"github.com/anacrolix/torrent/iplist"
)
// main reads a P2P plaintext blocklist from stdin, parses it with
// iplist.NewFromReader, and writes the packed representation to stdout
// through a buffered writer. Any error from parsing, packing or flushing is
// passed to missinggo.Fatal.
func main() {
	args.Parse()
	l, err := iplist.NewFromReader(os.Stdin)
	if err != nil {
		missinggo.Fatal(err)
	}
	wb := bufio.NewWriter(os.Stdout)
	err = l.WritePacked(wb)
	if err != nil {
		missinggo.Fatal(err)
	}
	// Flush explicitly instead of in a defer so that a failure to write the
	// buffered tail is reported rather than silently leaving truncated output.
	err = wb.Flush()
	if err != nil {
		missinggo.Fatal(err)
	}
}

View File

@ -12,6 +12,14 @@ import (
"sort"
)
// Ranger is an abstraction of IP list implementations.
type Ranger interface {
// Lookup returns a Range containing the IP, or nil if no range in the list
// contains it. The implementations in this package return a Range described
// as "bad IP" when the argument is neither a valid IPv4 nor IPv6 address.
Lookup(net.IP) *Range
// NumRanges returns the number of ranges in the list.
NumRanges() int
}
// IPList is an IP list that holds its ranges in a slice.
type IPList struct {
// NOTE(review): lookup binary-searches this slice by Range.First, so it is
// presumably kept sorted by First and free of overlaps - confirm at the
// construction site.
ranges []Range
}
@ -62,30 +70,38 @@ func (me *IPList) Lookup(ip net.IP) (r *Range) {
}
if v4 == nil && v6 == nil {
return &Range{
Description: fmt.Sprintf("unsupported IP: %s", ip),
Description: "bad IP",
}
}
return nil
}
// Return the range the given IP is in. Returns nil if no range is found.
func (me *IPList) lookup(ip net.IP) (r *Range) {
// Return a range that contains ip, or nil.
func lookup(f func(i int) Range, n int, ip net.IP) *Range {
// Find the index of the first range for which the following range exceeds
// it.
i := sort.Search(len(me.ranges), func(i int) bool {
if i+1 >= len(me.ranges) {
i := sort.Search(n, func(i int) bool {
if i+1 >= n {
return true
}
return bytes.Compare(ip, me.ranges[i+1].First) < 0
r := f(i + 1)
return bytes.Compare(ip, r.First) < 0
})
if i == len(me.ranges) {
return
if i == n {
return nil
}
r = &me.ranges[i]
r := f(i)
if bytes.Compare(ip, r.First) < 0 || bytes.Compare(ip, r.Last) > 0 {
r = nil
return nil
}
return
return &r
}
// lookup returns the range in this list that contains ip, or nil when no
// range does. It delegates to the package-level lookup, supplying an accessor
// over the in-memory ranges slice.
func (me *IPList) lookup(ip net.IP) (r *Range) {
	rangeAt := func(i int) Range {
		return me.ranges[i]
	}
	r = lookup(rangeAt, len(me.ranges), ip)
	return r
}
func minifyIP(ip *net.IP) {

View File

@ -2,6 +2,7 @@ package iplist
import (
"bufio"
"bytes"
"fmt"
"net"
"strings"
@ -9,14 +10,36 @@ import (
"github.com/anacrolix/missinggo"
"github.com/bradfitz/iter"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
var sample = `
var (
// Note the shared description "eff". The overlapping ranges at 1.2.8.2
// will cause problems. Don't overlap your ranges.
sample = `
# List distributed by iblocklist.com
a:1.2.4.0-1.2.4.255
b:1.2.8.0-1.2.8.255
something:more detail:86.59.95.195-86.59.95.195`
eff:1.2.8.2-1.2.8.2
something:more detail:86.59.95.195-86.59.95.195
eff:127.0.0.0-127.0.0.1`
packedSample []byte
)
// init populates packedSample with the packed encoding of sample, so tests
// can exercise the packed list against the same data as the plain one. It
// panics if the sample cannot be parsed or packed.
func init() {
	list, err := NewFromReader(strings.NewReader(sample))
	if err != nil {
		panic(err)
	}
	packed := new(bytes.Buffer)
	if err := list.WritePacked(packed); err != nil {
		panic(err)
	}
	packedSample = packed.Bytes()
}
func TestIPv4RangeLen(t *testing.T) {
ranges, _ := sampleRanges(t)
@ -73,30 +96,18 @@ func connRemoteAddrIP(network, laddr string, dialHost string) net.IP {
}
func TestBadIP(t *testing.T) {
iplist := New(nil)
if iplist.Lookup(net.IP(make([]byte, 4))) != nil {
t.FailNow()
}
if iplist.Lookup(net.IP(make([]byte, 16))) != nil {
t.FailNow()
}
if iplist.Lookup(nil) == nil {
t.FailNow()
}
if iplist.Lookup(net.IP(make([]byte, 5))) == nil {
t.FailNow()
for _, iplist := range []Ranger{
New(nil),
NewFromPacked([]byte("\x00\x00\x00\x00\x00\x00\x00\x00")),
} {
assert.Nil(t, iplist.Lookup(net.IP(make([]byte, 4))), "%v", iplist)
assert.Nil(t, iplist.Lookup(net.IP(make([]byte, 16))))
assert.Equal(t, iplist.Lookup(nil).Description, "bad IP")
assert.NotNil(t, iplist.Lookup(net.IP(make([]byte, 5))))
}
}
func TestSimple(t *testing.T) {
ranges, err := sampleRanges(t)
if err != nil {
t.Fatal(err)
}
if len(ranges) != 3 {
t.Fatalf("expected 3 ranges but got %d", len(ranges))
}
iplist := New(ranges)
func testLookuperSimple(t *testing.T, iplist Ranger) {
for _, _case := range []struct {
IP string
Hit bool
@ -107,8 +118,9 @@ func TestSimple(t *testing.T) {
{"1.2.4.255", true, "a"},
// Try to roll over to the next octet on the parse: note that the final
// octet in the next case is out of bounds.
{"1.2.7.256", true, "unsupported IP: <nil>"},
{"1.2.8.254", true, "b"},
{"1.2.7.256", true, "bad IP"},
{"1.2.8.1", true, "b"},
{"1.2.8.2", true, "eff"},
} {
ip := net.ParseIP(_case.IP)
r := iplist.Lookup(ip)
@ -121,8 +133,16 @@ func TestSimple(t *testing.T) {
if r == nil {
t.Fatalf("expected hit for %q", _case.IP)
}
if r.Description != _case.Desc {
t.Fatalf("%q != %q", r.Description, _case.Desc)
assert.Equal(t, _case.Desc, r.Description, "%T", iplist)
}
}
// TestSimple runs the shared lookup cases against both Ranger
// implementations: the slice-backed list built from the sample ranges, and
// the packed list built from packedSample.
func TestSimple(t *testing.T) {
	ranges, err := sampleRanges(t)
	require.NoError(t, err)
	require.Len(t, ranges, 5)
	for _, list := range []Ranger{
		New(ranges),
		NewFromPacked(packedSample),
	} {
		testLookuperSimple(t, list)
	}
}

103
iplist/packed.go Normal file
View File

@ -0,0 +1,103 @@
package iplist
import (
"encoding/binary"
"io"
"net"
)
// The packed format starts with an 8 byte little-endian integer giving the
// number of ranges. It is followed by 20 bytes per range: the 4 byte packed
// IP that is the lower bound of the range, the 4 byte upper (inclusive)
// bound, an 8 byte little-endian offset of the description measured from the
// end of the packed ranges, and a 4 byte little-endian description length.
// The concatenated descriptions follow the packed ranges; ranges with the
// same description share a single copy.
const (
// Offset of the first packed range, i.e. the size of the range count header.
packedRangesOffset = 8
// Size in bytes of one packed range record (4 + 4 + 8 + 4).
packedRangeLen = 20
)
// WritePacked serialises the list to w in the packed format: an 8 byte
// little-endian range count, then for each range its first and last IPv4
// address (4 bytes each), the 8 byte little-endian offset of its description
// within the description area and the 4 byte little-endian description
// length, and finally the concatenated descriptions. Ranges with identical
// descriptions share one copy of the text.
//
// Only IPv4 ranges can be represented. If any range bound has no 4 byte form
// a *net.AddrError is returned before anything is written, instead of
// panicking part-way through the output. Errors from w are returned as-is,
// and a short write that reports no error is returned as io.ErrShortWrite.
func (me *IPList) WritePacked(w io.Writer) (err error) {
	// Validate every bound first so that bad input never produces partial
	// output.
	for _, r := range me.ranges {
		for _, ip := range [...]net.IP{r.First, r.Last} {
			if ip.To4() == nil {
				return &net.AddrError{
					Err:  "packed IP list only supports IPv4",
					Addr: ip.String(),
				}
			}
		}
	}
	descOffsets := make(map[string]int64, len(me.ranges))
	descs := make([]string, 0, len(me.ranges))
	var nextOffset int64
	// write becomes a no-op once an error has been recorded, so the code
	// below can issue writes unconditionally and check err once at the end.
	write := func(b []byte) {
		if err != nil {
			return
		}
		var n int
		n, err = w.Write(b)
		if err == nil && n != len(b) {
			err = io.ErrShortWrite
		}
	}
	var b [8]byte
	binary.LittleEndian.PutUint64(b[:], uint64(len(me.ranges)))
	write(b[:])
	for _, r := range me.ranges {
		write(r.First.To4())
		write(r.Last.To4())
		// Reuse the offset of a description that has already been seen.
		descOff, ok := descOffsets[r.Description]
		if !ok {
			descOff = nextOffset
			descOffsets[r.Description] = descOff
			descs = append(descs, r.Description)
			nextOffset += int64(len(r.Description))
		}
		binary.LittleEndian.PutUint64(b[:], uint64(descOff))
		write(b[:])
		binary.LittleEndian.PutUint32(b[:], uint32(len(r.Description)))
		write(b[:4])
	}
	for _, d := range descs {
		write([]byte(d))
	}
	return
}
// NewFromPacked wraps b, which must hold data in the packed format, as a
// PackedIPList. The bytes are used in place and are not copied.
func NewFromPacked(b []byte) PackedIPList {
	var list PackedIPList = b
	return list
}
// PackedIPList is an IP list in the packed format. Its methods read ranges
// directly out of the underlying bytes.
type PackedIPList []byte
// Compile-time check that PackedIPList implements Ranger.
var _ Ranger = PackedIPList{}
// len returns the number of ranges recorded in the 8 byte little-endian
// header at the start of the packed data. A list that is too short to hold
// the header (including a nil list) is reported as empty instead of
// panicking on the slice expression.
func (me PackedIPList) len() int {
	// 8 is the size of the range count header.
	if len(me) < 8 {
		return 0
	}
	return int(binary.LittleEndian.Uint64(me[:8]))
}
// NumRanges returns the number of ranges in the list, as recorded in the
// header of the packed data.
func (me PackedIPList) NumRanges() int {
return me.len()
}
// getRange decodes the i'th packed range record. First and Last alias the
// packed bytes, while Description is copied out of the description area that
// follows the range records. There is no bounds checking beyond Go's own: i
// must be a valid index and the data well formed.
func (me PackedIPList) getRange(i int) (ret Range) {
	base := packedRangesOffset + i*packedRangeLen
	ret.First = net.IP(me[base : base+4])
	ret.Last = net.IP(me[base+4 : base+8])
	relOff := int(binary.LittleEndian.Uint64(me[base+8:]))
	size := int(binary.LittleEndian.Uint32(me[base+16:]))
	// Stored offsets are relative to the end of the range records.
	start := relOff + packedRangesOffset + packedRangeLen*me.len()
	ret.Description = string(me[start : start+size])
	return ret
}
// Lookup returns the range containing ip, or nil if there is none. An ip
// that is neither a valid IPv4 nor IPv6 address yields a Range described as
// "bad IP".
func (me PackedIPList) Lookup(ip net.IP) (r *Range) {
	if v4 := ip.To4(); v4 != nil {
		return lookup(me.getRange, me.len(), v4)
	}
	if ip.To16() != nil {
		// If the IP list was built successfully, then it only contained IPv4
		// ranges. Therefore no IPv6 ranges are blocked.
		return nil
	}
	return &Range{
		Description: "bad IP",
	}
}

35
iplist/packed_test.go Normal file
View File

@ -0,0 +1,35 @@
package iplist
import (
"bytes"
"strings"
"testing"
"github.com/stretchr/testify/require"
)
// The active ingredients in the sample P2P blocklist file contents `sample`,
// for reference:
//
// a:1.2.4.0-1.2.4.255
// b:1.2.8.0-1.2.8.255
// eff:1.2.8.2-1.2.8.2
// something:more detail:86.59.95.195-86.59.95.195
// eff:127.0.0.0-127.0.0.1
//
// TestWritePacked packs the sample list and compares the output byte for
// byte with the expected encoding.
func TestWritePacked(t *testing.T) {
l, err := NewFromReader(strings.NewReader(sample))
require.NoError(t, err)
var buf bytes.Buffer
err = l.WritePacked(&buf)
require.NoError(t, err)
require.Equal(t,
// Header: the number of ranges (5) as an 8 byte little-endian integer.
"\x05\x00\x00\x00\x00\x00\x00\x00"+
// One record per range: first IP, last IP, description offset (8 bytes),
// description length (4 bytes). Both "eff" ranges point at offset 2.
"\x01\x02\x04\x00\x01\x02\x04\xff"+"\x00\x00\x00\x00\x00\x00\x00\x00"+"\x01\x00\x00\x00"+
"\x01\x02\x08\x00\x01\x02\x08\xff"+"\x01\x00\x00\x00\x00\x00\x00\x00"+"\x01\x00\x00\x00"+
"\x01\x02\x08\x02\x01\x02\x08\x02"+"\x02\x00\x00\x00\x00\x00\x00\x00"+"\x03\x00\x00\x00"+
"\x56\x3b\x5f\xc3\x56\x3b\x5f\xc3"+"\x05\x00\x00\x00\x00\x00\x00\x00"+"\x15\x00\x00\x00"+
"\x7f\x00\x00\x00\x7f\x00\x00\x01"+"\x02\x00\x00\x00\x00\x00\x00\x00"+"\x03\x00\x00\x00"+
// The concatenated descriptions, each distinct description stored once.
"abeffsomething:more detail",
buf.String())
}