Optimize FirstUniqueStrings and FirstUniquePaths

FirstUniquePaths is called on some long lists where the O(n^2)
behavior is problematic.  Use a map-based implementation for
longer lists.

Test: TestFirstUniqueStrings
Change-Id: I7181aba869e5ccc0f99c2fa7b8f03839f06e4307
This commit is contained in:
Colin Cross 2020-02-28 15:34:17 -08:00
parent c6e538406c
commit 27027c765e
4 changed files with 157 additions and 5 deletions

View File

@ -470,6 +470,14 @@ func (p Paths) Strings() []string {
// FirstUniquePaths returns all unique elements of a Paths, keeping the first copy of each. It
// modifies the Paths slice contents in place, and returns a subslice of the original slice.
func FirstUniquePaths(list Paths) Paths {
// 128 was chosen based on BenchmarkFirstUniquePaths results.
if len(list) > 128 {
return firstUniquePathsMap(list)
}
return firstUniquePathsList(list)
}
func firstUniquePathsList(list Paths) Paths {
k := 0
outer:
for i := 0; i < len(list); i++ {
@ -484,6 +492,20 @@ outer:
return list[:k]
}
func firstUniquePathsMap(list Paths) Paths {
k := 0
seen := make(map[Path]bool, len(list))
for i := 0; i < len(list); i++ {
if seen[list[i]] {
continue
}
seen[list[i]] = true
list[k] = list[i]
k++
}
return list[:k]
}
// LastUniquePaths returns all unique elements of a Paths, keeping the last copy of each. It
// modifies the Paths slice contents in place, and returns a subslice of the original slice.
func LastUniquePaths(list Paths) Paths {

View File

@ -18,6 +18,7 @@ import (
"errors"
"fmt"
"reflect"
"strconv"
"strings"
"testing"
@ -1255,3 +1256,51 @@ func ExampleOutputPath_FileInSameDir() {
// out/system/framework/boot.art out/system/framework/oat/arm/boot.vdex
// boot.art oat/arm/boot.vdex
}
func BenchmarkFirstUniquePaths(b *testing.B) {
implementations := []struct {
name string
f func(Paths) Paths
}{
{
name: "list",
f: firstUniquePathsList,
},
{
name: "map",
f: firstUniquePathsMap,
},
}
const maxSize = 1024
uniquePaths := make(Paths, maxSize)
for i := range uniquePaths {
uniquePaths[i] = PathForTesting(strconv.Itoa(i))
}
samePath := make(Paths, maxSize)
for i := range samePath {
samePath[i] = uniquePaths[0]
}
f := func(b *testing.B, imp func(Paths) Paths, paths Paths) {
for i := 0; i < b.N; i++ {
b.ReportAllocs()
paths = append(Paths(nil), paths...)
imp(paths)
}
}
for n := 1; n <= maxSize; n <<= 1 {
b.Run(strconv.Itoa(n), func(b *testing.B) {
for _, implementation := range implementations {
b.Run(implementation.name, func(b *testing.B) {
b.Run("same", func(b *testing.B) {
f(b, implementation.f, samePath[:n])
})
b.Run("unique", func(b *testing.B) {
f(b, implementation.f, uniquePaths[:n])
})
})
}
})
}
}

View File

@ -193,6 +193,14 @@ func RemoveFromList(s string, list []string) (bool, []string) {
// FirstUniqueStrings returns all unique elements of a slice of strings, keeping the first copy of
// each. It modifies the slice contents in place, and returns a subslice of the original slice.
func FirstUniqueStrings(list []string) []string {
// 128 was chosen based on BenchmarkFirstUniqueStrings results.
if len(list) > 128 {
return firstUniqueStringsMap(list)
}
return firstUniqueStringsList(list)
}
func firstUniqueStringsList(list []string) []string {
k := 0
outer:
for i := 0; i < len(list); i++ {
@ -207,6 +215,20 @@ outer:
return list[:k]
}
func firstUniqueStringsMap(list []string) []string {
k := 0
seen := make(map[string]bool, len(list))
for i := 0; i < len(list); i++ {
if seen[list[i]] {
continue
}
seen[list[i]] = true
list[k] = list[i]
k++
}
return list[:k]
}
// LastUniqueStrings returns all unique elements of a slice of strings, keeping the last copy of
// each. It modifies the slice contents in place, and returns a subslice of the original slice.
func LastUniqueStrings(list []string) []string {

View File

@ -17,6 +17,7 @@ package android
import (
"fmt"
"reflect"
"strconv"
"testing"
)
@ -59,15 +60,25 @@ var firstUniqueStringsTestCases = []struct {
}
func TestFirstUniqueStrings(t *testing.T) {
for _, testCase := range firstUniqueStringsTestCases {
out := FirstUniqueStrings(testCase.in)
if !reflect.DeepEqual(out, testCase.out) {
f := func(t *testing.T, imp func([]string) []string, in, want []string) {
t.Helper()
out := imp(in)
if !reflect.DeepEqual(out, want) {
t.Errorf("incorrect output:")
t.Errorf(" input: %#v", testCase.in)
t.Errorf(" expected: %#v", testCase.out)
t.Errorf(" input: %#v", in)
t.Errorf(" expected: %#v", want)
t.Errorf(" got: %#v", out)
}
}
for _, testCase := range firstUniqueStringsTestCases {
t.Run("list", func(t *testing.T) {
f(t, firstUniqueStringsList, testCase.in, testCase.out)
})
t.Run("map", func(t *testing.T) {
f(t, firstUniqueStringsMap, testCase.in, testCase.out)
})
}
}
var lastUniqueStringsTestCases = []struct {
@ -568,3 +579,51 @@ func Test_Shard(t *testing.T) {
})
}
}
func BenchmarkFirstUniqueStrings(b *testing.B) {
implementations := []struct {
name string
f func([]string) []string
}{
{
name: "list",
f: firstUniqueStringsList,
},
{
name: "map",
f: firstUniqueStringsMap,
},
}
const maxSize = 1024
uniqueStrings := make([]string, maxSize)
for i := range uniqueStrings {
uniqueStrings[i] = strconv.Itoa(i)
}
sameString := make([]string, maxSize)
for i := range sameString {
sameString[i] = uniqueStrings[0]
}
f := func(b *testing.B, imp func([]string) []string, s []string) {
for i := 0; i < b.N; i++ {
b.ReportAllocs()
s = append([]string(nil), s...)
imp(s)
}
}
for n := 1; n <= maxSize; n <<= 1 {
b.Run(strconv.Itoa(n), func(b *testing.B) {
for _, implementation := range implementations {
b.Run(implementation.name, func(b *testing.B) {
b.Run("same", func(b *testing.B) {
f(b, implementation.f, sameString[:n])
})
b.Run("unique", func(b *testing.B) {
f(b, implementation.f, uniqueStrings[:n])
})
})
}
})
}
}