diff --git a/Android.bp b/Android.bp index 82be0fa65..0c79bf539 100644 --- a/Android.bp +++ b/Android.bp @@ -2,6 +2,8 @@ subdirs = [ "androidmk", "bpfix", "cmd/*", + "fs", + "finder", "third_party/zip", "ui/*", ] diff --git a/finder/Android.bp b/finder/Android.bp new file mode 100644 index 000000000..b5c0e130d --- /dev/null +++ b/finder/Android.bp @@ -0,0 +1,37 @@ +// Copyright 2017 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +// fast, parallel, caching implementation of `find` +// + +subdirs = [ + "cmd" +] + +bootstrap_go_package { + name: "soong-finder", + pkgPath: "android/soong/finder", + srcs: [ + "finder.go", + ], + testSrcs: [ + "finder_test.go", + ], + deps: [ + "soong-fs", + ], +} + + diff --git a/finder/cmd/Android.bp b/finder/cmd/Android.bp new file mode 100644 index 000000000..9dc84ae4d --- /dev/null +++ b/finder/cmd/Android.bp @@ -0,0 +1,29 @@ +// Copyright 2017 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +// fast, parallel, caching implementation of `find` +// + +blueprint_go_binary { + name: "finder", + srcs: [ + "finder.go", + ], + deps: [ + "soong-finder" + ], +} + + diff --git a/finder/cmd/finder.go b/finder/cmd/finder.go new file mode 100644 index 000000000..9da166026 --- /dev/null +++ b/finder/cmd/finder.go @@ -0,0 +1,149 @@ +// Copyright 2017 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "errors" + "flag" + "fmt" + "io" + "io/ioutil" + "log" + "os" + "runtime/pprof" + "sort" + "strings" + "time" + + "android/soong/finder" + "android/soong/fs" +) + +var ( + // configuration of what to find + excludeDirs string + filenamesToFind string + pruneFiles string + + // other configuration + cpuprofile string + verbose bool + dbPath string + numIterations int +) + +func init() { + flag.StringVar(&cpuprofile, "cpuprofile", "", + "filepath of profile file to write (optional)") + flag.BoolVar(&verbose, "v", false, "log additional information") + flag.StringVar(&dbPath, "db", "", "filepath of cache db") + + flag.StringVar(&excludeDirs, "exclude-dirs", "", + "comma-separated list of directory names to exclude from search") + flag.StringVar(&filenamesToFind, "names", "", + "comma-separated list of filenames to find") + flag.StringVar(&pruneFiles, "prune-files", "", + "filenames that if discovered will exclude their entire directory "+ + "(including sibling files and directories)") + flag.IntVar(&numIterations, "count", 1, + "number of times to run. This is intended for use with --cpuprofile"+ + " , to increase profile accuracy") +} + +var usage = func() { + fmt.Printf("usage: finder -name --db [...]\n") + flag.PrintDefaults() +} + +func main() { + err := run() + if err != nil { + fmt.Fprintf(os.Stderr, "%v\n", err.Error()) + os.Exit(1) + } +} + +func stringToList(input string) []string { + return strings.Split(input, ",") +} + +func run() error { + startTime := time.Now() + flag.Parse() + + if cpuprofile != "" { + f, err := os.Create(cpuprofile) + if err != nil { + return fmt.Errorf("Error opening cpuprofile: %s", err) + } + pprof.StartCPUProfile(f) + defer f.Close() + defer pprof.StopCPUProfile() + } + + var writer io.Writer + if verbose { + writer = os.Stderr + } else { + writer = ioutil.Discard + } + + // TODO: replace Lshortfile with Llongfile when bug 63821638 is done + logger := log.New(writer, "", log.Ldate|log.Lmicroseconds|log.Lshortfile) + + logger.Printf("Finder starting at %v\n", startTime) + + rootPaths := flag.Args() + if len(rootPaths) < 1 { + usage() + return fmt.Errorf( + "Must give at least one ") + } + + workingDir, err := os.Getwd() + if err != nil { + return err + } + params := finder.CacheParams{ + WorkingDirectory: workingDir, + RootDirs: rootPaths, + ExcludeDirs: stringToList(excludeDirs), + PruneFiles: stringToList(pruneFiles), + IncludeFiles: stringToList(filenamesToFind), + } + if dbPath == "" { + usage() + return errors.New("Param 'db' must be nonempty") + } + matches := []string{} + for i := 0; i < numIterations; i++ { + matches = runFind(params, logger) + } + findDuration := time.Since(startTime) + logger.Printf("Found these %v inodes in %v :\n", len(matches), findDuration) + sort.Strings(matches) + for _, match := range matches { + fmt.Println(match) + } + logger.Printf("End of %v inodes\n", len(matches)) + logger.Printf("Finder completed in %v\n", time.Since(startTime)) + return nil +} + +func runFind(params finder.CacheParams, logger *log.Logger) (paths []string) { + service := finder.New(params, fs.OsFs, logger, dbPath) + defer service.Shutdown() + return service.FindAll() +} diff --git a/finder/finder.go b/finder/finder.go new file mode 100644 index 000000000..ad85ee9af --- /dev/null +++ b/finder/finder.go @@ -0,0 +1,1399 @@ +// Copyright 2017 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package finder + +import ( + "bufio" + "bytes" + "encoding/json" + "fmt" + "io" + "os" + "path/filepath" + "runtime" + "sort" + "strings" + "sync" + "sync/atomic" + "time" + + "android/soong/fs" + "errors" +) + +// This file provides a Finder struct that can quickly search for files satisfying +// certain criteria. +// This Finder gets its speed partially from parallelism and partially from caching. +// If a Stat call returns the same result as last time, then it means Finder +// can skip the ReadDir call for that dir. + +// The primary data structure used by the finder is the field Finder.nodes , +// which is a tree of nodes of type *pathMap . +// Each node represents a directory on disk, along with its stats, subdirectories, +// and contained files. + +// The common use case for the Finder is that the caller creates a Finder and gives +// it the same query that was given to it in the previous execution. +// In this situation, the major events that take place are: +// 1. The Finder begins to load its db +// 2. The Finder begins to stat the directories mentioned in its db (using multiple threads) +// Calling Stat on each of these directories is generally a large fraction of the total time +// 3. The Finder begins to construct a separate tree of nodes in each of its threads +// 4. The Finder merges the individual node trees into the main node tree +// 5. The Finder may call ReadDir a few times if there are a few directories that are out-of-date +// These ReadDir calls might prompt additional Stat calls, etc +// 6. The Finder waits for all loading to complete +// 7. The Finder searches the cache for files matching the user's query (using multiple threads) + +// These are the invariants regarding concurrency: +// 1. The public methods of Finder are threadsafe. +// The public methods are only performance-optimized for one caller at a time, however. +// For the moment, multiple concurrent callers shouldn't expect any better performance than +// multiple serial callers. +// 2. While building the node tree, only one thread may ever access the collection of a +// *pathMap at once. +// a) The thread that accesses the collection is the thread that discovers the +// children (by reading them from the cache or by having received a response to ReadDir). +// 1) Consequently, the thread that discovers the children also spawns requests to stat +// subdirs. +// b) Consequently, while building the node tree, no thread may do a lookup of its +// *pathMap via filepath because another thread may be adding children to the +// collection of an ancestor node. Additionally, in rare cases, another thread +// may be removing children from an ancestor node if the children were only discovered to +// be irrelevant after calling ReadDir (which happens if a prune-file was just added). +// 3. No query will begin to be serviced until all loading (both reading the db +// and scanning the filesystem) is complete. +// Tests indicate that it only takes about 10% as long to search the in-memory cache as to +// generate it, making this not a huge loss in performance. +// 4. The parsing of the db and the initial setup of the pathMap tree must complete before +// beginning to call listDirSync (because listDirSync can create new entries in the pathMap) + +// see cmd/finder.go or finder_test.go for usage examples + +// Update versionString whenever making a backwards-incompatible change to the cache file format +const versionString = "Android finder version 1" + +// a CacheParams specifies which files and directories the user wishes be scanned and +// potentially added to the cache +type CacheParams struct { + // WorkingDirectory is used as a base for any relative file paths given to the Finder + WorkingDirectory string + + // RootDirs are the root directories used to initiate the search + RootDirs []string + + // ExcludeDirs are directory names that if encountered are removed from the search + ExcludeDirs []string + + // PruneFiles are file names that if encountered prune their entire directory + // (including siblings) + PruneFiles []string + + // IncludeFiles are file names to include as matches + IncludeFiles []string +} + +// a cacheConfig stores the inputs that determine what should be included in the cache +type cacheConfig struct { + CacheParams + + // FilesystemView is a unique identifier telling which parts of which file systems + // are readable by the Finder. In practice its value is essentially username@hostname. + // FilesystemView is set to ensure that a cache file copied to another host or + // found by another user doesn't inadvertently get reused. + FilesystemView string +} + +func (p *cacheConfig) Dump() ([]byte, error) { + bytes, err := json.Marshal(p) + return bytes, err +} + +// a cacheMetadata stores version information about the cache +type cacheMetadata struct { + // The Version enables the Finder to determine whether it can even parse the file + // If the version changes, the entire cache file must be regenerated + Version string + + // The CacheParams enables the Finder to determine whether the parameters match + // If the CacheParams change, the Finder can choose how much of the cache file to reuse + // (although in practice, the Finder will probably choose to ignore the entire file anyway) + Config cacheConfig +} + +type Logger interface { + Output(calldepth int, s string) error +} + +// the Finder is the main struct that callers will want to use +type Finder struct { + // configuration + DbPath string + numDbLoadingThreads int + numSearchingThreads int + cacheMetadata cacheMetadata + logger Logger + filesystem fs.FileSystem + + // temporary state + threadPool *threadPool + mutex sync.Mutex + + // non-temporary state + modifiedFlag int32 + nodes pathMap +} + +// New creates a new Finder for use +func New(cacheParams CacheParams, filesystem fs.FileSystem, + logger Logger, dbPath string) *Finder { + + numThreads := runtime.NumCPU() * 2 + numDbLoadingThreads := numThreads + numSearchingThreads := numThreads + + metadata := cacheMetadata{ + Version: versionString, + Config: cacheConfig{ + CacheParams: cacheParams, + FilesystemView: filesystem.ViewId(), + }, + } + + finder := &Finder{ + numDbLoadingThreads: numDbLoadingThreads, + numSearchingThreads: numSearchingThreads, + cacheMetadata: metadata, + logger: logger, + filesystem: filesystem, + + nodes: *newPathMap("/"), + DbPath: dbPath, + } + + finder.loadFromFilesystem() + + finder.verbosef("Done parsing db\n") + return finder +} + +// FindNamed searches for every cached file +func (f *Finder) FindAll() []string { + return f.FindAt("/") +} + +// FindNamed searches for every cached file under +func (f *Finder) FindAt(rootDir string) []string { + filter := func(entries DirEntries) (dirNames []string, fileNames []string) { + return entries.DirNames, entries.FileNames + } + return f.FindMatching(rootDir, filter) +} + +// FindNamed searches for every cached file named +func (f *Finder) FindNamed(fileName string) []string { + return f.FindNamedAt("/", fileName) +} + +// FindNamedAt searches under for every file named +// The reason a caller might use FindNamedAt instead of FindNamed is if they want +// to limit their search to a subset of the cache +func (f *Finder) FindNamedAt(rootPath string, fileName string) []string { + filter := func(entries DirEntries) (dirNames []string, fileNames []string) { + matches := []string{} + for _, foundName := range entries.FileNames { + if foundName == fileName { + matches = append(matches, foundName) + } + } + return entries.DirNames, matches + + } + return f.FindMatching(rootPath, filter) +} + +// FindFirstNamed searches for every file named +// Whenever it finds a match, it stops search subdirectories +func (f *Finder) FindFirstNamed(fileName string) []string { + return f.FindFirstNamedAt("/", fileName) +} + +// FindFirstNamedAt searches for every file named +// Whenever it finds a match, it stops search subdirectories +func (f *Finder) FindFirstNamedAt(rootPath string, fileName string) []string { + filter := func(entries DirEntries) (dirNames []string, fileNames []string) { + matches := []string{} + for _, foundName := range entries.FileNames { + if foundName == fileName { + matches = append(matches, foundName) + } + } + + if len(matches) > 0 { + return []string{}, matches + } + return entries.DirNames, matches + } + return f.FindMatching(rootPath, filter) +} + +// FindMatching is the most general exported function for searching for files in the cache +// The WalkFunc will be invoked repeatedly and is expected to modify the provided DirEntries +// in place, removing file paths and directories as desired. +// WalkFunc will be invoked potentially many times in parallel, and must be threadsafe. +func (f *Finder) FindMatching(rootPath string, filter WalkFunc) []string { + // set up some parameters + scanStart := time.Now() + var isRel bool + workingDir := f.cacheMetadata.Config.WorkingDirectory + + isRel = !filepath.IsAbs(rootPath) + if isRel { + rootPath = filepath.Join(workingDir, rootPath) + } + + rootPath = filepath.Clean(rootPath) + + // ensure nothing else is using the Finder + f.verbosef("FindMatching waiting for finder to be idle\n") + f.lock() + defer f.unlock() + + node := f.nodes.GetNode(rootPath, false) + if node == nil { + f.verbosef("No data for path %v ; apparently not included in cache params: %v\n", + rootPath, f.cacheMetadata.Config.CacheParams) + // path is not found; don't do a search + return []string{} + } + + // search for matching files + f.verbosef("Finder finding %v using cache\n", rootPath) + results := f.findInCacheMultithreaded(node, filter, f.numSearchingThreads) + + // format and return results + if isRel { + for i := 0; i < len(results); i++ { + results[i] = strings.Replace(results[i], workingDir+"/", "", 1) + } + } + sort.Strings(results) + f.verbosef("Found %v files under %v in %v using cache\n", + len(results), rootPath, time.Since(scanStart)) + return results +} + +// Shutdown saves the contents of the Finder to its database file +func (f *Finder) Shutdown() { + f.verbosef("Shutting down\n") + if f.wasModified() { + err := f.dumpDb() + if err != nil { + f.verbosef("%v\n", err) + } + } else { + f.verbosef("Skipping dumping unmodified db\n") + } +} + +// End of public api + +// joinCleanPaths is like filepath.Join but is faster because +// joinCleanPaths doesn't have to support paths ending in "/" or containing ".." +func joinCleanPaths(base string, leaf string) string { + if base == "" { + return leaf + } + if base == "/" { + return base + leaf + } + if leaf == "" { + return base + } + return base + "/" + leaf +} + +func (f *Finder) verbosef(format string, args ...interface{}) { + f.logger.Output(2, fmt.Sprintf(format, args...)) +} + +// loadFromFilesystem populates the in-memory cache based on the contents of the filesystem +func (f *Finder) loadFromFilesystem() { + f.threadPool = newThreadPool(f.numDbLoadingThreads) + + err := f.startFromExternalCache() + if err != nil { + f.startWithoutExternalCache() + } + + startTime := time.Now() + f.verbosef("Waiting for pending requests to complete\n") + f.threadPool.Wait() + f.verbosef("Is idle after %v\n", time.Now().Sub(startTime)) + f.threadPool = nil +} + +func (f *Finder) startFind(path string) { + if !filepath.IsAbs(path) { + path = filepath.Join(f.cacheMetadata.Config.WorkingDirectory, path) + } + node := f.nodes.GetNode(path, true) + f.statDirAsync(node) +} + +func (f *Finder) lock() { + f.mutex.Lock() +} + +func (f *Finder) unlock() { + f.mutex.Unlock() +} + +// a statResponse is the relevant portion of the response from the filesystem to a Stat call +type statResponse struct { + ModTime int64 + Inode uint64 + Device uint64 +} + +// a pathAndStats stores a path and its stats +type pathAndStats struct { + statResponse + + Path string +} + +// a dirFullInfo stores all of the relevant information we know about a directory +type dirFullInfo struct { + pathAndStats + + FileNames []string +} + +// a PersistedDirInfo is the information about a dir that we save to our cache on disk +type PersistedDirInfo struct { + // These field names are short because they are repeated many times in the output json file + P string // path + T int64 // modification time + I uint64 // inode number + F []string // relevant filenames contained +} + +// a PersistedDirs is the information that we persist for a group of dirs +type PersistedDirs struct { + // the device on which each directory is stored + Device uint64 + // the common root path to which all contained dirs are relative + Root string + // the directories themselves + Dirs []PersistedDirInfo +} + +// a CacheEntry is the smallest unit that can be read and parsed from the cache (on disk) at a time +type CacheEntry []PersistedDirs + +// a DirEntries lists the files and directories contained directly within a specific directory +type DirEntries struct { + Path string + + // elements of DirNames are just the dir names; they don't include any '/' character + DirNames []string + // elements of FileNames are just the file names; they don't include '/' character + FileNames []string +} + +// a WalkFunc is the type that is passed into various Find functions for determining which +// directories the caller wishes be walked. The WalkFunc is expected to decide which +// directories to walk and which files to consider as matches to the original query. +type WalkFunc func(DirEntries) (dirs []string, files []string) + +// a mapNode stores the relevant stats about a directory to be stored in a pathMap +type mapNode struct { + statResponse + FileNames []string +} + +// a pathMap implements the directory tree structure of nodes +type pathMap struct { + mapNode + + path string + + children map[string]*pathMap + + // number of descendent nodes, including self + approximateNumDescendents int +} + +func newPathMap(path string) *pathMap { + result := &pathMap{path: path, children: make(map[string]*pathMap, 4), + approximateNumDescendents: 1} + return result +} + +// GetNode returns the node at +func (m *pathMap) GetNode(path string, createIfNotFound bool) *pathMap { + if len(path) > 0 && path[0] == '/' { + path = path[1:] + } + + node := m + for { + if path == "" { + return node + } + + index := strings.Index(path, "/") + var firstComponent string + if index >= 0 { + firstComponent = path[:index] + path = path[index+1:] + } else { + firstComponent = path + path = "" + } + + child, found := node.children[firstComponent] + + if !found { + if createIfNotFound { + child = node.newChild(firstComponent) + } else { + return nil + } + } + + node = child + } +} + +func (m *pathMap) newChild(name string) (child *pathMap) { + path := joinCleanPaths(m.path, name) + newChild := newPathMap(path) + m.children[name] = newChild + + return m.children[name] +} + +func (m *pathMap) UpdateNumDescendents() int { + count := 1 + for _, child := range m.children { + count += child.approximateNumDescendents + } + m.approximateNumDescendents = count + return count +} + +func (m *pathMap) UpdateNumDescendentsRecursive() { + for _, child := range m.children { + child.UpdateNumDescendentsRecursive() + } + m.UpdateNumDescendents() +} + +func (m *pathMap) MergeIn(other *pathMap) { + for key, theirs := range other.children { + ours, found := m.children[key] + if found { + ours.MergeIn(theirs) + } else { + m.children[key] = theirs + } + } + if other.ModTime != 0 { + m.mapNode = other.mapNode + } + m.UpdateNumDescendents() +} + +func (m *pathMap) DumpAll() []dirFullInfo { + results := []dirFullInfo{} + m.dumpInto("", &results) + return results +} + +func (m *pathMap) dumpInto(path string, results *[]dirFullInfo) { + *results = append(*results, + dirFullInfo{ + pathAndStats{statResponse: m.statResponse, Path: path}, + m.FileNames}, + ) + for key, child := range m.children { + childPath := joinCleanPaths(path, key) + if len(childPath) == 0 || childPath[0] != '/' { + childPath = "/" + childPath + } + child.dumpInto(childPath, results) + } +} + +// a semaphore can be locked by up to callers at once +type semaphore struct { + pool chan bool +} + +func newSemaphore(capacity int) *semaphore { + return &semaphore{pool: make(chan bool, capacity)} +} + +func (l *semaphore) Lock() { + l.pool <- true +} + +func (l *semaphore) Unlock() { + <-l.pool +} + +// A threadPool runs goroutines and supports throttling and waiting. +// Without throttling, Go may exhaust the maximum number of various resources, such as +// threads or file descriptors, and crash the program. +type threadPool struct { + receivedRequests sync.WaitGroup + activeRequests semaphore +} + +func newThreadPool(maxNumConcurrentThreads int) *threadPool { + return &threadPool{ + receivedRequests: sync.WaitGroup{}, + activeRequests: *newSemaphore(maxNumConcurrentThreads), + } +} + +// Run requests to run the given function in its own goroutine +func (p *threadPool) Run(function func()) { + p.receivedRequests.Add(1) + // If Run() was called from within a goroutine spawned by this threadPool, + // then we may need to return from Run() before having capacity to actually + // run . + // + // It's possible that the body of contains a statement (such as a syscall) + // that will cause Go to pin it to a thread, or will contain a statement that uses + // another resource that is in short supply (such as a file descriptor), so we can't + // actually run until we have capacity. + // + // However, the semaphore used for synchronization is implemented via a channel and + // shouldn't require a new thread for each access. + go func() { + p.activeRequests.Lock() + function() + p.activeRequests.Unlock() + p.receivedRequests.Done() + }() +} + +// Wait waits until all goroutines are done, just like sync.WaitGroup's Wait +func (p *threadPool) Wait() { + p.receivedRequests.Wait() +} + +func (f *Finder) serializeCacheEntry(dirInfos []dirFullInfo) ([]byte, error) { + // group each dirFullInfo by its Device, to avoid having to repeat it in the output + dirsByDevice := map[uint64][]PersistedDirInfo{} + for _, entry := range dirInfos { + _, found := dirsByDevice[entry.Device] + if !found { + dirsByDevice[entry.Device] = []PersistedDirInfo{} + } + dirsByDevice[entry.Device] = append(dirsByDevice[entry.Device], + PersistedDirInfo{P: entry.Path, T: entry.ModTime, I: entry.Inode, F: entry.FileNames}) + } + + cacheEntry := CacheEntry{} + + for device, infos := range dirsByDevice { + // find common prefix + prefix := "" + if len(infos) > 0 { + prefix = infos[0].P + } + for _, info := range infos { + for !strings.HasPrefix(info.P+"/", prefix+"/") { + prefix = filepath.Dir(prefix) + } + } + // remove common prefix + for i := range infos { + suffix := strings.Replace(infos[i].P, prefix, "", 1) + if len(suffix) > 0 && suffix[0] == '/' { + suffix = suffix[1:] + } + infos[i].P = suffix + } + + // turn the map (keyed by device) into a list of structs with labeled fields + // this is to improve readability of the output + cacheEntry = append(cacheEntry, PersistedDirs{Device: device, Root: prefix, Dirs: infos}) + } + + // convert to json. + // it would save some space to use a different format than json for the db file, + // but the space and time savings are small, and json is easy for humans to read + bytes, err := json.Marshal(cacheEntry) + return bytes, err +} + +func (f *Finder) parseCacheEntry(bytes []byte) ([]dirFullInfo, error) { + var cacheEntry CacheEntry + err := json.Unmarshal(bytes, &cacheEntry) + if err != nil { + return nil, err + } + + // convert from a CacheEntry to a []dirFullInfo (by copying a few fields) + capacity := 0 + for _, element := range cacheEntry { + capacity += len(element.Dirs) + } + nodes := make([]dirFullInfo, capacity) + count := 0 + for _, element := range cacheEntry { + for _, dir := range element.Dirs { + path := joinCleanPaths(element.Root, dir.P) + + nodes[count] = dirFullInfo{ + pathAndStats: pathAndStats{ + statResponse: statResponse{ + ModTime: dir.T, Inode: dir.I, Device: element.Device, + }, + Path: path}, + FileNames: dir.F} + count++ + } + } + return nodes, nil +} + +// We use the following separator byte to distinguish individually parseable blocks of json +// because we know this separator won't appear in the json that we're parsing. +// +// The newline byte can only appear in a UTF-8 stream if the newline character appears, because: +// - The newline character is encoded as "0000 1010" in binary ("0a" in hex) +// - UTF-8 dictates that bytes beginning with a "0" bit are never emitted as part of a multibyte +// character. +// +// We know that the newline character will never appear in our json string, because: +// - If a newline character appears as part of a data string, then json encoding will +// emit two characters instead: '\' and 'n'. +// - The json encoder that we use doesn't emit the optional newlines between any of its +// other outputs. +const lineSeparator = byte('\n') + +func (f *Finder) readLine(reader *bufio.Reader) ([]byte, error) { + return reader.ReadBytes(lineSeparator) +} + +// validateCacheHeader reads the cache header from cacheReader and tells whether the cache is compatible with this Finder +func (f *Finder) validateCacheHeader(cacheReader *bufio.Reader) bool { + cacheVersionBytes, err := f.readLine(cacheReader) + if err != nil { + f.verbosef("Failed to read database header; database is invalid\n") + return false + } + if len(cacheVersionBytes) > 0 && cacheVersionBytes[len(cacheVersionBytes)-1] == lineSeparator { + cacheVersionBytes = cacheVersionBytes[:len(cacheVersionBytes)-1] + } + cacheVersionString := string(cacheVersionBytes) + currentVersion := f.cacheMetadata.Version + if cacheVersionString != currentVersion { + f.verbosef("Version changed from %q to %q, database is not applicable\n", cacheVersionString, currentVersion) + return false + } + + cacheParamBytes, err := f.readLine(cacheReader) + if err != nil { + f.verbosef("Failed to read database search params; database is invalid\n") + return false + } + + if len(cacheParamBytes) > 0 && cacheParamBytes[len(cacheParamBytes)-1] == lineSeparator { + cacheParamBytes = cacheParamBytes[:len(cacheParamBytes)-1] + } + + currentParamBytes, err := f.cacheMetadata.Config.Dump() + if err != nil { + panic("Finder failed to serialize its parameters") + } + cacheParamString := string(cacheParamBytes) + currentParamString := string(currentParamBytes) + if cacheParamString != currentParamString { + f.verbosef("Params changed from %q to %q, database is not applicable\n", cacheParamString, currentParamString) + return false + } + return true +} + +// loadBytes compares the cache info in to the state of the filesystem +// loadBytes returns a map representing and also a slice of dirs that need to be re-walked +func (f *Finder) loadBytes(id int, data []byte) (m *pathMap, dirsToWalk []string, err error) { + + helperStartTime := time.Now() + + cachedNodes, err := f.parseCacheEntry(data) + if err != nil { + return nil, nil, fmt.Errorf("Failed to parse block %v: %v\n", id, err.Error()) + } + + unmarshalDate := time.Now() + f.verbosef("Unmarshaled %v objects for %v in %v\n", + len(cachedNodes), id, unmarshalDate.Sub(helperStartTime)) + + tempMap := newPathMap("/") + stats := make([]statResponse, len(cachedNodes)) + + for i, node := range cachedNodes { + // check the file system for an updated timestamp + stats[i] = f.statDirSync(node.Path) + } + + dirsToWalk = []string{} + for i, cachedNode := range cachedNodes { + updated := stats[i] + // save the cached value + container := tempMap.GetNode(cachedNode.Path, true) + container.mapNode = mapNode{statResponse: updated} + + // if the metadata changed and the directory still exists, then + // make a note to walk it later + if !f.isInfoUpToDate(cachedNode.statResponse, updated) && updated.ModTime != 0 { + f.setModified() + // make a note that the directory needs to be walked + dirsToWalk = append(dirsToWalk, cachedNode.Path) + } else { + container.mapNode.FileNames = cachedNode.FileNames + } + } + // count the number of nodes to improve our understanding of the shape of the tree, + // thereby improving parallelism of subsequent searches + tempMap.UpdateNumDescendentsRecursive() + + f.verbosef("Statted inodes of block %v in %v\n", id, time.Now().Sub(unmarshalDate)) + return tempMap, dirsToWalk, nil +} + +// startFromExternalCache loads the cache database from disk +// startFromExternalCache waits to return until the load of the cache db is complete, but +// startFromExternalCache does not wait for all every listDir() or statDir() request to complete +func (f *Finder) startFromExternalCache() (err error) { + startTime := time.Now() + dbPath := f.DbPath + + // open cache file and validate its header + reader, err := f.filesystem.Open(dbPath) + if err != nil { + return errors.New("No data to load from database\n") + } + bufferedReader := bufio.NewReader(reader) + if !f.validateCacheHeader(bufferedReader) { + return errors.New("Cache header does not match") + } + f.verbosef("Database header matches, will attempt to use database %v\n", f.DbPath) + + // read the file and spawn threads to process it + nodesToWalk := [][]*pathMap{} + mainTree := newPathMap("/") + + // read the blocks and stream them into + type dataBlock struct { + id int + err error + data []byte + } + blockChannel := make(chan dataBlock, f.numDbLoadingThreads) + readBlocks := func() { + index := 0 + for { + // It takes some time to unmarshal the input from json, so we want + // to unmarshal it in parallel. In order to find valid places to + // break the input, we scan for the line separators that we inserted + // (for this purpose) when we dumped the database. + data, err := f.readLine(bufferedReader) + var response dataBlock + done := false + if err != nil && err != io.EOF { + response = dataBlock{id: index, err: err, data: nil} + done = true + } else { + done = (err == io.EOF) + response = dataBlock{id: index, err: nil, data: data} + } + blockChannel <- response + index++ + duration := time.Since(startTime) + f.verbosef("Read block %v after %v\n", index, duration) + if done { + f.verbosef("Read %v blocks in %v\n", index, duration) + close(blockChannel) + return + } + } + } + go readBlocks() + + // Read from and stream the responses into . + type workResponse struct { + id int + err error + tree *pathMap + updatedDirs []string + } + resultChannel := make(chan workResponse) + processBlocks := func() { + numProcessed := 0 + threadPool := newThreadPool(f.numDbLoadingThreads) + for { + // get a block to process + block, received := <-blockChannel + if !received { + break + } + + if block.err != nil { + resultChannel <- workResponse{err: block.err} + break + } + numProcessed++ + // wait until there is CPU available to process it + threadPool.Run( + func() { + processStartTime := time.Now() + f.verbosef("Starting to process block %v after %v\n", + block.id, processStartTime.Sub(startTime)) + tempMap, updatedDirs, err := f.loadBytes(block.id, block.data) + var response workResponse + if err != nil { + f.verbosef( + "Block %v failed to parse with error %v\n", + block.id, err) + response = workResponse{err: err} + } else { + response = workResponse{ + id: block.id, + err: nil, + tree: tempMap, + updatedDirs: updatedDirs, + } + } + f.verbosef("Processed block %v in %v\n", + block.id, time.Since(processStartTime), + ) + resultChannel <- response + }, + ) + } + threadPool.Wait() + f.verbosef("Finished processing %v blocks in %v\n", + numProcessed, time.Since(startTime)) + close(resultChannel) + } + go processBlocks() + + // Read from and use the results + combineResults := func() (err error) { + for { + result, received := <-resultChannel + if !received { + break + } + if err != nil { + // In case of an error, wait for work to complete before + // returning the error. This ensures that any subsequent + // work doesn't need to compete for resources (and possibly + // fail due to, for example, a filesystem limit on the number of + // concurrently open files) with past work. + continue + } + if result.err != nil { + err = result.err + continue + } + // update main tree + mainTree.MergeIn(result.tree) + // record any new directories that we will need to Stat() + updatedNodes := make([]*pathMap, len(result.updatedDirs)) + for j, dir := range result.updatedDirs { + node := mainTree.GetNode(dir, false) + updatedNodes[j] = node + } + nodesToWalk = append(nodesToWalk, updatedNodes) + } + return err + } + err = combineResults() + if err != nil { + return err + } + + f.nodes = *mainTree + + // after having loaded the entire db and therefore created entries for + // the directories we know of, now it's safe to start calling ReadDir on + // any updated directories + for i := range nodesToWalk { + f.listDirsAsync(nodesToWalk[i]) + } + f.verbosef("Loaded db and statted its contents in %v\n", time.Since(startTime)) + return err +} + +// startWithoutExternalCache starts scanning the filesystem according to the cache config +// startWithoutExternalCache should be called if startFromExternalCache is not applicable +func (f *Finder) startWithoutExternalCache() { + configDirs := f.cacheMetadata.Config.RootDirs + + // clean paths + candidates := make([]string, len(configDirs)) + for i, dir := range configDirs { + candidates[i] = filepath.Clean(dir) + } + // remove duplicates + dirsToScan := make([]string, 0, len(configDirs)) + for _, candidate := range candidates { + include := true + for _, included := range dirsToScan { + if included == "/" || strings.HasPrefix(candidate+"/", included+"/") { + include = false + break + } + } + if include { + dirsToScan = append(dirsToScan, candidate) + } + } + + // start searching finally + for _, path := range dirsToScan { + f.verbosef("Starting find of %v\n", path) + f.startFind(path) + } +} + +// isInfoUpToDate tells whether can confirm that results computed at are still valid +func (f *Finder) isInfoUpToDate(old statResponse, new statResponse) (equal bool) { + if old.Inode != new.Inode { + return false + } + if old.ModTime != new.ModTime { + return false + } + if old.Device != new.Device { + return false + } + return true +} + +func (f *Finder) wasModified() bool { + return atomic.LoadInt32(&f.modifiedFlag) > 0 +} + +func (f *Finder) setModified() { + var newVal int32 + newVal = 1 + atomic.StoreInt32(&f.modifiedFlag, newVal) +} + +// sortedDirEntries exports directory entries to facilitate dumping them to the external cache +func (f *Finder) sortedDirEntries() []dirFullInfo { + startTime := time.Now() + nodes := make([]dirFullInfo, 0) + for _, node := range f.nodes.DumpAll() { + if node.ModTime != 0 { + nodes = append(nodes, node) + } + } + discoveryDate := time.Now() + f.verbosef("Generated %v cache entries in %v\n", len(nodes), discoveryDate.Sub(startTime)) + less := func(i int, j int) bool { + return nodes[i].Path < nodes[j].Path + } + sort.Slice(nodes, less) + sortDate := time.Now() + f.verbosef("Sorted %v cache entries in %v\n", len(nodes), sortDate.Sub(discoveryDate)) + + return nodes +} + +// serializeDb converts the cache database into a form to save to disk +func (f *Finder) serializeDb() ([]byte, error) { + // sort dir entries + var entryList = f.sortedDirEntries() + + // Generate an output file that can be conveniently loaded using the same number of threads + // as were used in this execution (because presumably that will be the number of threads + // used in the next execution too) + + // generate header + header := []byte{} + header = append(header, []byte(f.cacheMetadata.Version)...) + header = append(header, lineSeparator) + configDump, err := f.cacheMetadata.Config.Dump() + if err != nil { + return nil, err + } + header = append(header, configDump...) + + // serialize individual blocks in parallel + numBlocks := f.numDbLoadingThreads + if numBlocks > len(entryList) { + numBlocks = len(entryList) + } + blocks := make([][]byte, 1+numBlocks) + blocks[0] = header + blockMin := 0 + wg := sync.WaitGroup{} + var errLock sync.Mutex + + for i := 1; i <= numBlocks; i++ { + // identify next block + blockMax := len(entryList) * i / numBlocks + block := entryList[blockMin:blockMax] + + // process block + wg.Add(1) + go func(index int, block []dirFullInfo) { + byteBlock, subErr := f.serializeCacheEntry(block) + f.verbosef("Serialized block %v into %v bytes\n", index, len(byteBlock)) + if subErr != nil { + f.verbosef("%v\n", subErr.Error()) + errLock.Lock() + err = subErr + errLock.Unlock() + } else { + blocks[index] = byteBlock + } + wg.Done() + }(i, block) + + blockMin = blockMax + } + + wg.Wait() + + if err != nil { + return nil, err + } + + content := bytes.Join(blocks, []byte{lineSeparator}) + + return content, nil +} + +// dumpDb saves the cache database to disk +func (f *Finder) dumpDb() error { + startTime := time.Now() + f.verbosef("Dumping db\n") + + tempPath := f.DbPath + ".tmp" + + bytes, err := f.serializeDb() + if err != nil { + return err + } + serializeDate := time.Now() + f.verbosef("Serialized db in %v\n", serializeDate.Sub(startTime)) + // dump file and atomically move + err = f.filesystem.WriteFile(tempPath, bytes, 0777) + if err != nil { + return err + } + err = f.filesystem.Rename(tempPath, f.DbPath) + if err != nil { + return err + } + + f.verbosef("Wrote db in %v\n", time.Now().Sub(serializeDate)) + return nil +} + +func (f *Finder) statDirAsync(dir *pathMap) { + node := dir + path := dir.path + f.threadPool.Run( + func() { + updatedStats := f.statDirSync(path) + + if !f.isInfoUpToDate(node.statResponse, updatedStats) { + node.mapNode = mapNode{ + statResponse: updatedStats, + FileNames: []string{}, + } + f.setModified() + if node.statResponse.ModTime != 0 { + // modification time was updated, so re-scan for + // child directories + f.listDirAsync(dir) + } + } + }, + ) +} + +func (f *Finder) statDirSync(path string) statResponse { + + fileInfo, err := f.filesystem.Lstat(path) + + var stats statResponse + if err != nil { + // in case of a failure to stat the directory, treat the directory as missing (modTime = 0) + return stats + } + modTime := fileInfo.ModTime() + stats = statResponse{} + inode, err := f.filesystem.InodeNumber(fileInfo) + if err != nil { + panic(fmt.Sprintf("Could not get inode number of %v: %v\n", path, err.Error())) + } + stats.Inode = inode + device, err := f.filesystem.DeviceNumber(fileInfo) + if err != nil { + panic(fmt.Sprintf("Could not get device number of %v: %v\n", path, err.Error())) + } + stats.Device = device + permissionsChangeTime, err := f.filesystem.PermTime(fileInfo) + + if err != nil { + panic(fmt.Sprintf("Could not get permissions modification time (CTime) of %v: %v\n", path, err.Error())) + } + // We're only interested in knowing whether anything about the directory + // has changed since last check, so we use the latest of the two + // modification times (content modification (mtime) and + // permission modification (ctime)) + if permissionsChangeTime.After(modTime) { + modTime = permissionsChangeTime + } + stats.ModTime = modTime.UnixNano() + + return stats +} + +// pruneCacheCandidates removes the items that we don't want to include in our persistent cache +func (f *Finder) pruneCacheCandidates(items *DirEntries) { + + for _, fileName := range items.FileNames { + for _, abortedName := range f.cacheMetadata.Config.PruneFiles { + if fileName == abortedName { + items.FileNames = []string{} + items.DirNames = []string{} + return + } + } + } + + // remove any files that aren't the ones we want to include + writeIndex := 0 + for _, fileName := range items.FileNames { + // include only these files + for _, includedName := range f.cacheMetadata.Config.IncludeFiles { + if fileName == includedName { + items.FileNames[writeIndex] = fileName + writeIndex++ + break + } + } + } + // resize + items.FileNames = items.FileNames[:writeIndex] + + writeIndex = 0 + for _, dirName := range items.DirNames { + items.DirNames[writeIndex] = dirName + // ignore other dirs that are known to not be inputs to the build process + include := true + for _, excludedName := range f.cacheMetadata.Config.ExcludeDirs { + if dirName == excludedName { + // don't include + include = false + break + } + } + if include { + writeIndex++ + } + } + // resize + items.DirNames = items.DirNames[:writeIndex] +} + +func (f *Finder) listDirsAsync(nodes []*pathMap) { + f.threadPool.Run( + func() { + for i := range nodes { + f.listDirSync(nodes[i]) + } + }, + ) +} + +func (f *Finder) listDirAsync(node *pathMap) { + f.threadPool.Run( + func() { + f.listDirSync(node) + }, + ) +} + +func (f *Finder) listDirSync(dir *pathMap) { + path := dir.path + children, err := f.filesystem.ReadDir(path) + + if err != nil { + // if listing the contents of the directory fails (presumably due to + // permission denied), then treat the directory as empty + children = []os.FileInfo{} + } + + var subdirs []string + var subfiles []string + + for _, child := range children { + linkBits := child.Mode() & os.ModeSymlink + isLink := linkBits != 0 + if child.IsDir() { + if !isLink { + // Skip symlink dirs. + // We don't have to support symlink dirs because + // that would cause duplicates. + subdirs = append(subdirs, child.Name()) + } + } else { + // We do have to support symlink files because the link name might be + // different than the target name + // (for example, Android.bp -> build/soong/root.bp) + subfiles = append(subfiles, child.Name()) + } + + } + parentNode := dir + + entry := &DirEntries{Path: path, DirNames: subdirs, FileNames: subfiles} + f.pruneCacheCandidates(entry) + + // create a pathMap node for each relevant subdirectory + relevantChildren := map[string]*pathMap{} + for _, subdirName := range entry.DirNames { + childNode, found := parentNode.children[subdirName] + // if we already knew of this directory, then we already have a request pending to Stat it + // if we didn't already know of this directory, then we must Stat it now + if !found { + childNode = parentNode.newChild(subdirName) + f.statDirAsync(childNode) + } + relevantChildren[subdirName] = childNode + } + // Note that in rare cases, it's possible that we're reducing the set of + // children via this statement, if these are all true: + // 1. we previously had a cache that knew about subdirectories of parentNode + // 2. the user created a prune-file (described in pruneCacheCandidates) + // inside , which specifies that the contents of parentNode + // are to be ignored. + // The fact that it's possible to remove children here means that *pathMap structs + // must not be looked up from f.nodes by filepath (and instead must be accessed by + // direct pointer) until after every listDirSync completes + parentNode.FileNames = entry.FileNames + parentNode.children = relevantChildren + +} + +// listMatches takes a node and a function that specifies which subdirectories and +// files to include, and listMatches returns the matches +func (f *Finder) listMatches(node *pathMap, + filter WalkFunc) (subDirs []*pathMap, filePaths []string) { + entries := DirEntries{ + FileNames: node.FileNames, + } + entries.DirNames = make([]string, 0, len(node.children)) + for childName := range node.children { + entries.DirNames = append(entries.DirNames, childName) + } + + dirNames, fileNames := filter(entries) + + subDirs = []*pathMap{} + filePaths = make([]string, 0, len(fileNames)) + for _, fileName := range fileNames { + filePaths = append(filePaths, joinCleanPaths(node.path, fileName)) + } + subDirs = make([]*pathMap, 0, len(dirNames)) + for _, childName := range dirNames { + child, ok := node.children[childName] + if ok { + subDirs = append(subDirs, child) + } + } + + return subDirs, filePaths +} + +// findInCacheMultithreaded spawns potentially multiple goroutines with which to search the cache. +func (f *Finder) findInCacheMultithreaded(node *pathMap, filter WalkFunc, + approxNumThreads int) []string { + + if approxNumThreads < 2 { + // Done spawning threads; process remaining directories + return f.findInCacheSinglethreaded(node, filter) + } + + totalWork := 0 + for _, child := range node.children { + totalWork += child.approximateNumDescendents + } + childrenResults := make(chan []string, len(node.children)) + + subDirs, filePaths := f.listMatches(node, filter) + + // process child directories + for _, child := range subDirs { + numChildThreads := approxNumThreads * child.approximateNumDescendents / totalWork + childProcessor := func(child *pathMap) { + childResults := f.findInCacheMultithreaded(child, filter, numChildThreads) + childrenResults <- childResults + } + // If we're allowed to use more than 1 thread to process this directory, + // then instead we use 1 thread for each subdirectory. + // It would be strange to spawn threads for only some subdirectories. + go childProcessor(child) + } + + // collect results + for i := 0; i < len(subDirs); i++ { + childResults := <-childrenResults + filePaths = append(filePaths, childResults...) + } + close(childrenResults) + + return filePaths +} + +// findInCacheSinglethreaded synchronously searches the cache for all matching file paths +// note findInCacheSinglethreaded runs 2X to 4X as fast by being iterative rather than recursive +func (f *Finder) findInCacheSinglethreaded(node *pathMap, filter WalkFunc) []string { + if node == nil { + return []string{} + } + + nodes := []*pathMap{node} + matches := []string{} + + for len(nodes) > 0 { + currentNode := nodes[0] + nodes = nodes[1:] + + subDirs, filePaths := f.listMatches(currentNode, filter) + + nodes = append(nodes, subDirs...) + + matches = append(matches, filePaths...) + } + return matches +} diff --git a/finder/finder_test.go b/finder/finder_test.go new file mode 100644 index 000000000..60e5eb281 --- /dev/null +++ b/finder/finder_test.go @@ -0,0 +1,1573 @@ +// Copyright 2017 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package finder + +import ( + "fmt" + "log" + "path/filepath" + "reflect" + "testing" + + "sort" + + "io/ioutil" + + "android/soong/fs" + "runtime/debug" + "time" +) + +// some utils for tests to use +func newFs() *fs.MockFs { + return fs.NewMockFs(map[string][]byte{}) +} + +func newFinder(t *testing.T, filesystem *fs.MockFs, cacheParams CacheParams) *Finder { + cachePath := "/finder/finder-db" + cacheDir := filepath.Dir(cachePath) + filesystem.MkDirs(cacheDir) + if cacheParams.WorkingDirectory == "" { + cacheParams.WorkingDirectory = "/cwd" + } + + logger := log.New(ioutil.Discard, "", 0) + finder := New(cacheParams, filesystem, logger, cachePath) + return finder +} + +func finderWithSameParams(t *testing.T, original *Finder) *Finder { + return New( + original.cacheMetadata.Config.CacheParams, + original.filesystem, + original.logger, + original.DbPath) +} + +func write(t *testing.T, path string, content string, filesystem *fs.MockFs) { + parent := filepath.Dir(path) + filesystem.MkDirs(parent) + err := filesystem.WriteFile(path, []byte(content), 0777) + if err != nil { + t.Fatal(err.Error()) + } +} + +func create(t *testing.T, path string, filesystem *fs.MockFs) { + write(t, path, "hi", filesystem) +} + +func delete(t *testing.T, path string, filesystem *fs.MockFs) { + err := filesystem.Remove(path) + if err != nil { + t.Fatal(err.Error()) + } +} + +func removeAll(t *testing.T, path string, filesystem *fs.MockFs) { + err := filesystem.RemoveAll(path) + if err != nil { + t.Fatal(err.Error()) + } +} + +func move(t *testing.T, oldPath string, newPath string, filesystem *fs.MockFs) { + err := filesystem.Rename(oldPath, newPath) + if err != nil { + t.Fatal(err.Error()) + } +} + +func link(t *testing.T, newPath string, oldPath string, filesystem *fs.MockFs) { + parentPath := filepath.Dir(newPath) + err := filesystem.MkDirs(parentPath) + if err != nil { + t.Fatal(err.Error()) + } + err = filesystem.Symlink(oldPath, newPath) + if err != nil { + t.Fatal(err.Error()) + } +} +func read(t *testing.T, path string, filesystem *fs.MockFs) string { + reader, err := filesystem.Open(path) + if err != nil { + t.Fatalf(err.Error()) + } + bytes, err := ioutil.ReadAll(reader) + if err != nil { + t.Fatal(err.Error()) + } + return string(bytes) +} +func modTime(t *testing.T, path string, filesystem *fs.MockFs) time.Time { + stats, err := filesystem.Lstat(path) + if err != nil { + t.Fatal(err.Error()) + } + return stats.ModTime() +} +func setReadable(t *testing.T, path string, readable bool, filesystem *fs.MockFs) { + err := filesystem.SetReadable(path, readable) + if err != nil { + t.Fatal(err.Error()) + } +} +func fatal(t *testing.T, message string) { + t.Error(message) + debug.PrintStack() + t.FailNow() +} +func assertSameResponse(t *testing.T, actual []string, expected []string) { + sort.Strings(actual) + sort.Strings(expected) + if !reflect.DeepEqual(actual, expected) { + fatal( + t, + fmt.Sprintf( + "Expected Finder to return these %v paths:\n %v,\ninstead returned these %v paths: %v\n", + len(expected), expected, len(actual), actual), + ) + } +} + +func assertSameStatCalls(t *testing.T, actual []string, expected []string) { + sort.Strings(actual) + sort.Strings(expected) + + if !reflect.DeepEqual(actual, expected) { + fatal( + t, + fmt.Sprintf( + "Finder made incorrect Stat calls.\n"+ + "Actual:\n"+ + "%v\n"+ + "Expected:\n"+ + "%v\n"+ + "\n", + actual, expected), + ) + } +} +func assertSameReadDirCalls(t *testing.T, actual []string, expected []string) { + sort.Strings(actual) + sort.Strings(expected) + + if !reflect.DeepEqual(actual, expected) { + fatal( + t, + fmt.Sprintf( + "Finder made incorrect ReadDir calls.\n"+ + "Actual:\n"+ + "%v\n"+ + "Expected:\n"+ + "%v\n"+ + "\n", + actual, expected), + ) + } +} + +// runSimpleTests creates a few files, searches for findme.txt, and checks for the expected matches +func runSimpleTest(t *testing.T, existentPaths []string, expectedMatches []string) { + filesystem := newFs() + root := "/tmp" + filesystem.MkDirs(root) + for _, path := range existentPaths { + create(t, filepath.Join(root, path), filesystem) + } + + finder := newFinder(t, + filesystem, + CacheParams{ + "/cwd", + []string{root}, + nil, + nil, + []string{"findme.txt", "skipme.txt"}, + }, + ) + defer finder.Shutdown() + + foundPaths := finder.FindNamedAt(root, "findme.txt") + absoluteMatches := []string{} + for i := range expectedMatches { + absoluteMatches = append(absoluteMatches, filepath.Join(root, expectedMatches[i])) + } + assertSameResponse(t, foundPaths, absoluteMatches) +} + +// end of utils, start of individual tests + +func TestSingleFile(t *testing.T) { + runSimpleTest(t, + []string{"findme.txt"}, + []string{"findme.txt"}, + ) +} + +func TestIncludeFiles(t *testing.T) { + runSimpleTest(t, + []string{"findme.txt", "skipme.txt"}, + []string{"findme.txt"}, + ) +} + +func TestNestedDirectories(t *testing.T) { + runSimpleTest(t, + []string{"findme.txt", "skipme.txt", "subdir/findme.txt", "subdir/skipme.txt"}, + []string{"findme.txt", "subdir/findme.txt"}, + ) +} + +func TestEmptyDirectory(t *testing.T) { + runSimpleTest(t, + []string{}, + []string{}, + ) +} + +func TestEmptyPath(t *testing.T) { + filesystem := newFs() + root := "/tmp" + create(t, filepath.Join(root, "findme.txt"), filesystem) + + finder := newFinder( + t, + filesystem, + CacheParams{ + RootDirs: []string{root}, + IncludeFiles: []string{"findme.txt", "skipme.txt"}, + }, + ) + defer finder.Shutdown() + + foundPaths := finder.FindNamedAt("", "findme.txt") + + assertSameResponse(t, foundPaths, []string{}) +} + +func TestFilesystemRoot(t *testing.T) { + filesystem := newFs() + root := "/" + createdPath := "/findme.txt" + create(t, createdPath, filesystem) + + finder := newFinder( + t, + filesystem, + CacheParams{ + RootDirs: []string{root}, + IncludeFiles: []string{"findme.txt", "skipme.txt"}, + }, + ) + defer finder.Shutdown() + + foundPaths := finder.FindNamedAt(root, "findme.txt") + + assertSameResponse(t, foundPaths, []string{createdPath}) +} + +func TestNonexistentPath(t *testing.T) { + filesystem := newFs() + create(t, "/tmp/findme.txt", filesystem) + + finder := newFinder( + t, + filesystem, + CacheParams{ + RootDirs: []string{"/tmp/IDontExist"}, + IncludeFiles: []string{"findme.txt", "skipme.txt"}, + }, + ) + defer finder.Shutdown() + + foundPaths := finder.FindNamedAt("/tmp/IAlsoDontExist", "findme.txt") + + assertSameResponse(t, foundPaths, []string{}) +} + +func TestExcludeDirs(t *testing.T) { + filesystem := newFs() + create(t, "/tmp/exclude/findme.txt", filesystem) + create(t, "/tmp/exclude/subdir/findme.txt", filesystem) + create(t, "/tmp/subdir/exclude/findme.txt", filesystem) + create(t, "/tmp/subdir/subdir/findme.txt", filesystem) + create(t, "/tmp/subdir/findme.txt", filesystem) + create(t, "/tmp/findme.txt", filesystem) + + finder := newFinder( + t, + filesystem, + CacheParams{ + RootDirs: []string{"/tmp"}, + ExcludeDirs: []string{"exclude"}, + IncludeFiles: []string{"findme.txt", "skipme.txt"}, + }, + ) + defer finder.Shutdown() + + foundPaths := finder.FindNamedAt("/tmp", "findme.txt") + + assertSameResponse(t, foundPaths, + []string{"/tmp/findme.txt", + "/tmp/subdir/findme.txt", + "/tmp/subdir/subdir/findme.txt"}) +} + +func TestPruneFiles(t *testing.T) { + filesystem := newFs() + create(t, "/tmp/out/findme.txt", filesystem) + create(t, "/tmp/out/.ignore-out-dir", filesystem) + create(t, "/tmp/out/child/findme.txt", filesystem) + + create(t, "/tmp/out2/.ignore-out-dir", filesystem) + create(t, "/tmp/out2/sub/findme.txt", filesystem) + + create(t, "/tmp/findme.txt", filesystem) + create(t, "/tmp/include/findme.txt", filesystem) + + finder := newFinder( + t, + filesystem, + CacheParams{ + RootDirs: []string{"/tmp"}, + PruneFiles: []string{".ignore-out-dir"}, + IncludeFiles: []string{"findme.txt"}, + }, + ) + defer finder.Shutdown() + + foundPaths := finder.FindNamedAt("/tmp", "findme.txt") + + assertSameResponse(t, foundPaths, + []string{"/tmp/findme.txt", + "/tmp/include/findme.txt"}) +} + +func TestRootDir(t *testing.T) { + filesystem := newFs() + create(t, "/tmp/a/findme.txt", filesystem) + create(t, "/tmp/a/subdir/findme.txt", filesystem) + create(t, "/tmp/b/findme.txt", filesystem) + create(t, "/tmp/b/subdir/findme.txt", filesystem) + + finder := newFinder( + t, + filesystem, + CacheParams{ + RootDirs: []string{"/tmp/a"}, + IncludeFiles: []string{"findme.txt"}, + }, + ) + defer finder.Shutdown() + + foundPaths := finder.FindNamedAt("/tmp/a", "findme.txt") + + assertSameResponse(t, foundPaths, + []string{"/tmp/a/findme.txt", + "/tmp/a/subdir/findme.txt"}) +} + +func TestUncachedDir(t *testing.T) { + filesystem := newFs() + create(t, "/tmp/a/findme.txt", filesystem) + create(t, "/tmp/a/subdir/findme.txt", filesystem) + create(t, "/tmp/b/findme.txt", filesystem) + create(t, "/tmp/b/subdir/findme.txt", filesystem) + + finder := newFinder( + t, + filesystem, + CacheParams{ + RootDirs: []string{"/IDoNotExist"}, + IncludeFiles: []string{"findme.txt"}, + }, + ) + + foundPaths := finder.FindNamedAt("/tmp/a", "findme.txt") + // If the caller queries for a file that is in the cache, then computing the + // correct answer won't be fast, and it would be easy for the caller to + // fail to notice its slowness. Instead, we only ever search the cache for files + // to return, which enforces that we can determine which files will be + // interesting upfront. + assertSameResponse(t, foundPaths, []string{}) + + finder.Shutdown() +} + +func TestSearchingForFilesExcludedFromCache(t *testing.T) { + // setup filesystem + filesystem := newFs() + create(t, "/tmp/findme.txt", filesystem) + create(t, "/tmp/a/findme.txt", filesystem) + create(t, "/tmp/a/misc.txt", filesystem) + + // set up the finder and run it + finder := newFinder( + t, + filesystem, + CacheParams{ + RootDirs: []string{"/tmp"}, + IncludeFiles: []string{"findme.txt"}, + }, + ) + foundPaths := finder.FindNamedAt("/tmp", "misc.txt") + // If the caller queries for a file that is in the cache, then computing the + // correct answer won't be fast, and it would be easy for the caller to + // fail to notice its slowness. Instead, we only ever search the cache for files + // to return, which enforces that we can determine which files will be + // interesting upfront. + assertSameResponse(t, foundPaths, []string{}) + + finder.Shutdown() +} + +func TestRelativeFilePaths(t *testing.T) { + filesystem := newFs() + + create(t, "/tmp/ignore/hi.txt", filesystem) + create(t, "/tmp/include/hi.txt", filesystem) + create(t, "/cwd/hi.txt", filesystem) + create(t, "/cwd/a/hi.txt", filesystem) + create(t, "/cwd/a/a/hi.txt", filesystem) + + finder := newFinder( + t, + filesystem, + CacheParams{ + RootDirs: []string{"/cwd", "/tmp/include"}, + IncludeFiles: []string{"hi.txt"}, + }, + ) + defer finder.Shutdown() + + foundPaths := finder.FindNamedAt("a", "hi.txt") + assertSameResponse(t, foundPaths, + []string{"a/hi.txt", + "a/a/hi.txt"}) + + foundPaths = finder.FindNamedAt("/tmp/include", "hi.txt") + assertSameResponse(t, foundPaths, []string{"/tmp/include/hi.txt"}) + + foundPaths = finder.FindNamedAt(".", "hi.txt") + assertSameResponse(t, foundPaths, + []string{"hi.txt", + "a/hi.txt", + "a/a/hi.txt"}) + + foundPaths = finder.FindNamedAt("/tmp/include", "hi.txt") + assertSameResponse(t, foundPaths, []string{"/tmp/include/hi.txt"}) +} + +// have to run this test with the race-detector (`go test -race src/android/soong/finder/*.go`) +// for there to be much chance of the test actually detecting any error that may be present +func TestRootDirsContainedInOtherRootDirs(t *testing.T) { + filesystem := newFs() + + create(t, "/tmp/a/b/c/d/e/f/g/h/i/j/findme.txt", filesystem) + + finder := newFinder( + t, + filesystem, + CacheParams{ + RootDirs: []string{"/", "/a/b/c", "/a/b/c/d/e/f", "/a/b/c/d/e/f/g/h/i"}, + IncludeFiles: []string{"findme.txt"}, + }, + ) + defer finder.Shutdown() + + foundPaths := finder.FindNamedAt("/tmp/a", "findme.txt") + + assertSameResponse(t, foundPaths, + []string{"/tmp/a/b/c/d/e/f/g/h/i/j/findme.txt"}) +} + +func TestFindFirst(t *testing.T) { + filesystem := newFs() + create(t, "/tmp/a/hi.txt", filesystem) + create(t, "/tmp/b/hi.txt", filesystem) + create(t, "/tmp/b/a/hi.txt", filesystem) + + finder := newFinder( + t, + filesystem, + CacheParams{ + RootDirs: []string{"/tmp"}, + IncludeFiles: []string{"hi.txt"}, + }, + ) + defer finder.Shutdown() + + foundPaths := finder.FindFirstNamed("hi.txt") + + assertSameResponse(t, foundPaths, + []string{"/tmp/a/hi.txt", + "/tmp/b/hi.txt"}, + ) +} + +func TestConcurrentFindSameDirectory(t *testing.T) { + filesystem := newFs() + + // create a bunch of files and directories + paths := []string{} + for i := 0; i < 10; i++ { + parentDir := fmt.Sprintf("/tmp/%v", i) + for j := 0; j < 10; j++ { + filePath := filepath.Join(parentDir, fmt.Sprintf("%v/findme.txt", j)) + paths = append(paths, filePath) + } + } + sort.Strings(paths) + for _, path := range paths { + create(t, path, filesystem) + } + + // set up a finder + finder := newFinder( + t, + filesystem, + CacheParams{ + RootDirs: []string{"/tmp"}, + IncludeFiles: []string{"findme.txt"}, + }, + ) + defer finder.Shutdown() + + numTests := 20 + results := make(chan []string, numTests) + // make several parallel calls to the finder + for i := 0; i < numTests; i++ { + go func() { + foundPaths := finder.FindNamedAt("/tmp", "findme.txt") + results <- foundPaths + }() + } + + // check that each response was correct + for i := 0; i < numTests; i++ { + foundPaths := <-results + assertSameResponse(t, foundPaths, paths) + } +} + +func TestConcurrentFindDifferentDirectories(t *testing.T) { + filesystem := newFs() + + // create a bunch of files and directories + allFiles := []string{} + numSubdirs := 10 + rootPaths := []string{} + queryAnswers := [][]string{} + for i := 0; i < numSubdirs; i++ { + parentDir := fmt.Sprintf("/tmp/%v", i) + rootPaths = append(rootPaths, parentDir) + queryAnswers = append(queryAnswers, []string{}) + for j := 0; j < 10; j++ { + filePath := filepath.Join(parentDir, fmt.Sprintf("%v/findme.txt", j)) + queryAnswers[i] = append(queryAnswers[i], filePath) + allFiles = append(allFiles, filePath) + } + sort.Strings(queryAnswers[i]) + } + sort.Strings(allFiles) + for _, path := range allFiles { + create(t, path, filesystem) + } + + // set up a finder + finder := newFinder( + t, + filesystem, + + CacheParams{ + RootDirs: []string{"/tmp"}, + IncludeFiles: []string{"findme.txt"}, + }, + ) + defer finder.Shutdown() + + type testRun struct { + path string + foundMatches []string + correctMatches []string + } + + numTests := numSubdirs + 1 + testRuns := make(chan testRun, numTests) + + searchAt := func(path string, correctMatches []string) { + foundPaths := finder.FindNamedAt(path, "findme.txt") + testRuns <- testRun{path, foundPaths, correctMatches} + } + + // make several parallel calls to the finder + go searchAt("/tmp", allFiles) + for i := 0; i < len(rootPaths); i++ { + go searchAt(rootPaths[i], queryAnswers[i]) + } + + // check that each response was correct + for i := 0; i < numTests; i++ { + testRun := <-testRuns + assertSameResponse(t, testRun.foundMatches, testRun.correctMatches) + } +} + +func TestStrangelyFormattedPaths(t *testing.T) { + filesystem := newFs() + + create(t, "/tmp/findme.txt", filesystem) + create(t, "/tmp/a/findme.txt", filesystem) + create(t, "/tmp/b/findme.txt", filesystem) + + finder := newFinder( + t, + filesystem, + CacheParams{ + RootDirs: []string{"//tmp//a//.."}, + IncludeFiles: []string{"findme.txt"}, + }, + ) + defer finder.Shutdown() + + foundPaths := finder.FindNamedAt("//tmp//a//..", "findme.txt") + + assertSameResponse(t, foundPaths, + []string{"/tmp/a/findme.txt", + "/tmp/b/findme.txt", + "/tmp/findme.txt"}) +} + +func TestCorruptedCacheHeader(t *testing.T) { + filesystem := newFs() + + create(t, "/tmp/findme.txt", filesystem) + create(t, "/tmp/a/findme.txt", filesystem) + write(t, "/finder/finder-db", "sample header", filesystem) + + finder := newFinder( + t, + filesystem, + CacheParams{ + RootDirs: []string{"/tmp"}, + IncludeFiles: []string{"findme.txt"}, + }, + ) + defer finder.Shutdown() + + foundPaths := finder.FindNamedAt("/tmp", "findme.txt") + + assertSameResponse(t, foundPaths, + []string{"/tmp/a/findme.txt", + "/tmp/findme.txt"}) +} + +func TestCanUseCache(t *testing.T) { + // setup filesystem + filesystem := newFs() + create(t, "/tmp/findme.txt", filesystem) + create(t, "/tmp/a/findme.txt", filesystem) + + // run the first finder + finder := newFinder( + t, + filesystem, + CacheParams{ + RootDirs: []string{"/tmp"}, + IncludeFiles: []string{"findme.txt"}, + }, + ) + foundPaths := finder.FindNamedAt("/tmp", "findme.txt") + // check the response of the first finder + correctResponse := []string{"/tmp/a/findme.txt", + "/tmp/findme.txt"} + assertSameResponse(t, foundPaths, correctResponse) + finder.Shutdown() + + // check results + cacheText := read(t, finder.DbPath, filesystem) + if len(cacheText) < 1 { + t.Fatalf("saved cache db is empty\n") + } + if len(filesystem.StatCalls) == 0 { + t.Fatal("No Stat calls recorded by mock filesystem") + } + if len(filesystem.ReadDirCalls) == 0 { + t.Fatal("No ReadDir calls recorded by filesystem") + } + statCalls := filesystem.StatCalls + filesystem.ClearMetrics() + + // run the second finder + finder2 := finderWithSameParams(t, finder) + foundPaths = finder2.FindNamedAt("/tmp", "findme.txt") + // check results + assertSameReadDirCalls(t, filesystem.ReadDirCalls, []string{}) + assertSameReadDirCalls(t, filesystem.StatCalls, statCalls) + + finder2.Shutdown() +} + +func TestCorruptedCacheBody(t *testing.T) { + // setup filesystem + filesystem := newFs() + create(t, "/tmp/findme.txt", filesystem) + create(t, "/tmp/a/findme.txt", filesystem) + + // run the first finder + finder := newFinder( + t, + filesystem, + CacheParams{ + RootDirs: []string{"/tmp"}, + IncludeFiles: []string{"findme.txt"}, + }, + ) + foundPaths := finder.FindNamedAt("/tmp", "findme.txt") + finder.Shutdown() + + // check the response of the first finder + correctResponse := []string{"/tmp/a/findme.txt", + "/tmp/findme.txt"} + assertSameResponse(t, foundPaths, correctResponse) + numStatCalls := len(filesystem.StatCalls) + numReadDirCalls := len(filesystem.ReadDirCalls) + + // load the cache file, corrupt it, and save it + cacheReader, err := filesystem.Open(finder.DbPath) + if err != nil { + t.Fatal(err) + } + cacheData, err := ioutil.ReadAll(cacheReader) + if err != nil { + t.Fatal(err) + } + cacheData = append(cacheData, []byte("DontMindMe")...) + filesystem.WriteFile(finder.DbPath, cacheData, 0777) + filesystem.ClearMetrics() + + // run the second finder + finder2 := finderWithSameParams(t, finder) + foundPaths = finder2.FindNamedAt("/tmp", "findme.txt") + // check results + assertSameResponse(t, foundPaths, correctResponse) + numNewStatCalls := len(filesystem.StatCalls) + numNewReadDirCalls := len(filesystem.ReadDirCalls) + // It's permissable to make more Stat calls with a corrupted cache because + // the Finder may restart once it detects corruption. + // However, it may have already issued many Stat calls. + // Because a corrupted db is not expected to be a common (or even a supported case), + // we don't care to optimize it and don't cache the already-issued Stat calls + if numNewReadDirCalls < numReadDirCalls { + t.Fatalf( + "Finder made fewer ReadDir calls with a corrupted cache (%v calls) than with no cache"+ + " (%v calls)", + numNewReadDirCalls, numReadDirCalls) + } + if numNewStatCalls < numStatCalls { + t.Fatalf( + "Finder made fewer Stat calls with a corrupted cache (%v calls) than with no cache (%v calls)", + numNewStatCalls, numStatCalls) + } + finder2.Shutdown() +} + +func TestStatCalls(t *testing.T) { + // setup filesystem + filesystem := newFs() + create(t, "/tmp/a/findme.txt", filesystem) + + // run finder + finder := newFinder( + t, + filesystem, + CacheParams{ + RootDirs: []string{"/tmp"}, + IncludeFiles: []string{"findme.txt"}, + }, + ) + foundPaths := finder.FindNamedAt("/tmp", "findme.txt") + finder.Shutdown() + + // check response + assertSameResponse(t, foundPaths, []string{"/tmp/a/findme.txt"}) + assertSameStatCalls(t, filesystem.StatCalls, []string{"/tmp", "/tmp/a"}) + assertSameReadDirCalls(t, filesystem.ReadDirCalls, []string{"/tmp", "/tmp/a"}) +} + +func TestFileAdded(t *testing.T) { + // setup filesystem + filesystem := newFs() + create(t, "/tmp/ignoreme.txt", filesystem) + create(t, "/tmp/a/findme.txt", filesystem) + create(t, "/tmp/b/ignore.txt", filesystem) + create(t, "/tmp/b/c/nope.txt", filesystem) + create(t, "/tmp/b/c/d/irrelevant.txt", filesystem) + + // run the first finder + finder := newFinder( + t, + filesystem, + CacheParams{ + RootDirs: []string{"/tmp"}, + IncludeFiles: []string{"findme.txt"}, + }, + ) + foundPaths := finder.FindNamedAt("/tmp", "findme.txt") + filesystem.Clock.Tick() + finder.Shutdown() + // check the response of the first finder + assertSameResponse(t, foundPaths, []string{"/tmp/a/findme.txt"}) + + // modify the filesystem + filesystem.Clock.Tick() + create(t, "/tmp/b/c/findme.txt", filesystem) + filesystem.Clock.Tick() + filesystem.ClearMetrics() + + // run the second finder + finder2 := finderWithSameParams(t, finder) + foundPaths = finder2.FindNamedAt("/tmp", "findme.txt") + + // check results + assertSameResponse(t, foundPaths, []string{"/tmp/a/findme.txt", "/tmp/b/c/findme.txt"}) + assertSameStatCalls(t, filesystem.StatCalls, []string{"/tmp", "/tmp/a", "/tmp/b", "/tmp/b/c", "/tmp/b/c/d"}) + assertSameReadDirCalls(t, filesystem.ReadDirCalls, []string{"/tmp/b/c"}) + finder2.Shutdown() + +} + +func TestDirectoriesAdded(t *testing.T) { + // setup filesystem + filesystem := newFs() + create(t, "/tmp/ignoreme.txt", filesystem) + create(t, "/tmp/a/findme.txt", filesystem) + create(t, "/tmp/b/ignore.txt", filesystem) + create(t, "/tmp/b/c/nope.txt", filesystem) + create(t, "/tmp/b/c/d/irrelevant.txt", filesystem) + + // run the first finder + finder := newFinder( + t, + filesystem, + CacheParams{ + RootDirs: []string{"/tmp"}, + IncludeFiles: []string{"findme.txt"}, + }, + ) + foundPaths := finder.FindNamedAt("/tmp", "findme.txt") + finder.Shutdown() + // check the response of the first finder + assertSameResponse(t, foundPaths, []string{"/tmp/a/findme.txt"}) + + // modify the filesystem + filesystem.Clock.Tick() + create(t, "/tmp/b/c/new/findme.txt", filesystem) + create(t, "/tmp/b/c/new/new2/findme.txt", filesystem) + create(t, "/tmp/b/c/new/new2/ignoreme.txt", filesystem) + filesystem.ClearMetrics() + + // run the second finder + finder2 := finderWithSameParams(t, finder) + foundPaths = finder2.FindNamedAt("/tmp", "findme.txt") + + // check results + assertSameResponse(t, foundPaths, + []string{"/tmp/a/findme.txt", "/tmp/b/c/new/findme.txt", "/tmp/b/c/new/new2/findme.txt"}) + assertSameStatCalls(t, filesystem.StatCalls, + []string{"/tmp", "/tmp/a", "/tmp/b", "/tmp/b/c", "/tmp/b/c/d", "/tmp/b/c/new", "/tmp/b/c/new/new2"}) + assertSameReadDirCalls(t, filesystem.ReadDirCalls, []string{"/tmp/b/c", "/tmp/b/c/new", "/tmp/b/c/new/new2"}) + + finder2.Shutdown() +} + +func TestDirectoryAndSubdirectoryBothUpdated(t *testing.T) { + // setup filesystem + filesystem := newFs() + create(t, "/tmp/hi1.txt", filesystem) + create(t, "/tmp/a/hi1.txt", filesystem) + + // run the first finder + finder := newFinder( + t, + filesystem, + CacheParams{ + RootDirs: []string{"/tmp"}, + IncludeFiles: []string{"hi1.txt", "hi2.txt"}, + }, + ) + foundPaths := finder.FindNamedAt("/tmp", "hi1.txt") + finder.Shutdown() + // check the response of the first finder + assertSameResponse(t, foundPaths, []string{"/tmp/hi1.txt", "/tmp/a/hi1.txt"}) + + // modify the filesystem + filesystem.Clock.Tick() + create(t, "/tmp/hi2.txt", filesystem) + create(t, "/tmp/a/hi2.txt", filesystem) + filesystem.ClearMetrics() + + // run the second finder + finder2 := finderWithSameParams(t, finder) + foundPaths = finder2.FindAll() + + // check results + assertSameResponse(t, foundPaths, + []string{"/tmp/hi1.txt", "/tmp/hi2.txt", "/tmp/a/hi1.txt", "/tmp/a/hi2.txt"}) + assertSameStatCalls(t, filesystem.StatCalls, + []string{"/tmp", "/tmp/a"}) + assertSameReadDirCalls(t, filesystem.ReadDirCalls, []string{"/tmp", "/tmp/a"}) + + finder2.Shutdown() +} + +func TestFileDeleted(t *testing.T) { + // setup filesystem + filesystem := newFs() + create(t, "/tmp/ignoreme.txt", filesystem) + create(t, "/tmp/a/findme.txt", filesystem) + create(t, "/tmp/b/findme.txt", filesystem) + create(t, "/tmp/b/c/nope.txt", filesystem) + create(t, "/tmp/b/c/d/irrelevant.txt", filesystem) + + // run the first finder + finder := newFinder( + t, + filesystem, + CacheParams{ + RootDirs: []string{"/tmp"}, + IncludeFiles: []string{"findme.txt"}, + }, + ) + foundPaths := finder.FindNamedAt("/tmp", "findme.txt") + finder.Shutdown() + // check the response of the first finder + assertSameResponse(t, foundPaths, []string{"/tmp/a/findme.txt", "/tmp/b/findme.txt"}) + + // modify the filesystem + filesystem.Clock.Tick() + delete(t, "/tmp/b/findme.txt", filesystem) + filesystem.ClearMetrics() + + // run the second finder + finder2 := finderWithSameParams(t, finder) + foundPaths = finder2.FindNamedAt("/tmp", "findme.txt") + + // check results + assertSameResponse(t, foundPaths, []string{"/tmp/a/findme.txt"}) + assertSameStatCalls(t, filesystem.StatCalls, []string{"/tmp", "/tmp/a", "/tmp/b", "/tmp/b/c", "/tmp/b/c/d"}) + assertSameReadDirCalls(t, filesystem.ReadDirCalls, []string{"/tmp/b"}) + + finder2.Shutdown() +} + +func TestDirectoriesDeleted(t *testing.T) { + // setup filesystem + filesystem := newFs() + create(t, "/tmp/findme.txt", filesystem) + create(t, "/tmp/a/findme.txt", filesystem) + create(t, "/tmp/a/1/findme.txt", filesystem) + create(t, "/tmp/a/1/2/findme.txt", filesystem) + create(t, "/tmp/b/findme.txt", filesystem) + + // run the first finder + finder := newFinder( + t, + filesystem, + CacheParams{ + RootDirs: []string{"/tmp"}, + IncludeFiles: []string{"findme.txt"}, + }, + ) + foundPaths := finder.FindNamedAt("/tmp", "findme.txt") + finder.Shutdown() + // check the response of the first finder + assertSameResponse(t, foundPaths, + []string{"/tmp/findme.txt", + "/tmp/a/findme.txt", + "/tmp/a/1/findme.txt", + "/tmp/a/1/2/findme.txt", + "/tmp/b/findme.txt"}) + + // modify the filesystem + filesystem.Clock.Tick() + removeAll(t, "/tmp/a/1", filesystem) + filesystem.ClearMetrics() + + // run the second finder + finder2 := finderWithSameParams(t, finder) + foundPaths = finder2.FindNamedAt("/tmp", "findme.txt") + + // check results + assertSameResponse(t, foundPaths, + []string{"/tmp/findme.txt", "/tmp/a/findme.txt", "/tmp/b/findme.txt"}) + // Technically, we don't care whether /tmp/a/1/2 gets Statted or gets skipped + // if the Finder detects the nonexistence of /tmp/a/1 + // However, when resuming from cache, we don't want the Finder to necessarily wait + // to stat a directory until after statting its parent. + // So here we just include /tmp/a/1/2 in the list. + // The Finder is currently implemented to always restat every dir and + // to not short-circuit due to nonexistence of parents (but it will remove + // missing dirs from the cache for next time) + assertSameStatCalls(t, filesystem.StatCalls, + []string{"/tmp", "/tmp/a", "/tmp/a/1", "/tmp/a/1/2", "/tmp/b"}) + assertSameReadDirCalls(t, filesystem.ReadDirCalls, []string{"/tmp/a"}) + + finder2.Shutdown() +} + +func TestDirectoriesMoved(t *testing.T) { + // setup filesystem + filesystem := newFs() + create(t, "/tmp/findme.txt", filesystem) + create(t, "/tmp/a/findme.txt", filesystem) + create(t, "/tmp/a/1/findme.txt", filesystem) + create(t, "/tmp/a/1/2/findme.txt", filesystem) + create(t, "/tmp/b/findme.txt", filesystem) + + // run the first finder + finder := newFinder( + t, + filesystem, + CacheParams{ + RootDirs: []string{"/tmp"}, + IncludeFiles: []string{"findme.txt"}, + }, + ) + foundPaths := finder.FindNamedAt("/tmp", "findme.txt") + finder.Shutdown() + // check the response of the first finder + assertSameResponse(t, foundPaths, + []string{"/tmp/findme.txt", + "/tmp/a/findme.txt", + "/tmp/a/1/findme.txt", + "/tmp/a/1/2/findme.txt", + "/tmp/b/findme.txt"}) + + // modify the filesystem + filesystem.Clock.Tick() + move(t, "/tmp/a", "/tmp/c", filesystem) + filesystem.ClearMetrics() + + // run the second finder + finder2 := finderWithSameParams(t, finder) + foundPaths = finder2.FindNamedAt("/tmp", "findme.txt") + + // check results + assertSameResponse(t, foundPaths, + []string{"/tmp/findme.txt", + "/tmp/b/findme.txt", + "/tmp/c/findme.txt", + "/tmp/c/1/findme.txt", + "/tmp/c/1/2/findme.txt"}) + // Technically, we don't care whether /tmp/a/1/2 gets Statted or gets skipped + // if the Finder detects the nonexistence of /tmp/a/1 + // However, when resuming from cache, we don't want the Finder to necessarily wait + // to stat a directory until after statting its parent. + // So here we just include /tmp/a/1/2 in the list. + // The Finder is currently implemented to always restat every dir and + // to not short-circuit due to nonexistence of parents (but it will remove + // missing dirs from the cache for next time) + assertSameStatCalls(t, filesystem.StatCalls, + []string{"/tmp", "/tmp/a", "/tmp/a/1", "/tmp/a/1/2", "/tmp/b", "/tmp/c", "/tmp/c/1", "/tmp/c/1/2"}) + assertSameReadDirCalls(t, filesystem.ReadDirCalls, []string{"/tmp", "/tmp/c", "/tmp/c/1", "/tmp/c/1/2"}) + finder2.Shutdown() +} + +func TestDirectoriesSwapped(t *testing.T) { + // setup filesystem + filesystem := newFs() + create(t, "/tmp/findme.txt", filesystem) + create(t, "/tmp/a/findme.txt", filesystem) + create(t, "/tmp/a/1/findme.txt", filesystem) + create(t, "/tmp/a/1/2/findme.txt", filesystem) + create(t, "/tmp/b/findme.txt", filesystem) + + // run the first finder + finder := newFinder( + t, + filesystem, + CacheParams{ + RootDirs: []string{"/tmp"}, + IncludeFiles: []string{"findme.txt"}, + }, + ) + foundPaths := finder.FindNamedAt("/tmp", "findme.txt") + finder.Shutdown() + // check the response of the first finder + assertSameResponse(t, foundPaths, + []string{"/tmp/findme.txt", + "/tmp/a/findme.txt", + "/tmp/a/1/findme.txt", + "/tmp/a/1/2/findme.txt", + "/tmp/b/findme.txt"}) + + // modify the filesystem + filesystem.Clock.Tick() + move(t, "/tmp/a", "/tmp/temp", filesystem) + move(t, "/tmp/b", "/tmp/a", filesystem) + move(t, "/tmp/temp", "/tmp/b", filesystem) + filesystem.ClearMetrics() + + // run the second finder + finder2 := finderWithSameParams(t, finder) + foundPaths = finder2.FindNamedAt("/tmp", "findme.txt") + + // check results + assertSameResponse(t, foundPaths, + []string{"/tmp/findme.txt", + "/tmp/a/findme.txt", + "/tmp/b/findme.txt", + "/tmp/b/1/findme.txt", + "/tmp/b/1/2/findme.txt"}) + // Technically, we don't care whether /tmp/a/1/2 gets Statted or gets skipped + // if the Finder detects the nonexistence of /tmp/a/1 + // However, when resuming from cache, we don't want the Finder to necessarily wait + // to stat a directory until after statting its parent. + // So here we just include /tmp/a/1/2 in the list. + // The Finder is currently implemented to always restat every dir and + // to not short-circuit due to nonexistence of parents (but it will remove + // missing dirs from the cache for next time) + assertSameStatCalls(t, filesystem.StatCalls, + []string{"/tmp", "/tmp/a", "/tmp/a/1", "/tmp/a/1/2", "/tmp/b", "/tmp/b/1", "/tmp/b/1/2"}) + assertSameReadDirCalls(t, filesystem.ReadDirCalls, []string{"/tmp", "/tmp/a", "/tmp/b", "/tmp/b/1", "/tmp/b/1/2"}) + finder2.Shutdown() +} + +// runFsReplacementTest tests a change modifying properties of the filesystem itself: +// runFsReplacementTest tests changing the user, the hostname, or the device number +// runFsReplacementTest is a helper method called by other tests +func runFsReplacementTest(t *testing.T, fs1 *fs.MockFs, fs2 *fs.MockFs) { + // setup fs1 + create(t, "/tmp/findme.txt", fs1) + create(t, "/tmp/a/findme.txt", fs1) + create(t, "/tmp/a/a/findme.txt", fs1) + + // setup fs2 to have the same directories but different files + create(t, "/tmp/findme.txt", fs2) + create(t, "/tmp/a/findme.txt", fs2) + create(t, "/tmp/a/a/ignoreme.txt", fs2) + create(t, "/tmp/a/b/findme.txt", fs2) + + // run the first finder + finder := newFinder( + t, + fs1, + CacheParams{ + RootDirs: []string{"/tmp"}, + IncludeFiles: []string{"findme.txt"}, + }, + ) + foundPaths := finder.FindNamedAt("/tmp", "findme.txt") + finder.Shutdown() + // check the response of the first finder + assertSameResponse(t, foundPaths, + []string{"/tmp/findme.txt", "/tmp/a/findme.txt", "/tmp/a/a/findme.txt"}) + + // copy the cache data from the first filesystem to the second + cacheContent := read(t, finder.DbPath, fs1) + write(t, finder.DbPath, cacheContent, fs2) + + // run the second finder, with the same config and same cache contents but a different filesystem + finder2 := newFinder( + t, + fs2, + CacheParams{ + RootDirs: []string{"/tmp"}, + IncludeFiles: []string{"findme.txt"}, + }, + ) + foundPaths = finder2.FindNamedAt("/tmp", "findme.txt") + + // check results + assertSameResponse(t, foundPaths, + []string{"/tmp/findme.txt", "/tmp/a/findme.txt", "/tmp/a/b/findme.txt"}) + assertSameStatCalls(t, fs2.StatCalls, + []string{"/tmp", "/tmp/a", "/tmp/a/a", "/tmp/a/b"}) + assertSameReadDirCalls(t, fs2.ReadDirCalls, + []string{"/tmp", "/tmp/a", "/tmp/a/a", "/tmp/a/b"}) + finder2.Shutdown() +} + +func TestChangeOfDevice(t *testing.T) { + fs1 := newFs() + // not as fine-grained mounting controls as a real filesystem, but should be adequate + fs1.SetDeviceNumber(0) + + fs2 := newFs() + fs2.SetDeviceNumber(1) + + runFsReplacementTest(t, fs1, fs2) +} + +func TestChangeOfUserOrHost(t *testing.T) { + fs1 := newFs() + fs1.SetViewId("me@here") + + fs2 := newFs() + fs2.SetViewId("you@there") + + runFsReplacementTest(t, fs1, fs2) +} + +func TestConsistentCacheOrdering(t *testing.T) { + // setup filesystem + filesystem := newFs() + for i := 0; i < 5; i++ { + create(t, fmt.Sprintf("/tmp/%v/findme.txt", i), filesystem) + } + + // run the first finder + finder := newFinder( + t, + filesystem, + CacheParams{ + RootDirs: []string{"/tmp"}, + IncludeFiles: []string{"findme.txt"}, + }, + ) + finder.FindNamedAt("/tmp", "findme.txt") + finder.Shutdown() + + // read db file + string1 := read(t, finder.DbPath, filesystem) + + err := filesystem.Remove(finder.DbPath) + if err != nil { + t.Fatal(err) + } + + // run another finder + finder2 := finderWithSameParams(t, finder) + finder2.FindNamedAt("/tmp", "findme.txt") + finder2.Shutdown() + + string2 := read(t, finder.DbPath, filesystem) + + if string1 != string2 { + t.Errorf("Running Finder twice generated two dbs not having identical contents.\n"+ + "Content of first file:\n"+ + "\n"+ + "%v"+ + "\n"+ + "\n"+ + "Content of second file:\n"+ + "\n"+ + "%v\n"+ + "\n", + string1, + string2, + ) + } + +} + +func TestNumSyscallsOfSecondFind(t *testing.T) { + // setup filesystem + filesystem := newFs() + create(t, "/tmp/findme.txt", filesystem) + create(t, "/tmp/a/findme.txt", filesystem) + create(t, "/tmp/a/misc.txt", filesystem) + + // set up the finder and run it once + finder := newFinder( + t, + filesystem, + CacheParams{ + RootDirs: []string{"/tmp"}, + IncludeFiles: []string{"findme.txt"}, + }, + ) + foundPaths := finder.FindNamedAt("/tmp", "findme.txt") + assertSameResponse(t, foundPaths, []string{"/tmp/findme.txt", "/tmp/a/findme.txt"}) + + filesystem.ClearMetrics() + + // run the finder again and confirm it doesn't check the filesystem + refoundPaths := finder.FindNamedAt("/tmp", "findme.txt") + assertSameResponse(t, refoundPaths, foundPaths) + assertSameStatCalls(t, filesystem.StatCalls, []string{}) + assertSameReadDirCalls(t, filesystem.ReadDirCalls, []string{}) + + finder.Shutdown() +} + +func TestChangingParamsOfSecondFind(t *testing.T) { + // setup filesystem + filesystem := newFs() + create(t, "/tmp/findme.txt", filesystem) + create(t, "/tmp/a/findme.txt", filesystem) + create(t, "/tmp/a/metoo.txt", filesystem) + + // set up the finder and run it once + finder := newFinder( + t, + filesystem, + CacheParams{ + RootDirs: []string{"/tmp"}, + IncludeFiles: []string{"findme.txt", "metoo.txt"}, + }, + ) + foundPaths := finder.FindNamedAt("/tmp", "findme.txt") + assertSameResponse(t, foundPaths, []string{"/tmp/findme.txt", "/tmp/a/findme.txt"}) + + filesystem.ClearMetrics() + + // run the finder again and confirm it gets the right answer without asking the filesystem + refoundPaths := finder.FindNamedAt("/tmp", "metoo.txt") + assertSameResponse(t, refoundPaths, []string{"/tmp/a/metoo.txt"}) + assertSameStatCalls(t, filesystem.StatCalls, []string{}) + assertSameReadDirCalls(t, filesystem.ReadDirCalls, []string{}) + + finder.Shutdown() +} + +func TestSymlinkPointingToFile(t *testing.T) { + // setup filesystem + filesystem := newFs() + create(t, "/tmp/a/hi.txt", filesystem) + create(t, "/tmp/a/ignoreme.txt", filesystem) + link(t, "/tmp/hi.txt", "a/hi.txt", filesystem) + link(t, "/tmp/b/hi.txt", "../a/hi.txt", filesystem) + link(t, "/tmp/c/hi.txt", "/tmp/hi.txt", filesystem) + link(t, "/tmp/d/hi.txt", "../a/bye.txt", filesystem) + link(t, "/tmp/d/bye.txt", "../a/hi.txt", filesystem) + link(t, "/tmp/e/bye.txt", "../a/bye.txt", filesystem) + link(t, "/tmp/f/hi.txt", "somethingThatDoesntExist", filesystem) + + // set up the finder and run it once + finder := newFinder( + t, + filesystem, + CacheParams{ + RootDirs: []string{"/tmp"}, + IncludeFiles: []string{"hi.txt"}, + }, + ) + foundPaths := finder.FindNamedAt("/tmp", "hi.txt") + // should search based on the name of the link rather than the destination or validity of the link + correctResponse := []string{ + "/tmp/a/hi.txt", + "/tmp/hi.txt", + "/tmp/b/hi.txt", + "/tmp/c/hi.txt", + "/tmp/d/hi.txt", + "/tmp/f/hi.txt", + } + assertSameResponse(t, foundPaths, correctResponse) + +} + +func TestSymlinkPointingToDirectory(t *testing.T) { + // setup filesystem + filesystem := newFs() + create(t, "/tmp/dir/hi.txt", filesystem) + create(t, "/tmp/dir/ignoreme.txt", filesystem) + + link(t, "/tmp/links/dir", "../dir", filesystem) + link(t, "/tmp/links/link", "../dir", filesystem) + link(t, "/tmp/links/broken", "nothingHere", filesystem) + link(t, "/tmp/links/recursive", "recursive", filesystem) + + // set up the finder and run it once + finder := newFinder( + t, + filesystem, + CacheParams{ + RootDirs: []string{"/tmp"}, + IncludeFiles: []string{"hi.txt"}, + }, + ) + + foundPaths := finder.FindNamedAt("/tmp", "hi.txt") + + // should completely ignore symlinks that point to directories + correctResponse := []string{ + "/tmp/dir/hi.txt", + } + assertSameResponse(t, foundPaths, correctResponse) + +} + +// TestAddPruneFile confirms that adding a prune-file (into a directory for which we +// already had a cache) causes the directory to be ignored +func TestAddPruneFile(t *testing.T) { + // setup filesystem + filesystem := newFs() + create(t, "/tmp/out/hi.txt", filesystem) + create(t, "/tmp/out/a/hi.txt", filesystem) + create(t, "/tmp/hi.txt", filesystem) + + // do find + finder := newFinder( + t, + filesystem, + CacheParams{ + RootDirs: []string{"/tmp"}, + PruneFiles: []string{".ignore-out-dir"}, + IncludeFiles: []string{"hi.txt"}, + }, + ) + + foundPaths := finder.FindNamedAt("/tmp", "hi.txt") + + // check result + assertSameResponse(t, foundPaths, + []string{"/tmp/hi.txt", + "/tmp/out/hi.txt", + "/tmp/out/a/hi.txt"}, + ) + finder.Shutdown() + + // modify filesystem + filesystem.Clock.Tick() + create(t, "/tmp/out/.ignore-out-dir", filesystem) + // run another find and check its result + finder2 := finderWithSameParams(t, finder) + foundPaths = finder2.FindNamedAt("/tmp", "hi.txt") + assertSameResponse(t, foundPaths, []string{"/tmp/hi.txt"}) + finder2.Shutdown() +} + +func TestUpdatingDbIffChanged(t *testing.T) { + // setup filesystem + filesystem := newFs() + create(t, "/tmp/a/hi.txt", filesystem) + create(t, "/tmp/b/bye.txt", filesystem) + + // run the first finder + finder := newFinder( + t, + filesystem, + CacheParams{ + RootDirs: []string{"/tmp"}, + IncludeFiles: []string{"hi.txt"}, + }, + ) + foundPaths := finder.FindAll() + filesystem.Clock.Tick() + finder.Shutdown() + // check results + assertSameResponse(t, foundPaths, []string{"/tmp/a/hi.txt"}) + + // modify the filesystem + filesystem.Clock.Tick() + create(t, "/tmp/b/hi.txt", filesystem) + filesystem.Clock.Tick() + filesystem.ClearMetrics() + + // run the second finder + finder2 := finderWithSameParams(t, finder) + foundPaths = finder2.FindAll() + finder2.Shutdown() + // check results + assertSameResponse(t, foundPaths, []string{"/tmp/a/hi.txt", "/tmp/b/hi.txt"}) + assertSameReadDirCalls(t, filesystem.ReadDirCalls, []string{"/tmp/b"}) + expectedDbWriteTime := filesystem.Clock.Time() + actualDbWriteTime := modTime(t, finder2.DbPath, filesystem) + if actualDbWriteTime != expectedDbWriteTime { + t.Fatalf("Expected to write db at %v, actually wrote db at %v\n", + expectedDbWriteTime, actualDbWriteTime) + } + + // reset metrics + filesystem.ClearMetrics() + + // run the third finder + finder3 := finderWithSameParams(t, finder2) + foundPaths = finder3.FindAll() + + // check results + assertSameResponse(t, foundPaths, []string{"/tmp/a/hi.txt", "/tmp/b/hi.txt"}) + assertSameReadDirCalls(t, filesystem.ReadDirCalls, []string{}) + finder3.Shutdown() + actualDbWriteTime = modTime(t, finder3.DbPath, filesystem) + if actualDbWriteTime != expectedDbWriteTime { + t.Fatalf("Re-wrote db even when contents did not change") + } + +} + +func TestDirectoryNotPermitted(t *testing.T) { + // setup filesystem + filesystem := newFs() + create(t, "/tmp/hi.txt", filesystem) + create(t, "/tmp/a/hi.txt", filesystem) + create(t, "/tmp/a/a/hi.txt", filesystem) + create(t, "/tmp/b/hi.txt", filesystem) + + // run the first finder + finder := newFinder( + t, + filesystem, + CacheParams{ + RootDirs: []string{"/tmp"}, + IncludeFiles: []string{"hi.txt"}, + }, + ) + foundPaths := finder.FindAll() + filesystem.Clock.Tick() + finder.Shutdown() + allPaths := []string{"/tmp/hi.txt", "/tmp/a/hi.txt", "/tmp/a/a/hi.txt", "/tmp/b/hi.txt"} + // check results + assertSameResponse(t, foundPaths, allPaths) + + // modify the filesystem + filesystem.Clock.Tick() + + setReadable(t, "/tmp/a", false, filesystem) + filesystem.Clock.Tick() + + // run the second finder + finder2 := finderWithSameParams(t, finder) + foundPaths = finder2.FindAll() + finder2.Shutdown() + // check results + assertSameResponse(t, foundPaths, []string{"/tmp/hi.txt", "/tmp/b/hi.txt"}) + + // modify the filesystem back + setReadable(t, "/tmp/a", true, filesystem) + + // run the third finder + finder3 := finderWithSameParams(t, finder2) + foundPaths = finder3.FindAll() + finder3.Shutdown() + // check results + assertSameResponse(t, foundPaths, allPaths) +} + +func TestFileNotPermitted(t *testing.T) { + // setup filesystem + filesystem := newFs() + create(t, "/tmp/hi.txt", filesystem) + setReadable(t, "/tmp/hi.txt", false, filesystem) + + // run the first finder + finder := newFinder( + t, + filesystem, + CacheParams{ + RootDirs: []string{"/tmp"}, + IncludeFiles: []string{"hi.txt"}, + }, + ) + foundPaths := finder.FindAll() + filesystem.Clock.Tick() + finder.Shutdown() + // check results + assertSameResponse(t, foundPaths, []string{"/tmp/hi.txt"}) +} diff --git a/fs/Android.bp b/fs/Android.bp new file mode 100644 index 000000000..68cb88d1e --- /dev/null +++ b/fs/Android.bp @@ -0,0 +1,27 @@ +// Copyright 2017 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +// mock filesystem +// + +bootstrap_go_package { + name: "soong-fs", + pkgPath: "android/soong/fs", + srcs: [ + "fs.go", + ], +} + + diff --git a/fs/fs.go b/fs/fs.go new file mode 100644 index 000000000..c6060c008 --- /dev/null +++ b/fs/fs.go @@ -0,0 +1,957 @@ +// Copyright 2016 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package fs + +import ( + "bytes" + "errors" + "fmt" + "io" + "io/ioutil" + "os" + "os/user" + "path/filepath" + "sync" + "syscall" + "time" +) + +var OsFs FileSystem = osFs{} + +func NewMockFs(files map[string][]byte) *MockFs { + workDir := "/cwd" + fs := &MockFs{ + Clock: NewClock(time.Unix(2, 2)), + workDir: workDir, + } + fs.root = *fs.newDir() + fs.MkDirs(workDir) + + for path, bytes := range files { + dir := filepath.Dir(path) + fs.MkDirs(dir) + fs.WriteFile(path, bytes, 0777) + } + + return fs +} + +type FileSystem interface { + // getting information about files + Open(name string) (file io.ReadCloser, err error) + Lstat(path string) (stats os.FileInfo, err error) + ReadDir(path string) (contents []os.FileInfo, err error) + + InodeNumber(info os.FileInfo) (number uint64, err error) + DeviceNumber(info os.FileInfo) (number uint64, err error) + PermTime(info os.FileInfo) (time time.Time, err error) + + // changing contents of the filesystem + Rename(oldPath string, newPath string) (err error) + WriteFile(path string, data []byte, perm os.FileMode) (err error) + Remove(path string) (err error) + RemoveAll(path string) (err error) + + // metadata about the filesystem + ViewId() (id string) // Some unique id of the user accessing the filesystem +} + +// osFs implements FileSystem using the local disk. +type osFs struct{} + +func (osFs) Open(name string) (io.ReadCloser, error) { return os.Open(name) } + +func (osFs) Lstat(path string) (stats os.FileInfo, err error) { + return os.Lstat(path) +} + +func (osFs) InodeNumber(info os.FileInfo) (number uint64, err error) { + sys := info.Sys() + linuxStats, ok := sys.(*syscall.Stat_t) + if ok { + return linuxStats.Ino, nil + } + return 0, fmt.Errorf("%v is not a *syscall.Stat_t", sys) +} + +func (osFs) DeviceNumber(info os.FileInfo) (number uint64, err error) { + sys := info.Sys() + linuxStats, ok := sys.(*syscall.Stat_t) + if ok { + return linuxStats.Dev, nil + } + return 0, fmt.Errorf("%v is not a *syscall.Stat_t", sys) +} + +func (osFs) PermTime(info os.FileInfo) (when time.Time, err error) { + sys := info.Sys() + linuxStats, ok := sys.(*syscall.Stat_t) + if ok { + return time.Unix(linuxStats.Ctim.Sec, linuxStats.Ctim.Nsec), nil + } + return time.Date(0, 0, 0, 0, 0, 0, 0, nil), fmt.Errorf("%v is not a *syscall.Stat_t", sys) +} + +func (osFs) ReadDir(path string) (contents []os.FileInfo, err error) { + return ioutil.ReadDir(path) +} +func (osFs) Rename(oldPath string, newPath string) error { + return os.Rename(oldPath, newPath) +} + +func (osFs) WriteFile(path string, data []byte, perm os.FileMode) error { + return ioutil.WriteFile(path, data, perm) +} + +func (osFs) Remove(path string) error { + return os.Remove(path) +} + +func (osFs) RemoveAll(path string) error { + return os.RemoveAll(path) +} + +func (osFs) ViewId() (id string) { + user, err := user.Current() + if err != nil { + return "" + } + username := user.Username + + hostname, err := os.Hostname() + if err != nil { + return "" + } + + return username + "@" + hostname +} + +type Clock struct { + time time.Time +} + +func NewClock(startTime time.Time) *Clock { + return &Clock{time: startTime} + +} + +func (c *Clock) Tick() { + c.time = c.time.Add(time.Microsecond) +} + +func (c *Clock) Time() time.Time { + return c.time +} + +// given "/a/b/c/d", pathSplit returns ("/a/b/c", "d") +func pathSplit(path string) (dir string, leaf string) { + dir, leaf = filepath.Split(path) + if dir != "/" && len(dir) > 0 { + dir = dir[:len(dir)-1] + } + return dir, leaf +} + +// MockFs supports singlethreaded writes and multithreaded reads +type MockFs struct { + // configuration + viewId string // + deviceNumber uint64 + + // implementation + root mockDir + Clock *Clock + workDir string + nextInodeNumber uint64 + + // history of requests, for tests to check + StatCalls []string + ReadDirCalls []string + aggregatesLock sync.Mutex +} + +type mockInode struct { + modTime time.Time + permTime time.Time + sys interface{} + inodeNumber uint64 + readable bool +} + +func (m mockInode) ModTime() time.Time { + return m.modTime +} + +func (m mockInode) Sys() interface{} { + return m.sys +} + +type mockFile struct { + bytes []byte + + mockInode +} + +type mockLink struct { + target string + + mockInode +} + +type mockDir struct { + mockInode + + subdirs map[string]*mockDir + files map[string]*mockFile + symlinks map[string]*mockLink +} + +func (m *MockFs) resolve(path string, followLastLink bool) (result string, err error) { + if !filepath.IsAbs(path) { + path = filepath.Join(m.workDir, path) + } + path = filepath.Clean(path) + + return m.followLinks(path, followLastLink, 10) +} + +// note that followLinks can return a file path that doesn't exist +func (m *MockFs) followLinks(path string, followLastLink bool, count int) (canonicalPath string, err error) { + if path == "/" { + return path, nil + } + + parentPath, leaf := pathSplit(path) + if parentPath == path { + err = fmt.Errorf("Internal error: %v yields itself as a parent", path) + panic(err.Error()) + return "", fmt.Errorf("Internal error: %v yields itself as a parent", path) + } + + parentPath, err = m.followLinks(parentPath, true, count) + if err != nil { + return "", err + } + + parentNode, err := m.getDir(parentPath, false) + if err != nil { + return "", err + } + if !parentNode.readable { + return "", &os.PathError{ + Op: "read", + Path: path, + Err: os.ErrPermission, + } + } + + link, isLink := parentNode.symlinks[leaf] + if isLink && followLastLink { + if count <= 0 { + // probably a loop + return "", &os.PathError{ + Op: "read", + Path: path, + Err: fmt.Errorf("too many levels of symbolic links"), + } + } + + if !link.readable { + return "", &os.PathError{ + Op: "read", + Path: path, + Err: os.ErrPermission, + } + } + + target := m.followLink(link, parentPath) + return m.followLinks(target, followLastLink, count-1) + } + return path, nil +} + +func (m *MockFs) followLink(link *mockLink, parentPath string) (result string) { + return filepath.Clean(filepath.Join(parentPath, link.target)) +} + +func (m *MockFs) getFile(parentDir *mockDir, fileName string) (file *mockFile, err error) { + file, isFile := parentDir.files[fileName] + if !isFile { + _, isDir := parentDir.subdirs[fileName] + _, isLink := parentDir.symlinks[fileName] + if isDir || isLink { + return nil, &os.PathError{ + Op: "open", + Path: fileName, + Err: os.ErrInvalid, + } + } + + return nil, &os.PathError{ + Op: "open", + Path: fileName, + Err: os.ErrNotExist, + } + } + if !file.readable { + return nil, &os.PathError{ + Op: "open", + Path: fileName, + Err: os.ErrPermission, + } + } + return file, nil +} + +func (m *MockFs) getInode(parentDir *mockDir, name string) (inode *mockInode, err error) { + file, isFile := parentDir.files[name] + if isFile { + return &file.mockInode, nil + } + link, isLink := parentDir.symlinks[name] + if isLink { + return &link.mockInode, nil + } + dir, isDir := parentDir.subdirs[name] + if isDir { + return &dir.mockInode, nil + } + return nil, &os.PathError{ + Op: "stat", + Path: name, + Err: os.ErrNotExist, + } + +} + +func (m *MockFs) Open(path string) (io.ReadCloser, error) { + path, err := m.resolve(path, true) + if err != nil { + return nil, err + } + + if err != nil { + return nil, err + } + + parentPath, base := pathSplit(path) + parentDir, err := m.getDir(parentPath, false) + if err != nil { + return nil, err + } + file, err := m.getFile(parentDir, base) + if err != nil { + return nil, err + } + return struct { + io.Closer + *bytes.Reader + }{ + ioutil.NopCloser(nil), + bytes.NewReader(file.bytes), + }, nil + +} + +// a mockFileInfo is for exporting file stats in a way that satisfies the FileInfo interface +type mockFileInfo struct { + path string + size int64 + modTime time.Time // time at which the inode's contents were modified + permTime time.Time // time at which the inode's permissions were modified + isDir bool + inodeNumber uint64 + deviceNumber uint64 +} + +func (m *mockFileInfo) Name() string { + return m.path +} + +func (m *mockFileInfo) Size() int64 { + return m.size +} + +func (m *mockFileInfo) Mode() os.FileMode { + return 0 +} + +func (m *mockFileInfo) ModTime() time.Time { + return m.modTime +} + +func (m *mockFileInfo) IsDir() bool { + return m.isDir +} + +func (m *mockFileInfo) Sys() interface{} { + return nil +} + +func (m *MockFs) dirToFileInfo(d *mockDir, path string) (info *mockFileInfo) { + return &mockFileInfo{ + path: path, + size: 1, + modTime: d.modTime, + permTime: d.permTime, + isDir: true, + inodeNumber: d.inodeNumber, + deviceNumber: m.deviceNumber, + } + +} + +func (m *MockFs) fileToFileInfo(f *mockFile, path string) (info *mockFileInfo) { + return &mockFileInfo{ + path: path, + size: 1, + modTime: f.modTime, + permTime: f.permTime, + isDir: false, + inodeNumber: f.inodeNumber, + deviceNumber: m.deviceNumber, + } +} + +func (m *MockFs) linkToFileInfo(l *mockLink, path string) (info *mockFileInfo) { + return &mockFileInfo{ + path: path, + size: 1, + modTime: l.modTime, + permTime: l.permTime, + isDir: false, + inodeNumber: l.inodeNumber, + deviceNumber: m.deviceNumber, + } +} + +func (m *MockFs) Lstat(path string) (stats os.FileInfo, err error) { + // update aggregates + m.aggregatesLock.Lock() + m.StatCalls = append(m.StatCalls, path) + m.aggregatesLock.Unlock() + + // resolve symlinks + path, err = m.resolve(path, false) + if err != nil { + return nil, err + } + + // special case for root dir + if path == "/" { + return m.dirToFileInfo(&m.root, "/"), nil + } + + // determine type and handle appropriately + parentPath, baseName := pathSplit(path) + dir, err := m.getDir(parentPath, false) + if err != nil { + return nil, err + } + subdir, subdirExists := dir.subdirs[baseName] + if subdirExists { + return m.dirToFileInfo(subdir, path), nil + } + file, fileExists := dir.files[baseName] + if fileExists { + return m.fileToFileInfo(file, path), nil + } + link, linkExists := dir.symlinks[baseName] + if linkExists { + return m.linkToFileInfo(link, path), nil + } + // not found + return nil, &os.PathError{ + Op: "stat", + Path: path, + Err: os.ErrNotExist, + } +} + +func (m *MockFs) InodeNumber(info os.FileInfo) (number uint64, err error) { + mockInfo, ok := info.(*mockFileInfo) + if ok { + return mockInfo.inodeNumber, nil + } + return 0, fmt.Errorf("%v is not a mockFileInfo", info) +} +func (m *MockFs) DeviceNumber(info os.FileInfo) (number uint64, err error) { + mockInfo, ok := info.(*mockFileInfo) + if ok { + return mockInfo.deviceNumber, nil + } + return 0, fmt.Errorf("%v is not a mockFileInfo", info) +} +func (m *MockFs) PermTime(info os.FileInfo) (when time.Time, err error) { + mockInfo, ok := info.(*mockFileInfo) + if ok { + return mockInfo.permTime, nil + } + return time.Date(0, 0, 0, 0, 0, 0, 0, nil), + fmt.Errorf("%v is not a mockFileInfo", info) +} + +func (m *MockFs) ReadDir(path string) (contents []os.FileInfo, err error) { + // update aggregates + m.aggregatesLock.Lock() + m.ReadDirCalls = append(m.ReadDirCalls, path) + m.aggregatesLock.Unlock() + + // locate directory + path, err = m.resolve(path, true) + if err != nil { + return nil, err + } + results := []os.FileInfo{} + dir, err := m.getDir(path, false) + if err != nil { + return nil, err + } + if !dir.readable { + return nil, &os.PathError{ + Op: "read", + Path: path, + Err: os.ErrPermission, + } + } + // describe its contents + for name, subdir := range dir.subdirs { + dirInfo := m.dirToFileInfo(subdir, name) + results = append(results, dirInfo) + } + for name, file := range dir.files { + info := m.fileToFileInfo(file, name) + results = append(results, info) + } + for name, link := range dir.symlinks { + info := m.linkToFileInfo(link, name) + results = append(results, info) + } + return results, nil +} + +func (m *MockFs) Rename(sourcePath string, destPath string) error { + // validate source parent exists + sourcePath, err := m.resolve(sourcePath, false) + if err != nil { + return err + } + sourceParentPath := filepath.Dir(sourcePath) + sourceParentDir, err := m.getDir(sourceParentPath, false) + if err != nil { + return err + } + if sourceParentDir == nil { + return &os.PathError{ + Op: "move", + Path: sourcePath, + Err: os.ErrNotExist, + } + } + if !sourceParentDir.readable { + return &os.PathError{ + Op: "move", + Path: sourcePath, + Err: os.ErrPermission, + } + } + + // validate dest parent exists + destPath, err = m.resolve(destPath, false) + destParentPath := filepath.Dir(destPath) + destParentDir, err := m.getDir(destParentPath, false) + if err != nil { + return err + } + if destParentDir == nil { + return &os.PathError{ + Op: "move", + Path: destParentPath, + Err: os.ErrNotExist, + } + } + if !destParentDir.readable { + return &os.PathError{ + Op: "move", + Path: destParentPath, + Err: os.ErrPermission, + } + } + // check the source and dest themselves + sourceBase := filepath.Base(sourcePath) + destBase := filepath.Base(destPath) + + file, sourceIsFile := sourceParentDir.files[sourceBase] + dir, sourceIsDir := sourceParentDir.subdirs[sourceBase] + link, sourceIsLink := sourceParentDir.symlinks[sourceBase] + + // validate that the source exists + if !sourceIsFile && !sourceIsDir && !sourceIsLink { + return &os.PathError{ + Op: "move", + Path: sourcePath, + Err: os.ErrNotExist, + } + + } + + // validate the destination doesn't already exist as an incompatible type + _, destWasFile := destParentDir.files[destBase] + _, destWasDir := destParentDir.subdirs[destBase] + _, destWasLink := destParentDir.symlinks[destBase] + + if destWasDir { + return &os.PathError{ + Op: "move", + Path: destPath, + Err: errors.New("destination exists as a directory"), + } + } + + if sourceIsDir && (destWasFile || destWasLink) { + return &os.PathError{ + Op: "move", + Path: destPath, + Err: errors.New("destination exists as a file"), + } + } + + if destWasFile { + delete(destParentDir.files, destBase) + } + if destWasDir { + delete(destParentDir.subdirs, destBase) + } + if destWasLink { + delete(destParentDir.symlinks, destBase) + } + + if sourceIsFile { + destParentDir.files[destBase] = file + delete(sourceParentDir.files, sourceBase) + } + if sourceIsDir { + destParentDir.subdirs[destBase] = dir + delete(sourceParentDir.subdirs, sourceBase) + } + if sourceIsLink { + destParentDir.symlinks[destBase] = link + delete(destParentDir.symlinks, sourceBase) + } + + destParentDir.modTime = m.Clock.Time() + sourceParentDir.modTime = m.Clock.Time() + return nil +} + +func (m *MockFs) newInodeNumber() uint64 { + result := m.nextInodeNumber + m.nextInodeNumber++ + return result +} + +func (m *MockFs) WriteFile(filePath string, data []byte, perm os.FileMode) error { + filePath, err := m.resolve(filePath, true) + if err != nil { + return err + } + parentPath := filepath.Dir(filePath) + parentDir, err := m.getDir(parentPath, false) + if err != nil || parentDir == nil { + return &os.PathError{ + Op: "write", + Path: parentPath, + Err: os.ErrNotExist, + } + } + if !parentDir.readable { + return &os.PathError{ + Op: "write", + Path: parentPath, + Err: os.ErrPermission, + } + } + + baseName := filepath.Base(filePath) + _, exists := parentDir.files[baseName] + if !exists { + parentDir.modTime = m.Clock.Time() + parentDir.files[baseName] = m.newFile() + } else { + if !parentDir.files[baseName].readable { + return &os.PathError{ + Op: "write", + Path: filePath, + Err: os.ErrPermission, + } + } + } + file := parentDir.files[baseName] + file.bytes = data + file.modTime = m.Clock.Time() + return nil +} + +func (m *MockFs) newFile() *mockFile { + newFile := &mockFile{} + newFile.inodeNumber = m.newInodeNumber() + newFile.modTime = m.Clock.Time() + newFile.permTime = newFile.modTime + newFile.readable = true + return newFile +} + +func (m *MockFs) newDir() *mockDir { + newDir := &mockDir{ + subdirs: make(map[string]*mockDir, 0), + files: make(map[string]*mockFile, 0), + symlinks: make(map[string]*mockLink, 0), + } + newDir.inodeNumber = m.newInodeNumber() + newDir.modTime = m.Clock.Time() + newDir.permTime = newDir.modTime + newDir.readable = true + return newDir +} + +func (m *MockFs) newLink(target string) *mockLink { + newLink := &mockLink{ + target: target, + } + newLink.inodeNumber = m.newInodeNumber() + newLink.modTime = m.Clock.Time() + newLink.permTime = newLink.modTime + newLink.readable = true + + return newLink +} +func (m *MockFs) MkDirs(path string) error { + _, err := m.getDir(path, true) + return err +} + +// getDir doesn't support symlinks +func (m *MockFs) getDir(path string, createIfMissing bool) (dir *mockDir, err error) { + cleanedPath := filepath.Clean(path) + if cleanedPath == "/" { + return &m.root, nil + } + + parentPath, leaf := pathSplit(cleanedPath) + if len(parentPath) >= len(path) { + return &m.root, nil + } + parent, err := m.getDir(parentPath, createIfMissing) + if err != nil { + return nil, err + } + if !parent.readable { + return nil, &os.PathError{ + Op: "stat", + Path: path, + Err: os.ErrPermission, + } + } + childDir, dirExists := parent.subdirs[leaf] + if !dirExists { + if createIfMissing { + // confirm that a file with the same name doesn't already exist + _, fileExists := parent.files[leaf] + if fileExists { + return nil, &os.PathError{ + Op: "mkdir", + Path: path, + Err: os.ErrExist, + } + } + // create this directory + childDir = m.newDir() + parent.subdirs[leaf] = childDir + parent.modTime = m.Clock.Time() + } else { + return nil, &os.PathError{ + Op: "stat", + Path: path, + Err: os.ErrNotExist, + } + } + } + return childDir, nil + +} + +func (m *MockFs) Remove(path string) (err error) { + path, err = m.resolve(path, false) + parentPath, leaf := pathSplit(path) + if len(leaf) == 0 { + return fmt.Errorf("Cannot remove %v\n", path) + } + parentDir, err := m.getDir(parentPath, false) + if err != nil { + return err + } + if parentDir == nil { + return &os.PathError{ + Op: "remove", + Path: path, + Err: os.ErrNotExist, + } + } + if !parentDir.readable { + return &os.PathError{ + Op: "remove", + Path: path, + Err: os.ErrPermission, + } + } + _, isDir := parentDir.subdirs[leaf] + if isDir { + return &os.PathError{ + Op: "remove", + Path: path, + Err: os.ErrInvalid, + } + } + _, isLink := parentDir.symlinks[leaf] + if isLink { + delete(parentDir.symlinks, leaf) + } else { + _, isFile := parentDir.files[leaf] + if !isFile { + return &os.PathError{ + Op: "remove", + Path: path, + Err: os.ErrNotExist, + } + } + delete(parentDir.files, leaf) + } + parentDir.modTime = m.Clock.Time() + return nil +} + +func (m *MockFs) Symlink(oldPath string, newPath string) (err error) { + newPath, err = m.resolve(newPath, false) + if err != nil { + return err + } + + newParentPath, leaf := pathSplit(newPath) + newParentDir, err := m.getDir(newParentPath, false) + if !newParentDir.readable { + return &os.PathError{ + Op: "link", + Path: newPath, + Err: os.ErrPermission, + } + } + if err != nil { + return err + } + newParentDir.symlinks[leaf] = m.newLink(oldPath) + return nil +} + +func (m *MockFs) RemoveAll(path string) (err error) { + path, err = m.resolve(path, false) + if err != nil { + return err + } + parentPath, leaf := pathSplit(path) + if len(leaf) == 0 { + return fmt.Errorf("Cannot remove %v\n", path) + } + parentDir, err := m.getDir(parentPath, false) + if err != nil { + return err + } + if parentDir == nil { + return &os.PathError{ + Op: "removeAll", + Path: path, + Err: os.ErrNotExist, + } + } + if !parentDir.readable { + return &os.PathError{ + Op: "removeAll", + Path: path, + Err: os.ErrPermission, + } + + } + _, isFile := parentDir.files[leaf] + _, isLink := parentDir.symlinks[leaf] + if isFile || isLink { + return m.Remove(path) + } + _, isDir := parentDir.subdirs[leaf] + if !isDir { + if !isDir { + return &os.PathError{ + Op: "removeAll", + Path: path, + Err: os.ErrNotExist, + } + } + } + + delete(parentDir.subdirs, leaf) + parentDir.modTime = m.Clock.Time() + return nil +} + +func (m *MockFs) SetReadable(path string, readable bool) error { + path, err := m.resolve(path, false) + if err != nil { + return err + } + parentPath, leaf := filepath.Split(path) + parentDir, err := m.getDir(parentPath, false) + if err != nil { + return err + } + if !parentDir.readable { + return &os.PathError{ + Op: "chmod", + Path: parentPath, + Err: os.ErrPermission, + } + } + + inode, err := m.getInode(parentDir, leaf) + if err != nil { + return err + } + inode.readable = readable + inode.permTime = m.Clock.Time() + return nil +} + +func (m *MockFs) ClearMetrics() { + m.ReadDirCalls = []string{} + m.StatCalls = []string{} +} + +func (m *MockFs) ViewId() (id string) { + return m.viewId +} + +func (m *MockFs) SetViewId(id string) { + m.viewId = id +} +func (m *MockFs) SetDeviceNumber(deviceNumber uint64) { + m.deviceNumber = deviceNumber +} diff --git a/ui/logger/logger.go b/ui/logger/logger.go index db7e82ad7..7d9687bec 100644 --- a/ui/logger/logger.go +++ b/ui/logger/logger.go @@ -145,13 +145,14 @@ func New(out io.Writer) *stdLogger { // SetVerbose controls whether Verbose[f|ln] logs to stderr as well as the // file-backed log. -func (s *stdLogger) SetVerbose(v bool) { +func (s *stdLogger) SetVerbose(v bool) *stdLogger { s.verbose = v + return s } // SetOutput controls where the file-backed log will be saved. It will keep // some number of backups of old log files. -func (s *stdLogger) SetOutput(path string) { +func (s *stdLogger) SetOutput(path string) *stdLogger { if f, err := CreateFileWithRotation(path, 5); err == nil { s.mutex.Lock() defer s.mutex.Unlock() @@ -164,6 +165,7 @@ func (s *stdLogger) SetOutput(path string) { } else { s.Fatal(err.Error()) } + return s } // Close disables logging to the file and closes the file handle.