From a6bd044ac15370c66d984111d19fe3b31113d42e Mon Sep 17 00:00:00 2001
From: Chih-Hung Hsieh <chh@google.com>
Date: Tue, 11 Oct 2016 15:25:26 -0700
Subject: [PATCH] Use parallel subprocesses to classify warnings.

* Add a --processes flag to specify number of parallel processes,
  with default multiprocessing.cpu_count().
* Wrap long line to suppress pylint warning.

Test: run warn.py with a large build.log file.
Change-Id: I9a93a9324bc531c1bce741367013051ce40a67fa
---
 tools/warn.py | 60 +++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 44 insertions(+), 16 deletions(-)

diff --git a/tools/warn.py b/tools/warn.py
index 9dbe12e42..5de818621 100755
--- a/tools/warn.py
+++ b/tools/warn.py
@@ -81,6 +81,7 @@ Use option --gencsv to output warning counts in CSV format.
 #   dump_csv():
 
 import argparse
+import multiprocessing
 import os
 import re
 
@@ -99,6 +100,10 @@ parser.add_argument('--url',
 parser.add_argument('--separator',
                     help='Separator between the end of a URL and the line '
                     'number argument. e.g. #')
+parser.add_argument('--processes',
+                    type=int,
+                    default=multiprocessing.cpu_count(),
+                    help='Number of parallel processes to process warnings')
 parser.add_argument(dest='buildlog', metavar='build.log',
                     help='Path to build.log file')
 args = parser.parse_args()
@@ -1706,7 +1711,8 @@ project_list = [
     simple_project_pattern('frameworks/av/media/mtp'),
     simple_project_pattern('frameworks/av/media/ndk'),
     simple_project_pattern('frameworks/av/media/utils'),
-    project_name_and_pattern('frameworks/av/media/Other', 'frameworks/av/media'),
+    project_name_and_pattern('frameworks/av/media/Other',
+                             'frameworks/av/media'),
     simple_project_pattern('frameworks/av/radio'),
     simple_project_pattern('frameworks/av/services'),
     simple_project_pattern('frameworks/av/soundtrigger'),
@@ -2062,22 +2068,13 @@ def find_project_index(line):
   return -1
 
 
-def classify_warning(line):
+def classify_one_warning(line, results):
   for i in range(len(warn_patterns)):
     w = warn_patterns[i]
     for cpat in w['compiled_patterns']:
       if cpat.match(line):
-        w['members'].append(line)
         p = find_project_index(line)
-        index = len(warning_messages)
-        warning_messages.append(line)
-        warning_records.append([i, p, index])
-        pname = '???' if p < 0 else project_names[p]
-        # Count warnings by project.
-        if pname in w['projects']:
-          w['projects'][pname] += 1
-        else:
-          w['projects'][pname] = 1
+        results.append([line, i, p])
         return
       else:
         # If we end up here, there was a problem parsing the log
@@ -2086,6 +2083,38 @@ def classify_warning(line):
         pass
 
 
+def classify_warnings(lines):
+  results = []
+  for line in lines:
+    classify_one_warning(line, results)
+  return results
+
+
+def parallel_classify_warnings(warning_lines):
+  """Classify all warning lines with num_cpu parallel processes."""
+  num_cpu = args.processes
+  groups = [[] for x in range(num_cpu)]
+  i = 0
+  for x in warning_lines:
+    groups[i].append(x)
+    i = (i + 1) % num_cpu
+  pool = multiprocessing.Pool(num_cpu)
+  group_results = pool.map(classify_warnings, groups)
+  for result in group_results:
+    for line, pattern_idx, project_idx in result:
+      pattern = warn_patterns[pattern_idx]
+      pattern['members'].append(line)
+      message_idx = len(warning_messages)
+      warning_messages.append(line)
+      warning_records.append([pattern_idx, project_idx, message_idx])
+      pname = '???' if project_idx < 0 else project_names[project_idx]
+      # Count warnings by project.
+      if pname in pattern['projects']:
+        pattern['projects'][pname] += 1
+      else:
+        pattern['projects'][pname] = 1
+
+
 def compile_patterns():
   """Precompiling every pattern speeds up parsing by about 30x."""
   for i in warn_patterns:
@@ -2153,14 +2182,12 @@ def parse_input_file():
   warning_pattern = re.compile('^[^ ]*/[^ ]*: warning: .*')
   compile_patterns()
 
-  # read the log file and classify all the warnings
+  # Collect all warnings into the warning_lines set.
   warning_lines = set()
   for line in infile:
     if warning_pattern.match(line):
       line = normalize_warning_line(line)
-      if line not in warning_lines:
-        classify_warning(line)
-        warning_lines.add(line)
+      warning_lines.add(line)
     elif line_counter < 50:
       # save a little bit of time by only doing this for the first few lines
       line_counter += 1
@@ -2173,6 +2200,7 @@ def parse_input_file():
       m = re.search('(?<=^TARGET_BUILD_VARIANT=).*', line)
       if m is not None:
         target_variant = m.group(0)
+  parallel_classify_warnings(warning_lines)
 
 
 # Return s with escaped backslash and quotation characters.