Add a CSV parser to parse the output from kati.

(Yet another class that would be great for Android to have in a host
tools library.)

Test: m product-config-test && java -jar out/host/linux-x86/testcases/product-config-test/product-config-test.jar
Change-Id: I7d74b2265393e4f340729ca5ba82d1ec92a20f85
This commit is contained in:
Joe Onorato 2021-01-19 22:14:23 -08:00
parent 43d4040f6f
commit 7c01d47c9a
3 changed files with 392 additions and 1 deletions

View File

@ -0,0 +1,242 @@
/*
* Copyright (C) 2020 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.android.build.config;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
/**
* A CSV parser.
*/
public class CsvParser {
    /**
     * Internal string buffer grows by this amount.
     */
    private static final int CHUNK_SIZE = 64 * 1024;

    /**
     * Thrown when the input is not well-formed CSV. Carries the position at which the
     * problem was detected.
     */
    public static class ParseException extends Exception {
        private final int mLine;
        private final int mColumn;

        /**
         * @param line 1-based line number at which the error was detected.
         * @param column 1-based column at which the error was detected.
         * @param message Human readable description of the problem.
         */
        public ParseException(int line, int column, String message) {
            super(message);
            mLine = line;
            mColumn = column;
        }

        /**
         * Line number in source file.
         */
        public int getLine() {
            return mLine;
        }

        /**
         * Column in source file.
         */
        public int getColumn() {
            return mColumn;
        }
    }

    /**
     * One non-blank record of the input: its fields plus the line it was reported on.
     */
    public static class Line {
        private final int mLineNumber;
        private final List<String> mFields;

        Line(int lineno, List<String> fields) {
            mLineNumber = lineno;
            mFields = fields;
        }

        /**
         * Line number on which this record ended. For a record containing quoted fields
         * with embedded newlines this is the last physical line of the record.
         */
        public int getLine() {
            return mLineNumber;
        }

        /**
         * The fields of this record, in order. Empty fields are empty strings.
         */
        public List<String> getFields() {
            return mFields;
        }
    }

    // Parser States
    private static final int STATE_START_LINE = 0;
    private static final int STATE_START_FIELD = 1;
    private static final int STATE_INSIDE_QUOTED_FIELD = 2;
    private static final int STATE_FIRST_QUOTATION_MARK = 3;
    private static final int STATE_INSIDE_UNQUOTED_FIELD = 4;
    private static final int STATE_DONE = 5;

    // Parser Actions (bit flags, may be combined)
    private static final int ACTION_APPEND_CHAR = 1;
    private static final int ACTION_FIELD_COMPLETE = 2;
    private static final int ACTION_LINE_COMPLETE = 4;

    /**
     * Not instantiable; use {@link #parse(Reader)}.
     */
    private CsvParser() {
    }

    /**
     * Reads CSV and returns a list of Line objects.
     *
     * Handles newlines inside fields quoted with double quotes ("). Inside a quoted
     * field, two consecutive double quotes produce one literal double quote.
     *
     * Doesn't report blank lines, but does include empty fields.
     *
     * Characters are read one at a time with {@link Reader#read()}; the reader is not
     * closed by this method.
     *
     * @param reader Source of the CSV text.
     * @return The non-blank records, in input order.
     * @throws ParseException If the input ends inside a quoted field, or a closing quote
     *         is followed by anything other than a quote, a comma, a newline or the end
     *         of input.
     * @throws IOException If the reader fails.
     */
    public static List<Line> parse(Reader reader)
            throws ParseException, IOException {
        final List<Line> result = new ArrayList<>();
        int line = 1;
        int column = 1;
        int pos = 0;
        char[] buf = new char[CHUNK_SIZE];
        final Map<String, String> stringPool = new HashMap<>();
        List<String> fields = new ArrayList<>();
        int state = STATE_START_LINE;
        while (state != STATE_DONE) {
            final int c = reader.read();
            // Reader.read() reports end of stream as -1. Zero is a legitimate character
            // (U+0000) and must not be mistaken for end of input.
            final boolean eof = c < 0;
            int action = 0;
            switch (state) {
                case STATE_START_LINE:
                    if (eof) {
                        // No data, skip ACTION_LINE_COMPLETE.
                        state = STATE_DONE;
                    } else if (c == '"') {
                        state = STATE_INSIDE_QUOTED_FIELD;
                    } else if (c == ',') {
                        action = ACTION_FIELD_COMPLETE;
                        state = STATE_START_FIELD;
                    } else if (c == '\n') {
                        // Blank line: consume the newline, state stays STATE_START_LINE.
                    } else {
                        action = ACTION_APPEND_CHAR;
                        state = STATE_INSIDE_UNQUOTED_FIELD;
                    }
                    break;
                case STATE_START_FIELD:
                    if (eof) {
                        // Field will be empty
                        action = ACTION_FIELD_COMPLETE | ACTION_LINE_COMPLETE;
                        state = STATE_DONE;
                    } else if (c == '"') {
                        state = STATE_INSIDE_QUOTED_FIELD;
                    } else if (c == ',') {
                        action = ACTION_FIELD_COMPLETE;
                        state = STATE_START_FIELD;
                    } else if (c == '\n') {
                        action = ACTION_FIELD_COMPLETE | ACTION_LINE_COMPLETE;
                        state = STATE_START_LINE;
                    } else {
                        action = ACTION_APPEND_CHAR;
                        state = STATE_INSIDE_UNQUOTED_FIELD;
                    }
                    break;
                case STATE_INSIDE_QUOTED_FIELD:
                    if (eof) {
                        throw new ParseException(line, column,
                                "Bad input: End of input inside quoted field.");
                    } else if (c == '"') {
                        // Either the closing quote or the first half of an escaped quote;
                        // the next character decides.
                        state = STATE_FIRST_QUOTATION_MARK;
                    } else {
                        // Everything else, including ',' and '\n', is field content.
                        action = ACTION_APPEND_CHAR;
                    }
                    break;
                case STATE_FIRST_QUOTATION_MARK:
                    if (eof) {
                        action = ACTION_FIELD_COMPLETE | ACTION_LINE_COMPLETE;
                        state = STATE_DONE;
                    } else if (c == '"') {
                        // Doubled quote: append one literal '"' and stay in the field.
                        action = ACTION_APPEND_CHAR;
                        state = STATE_INSIDE_QUOTED_FIELD;
                    } else if (c == ',') {
                        action = ACTION_FIELD_COMPLETE;
                        state = STATE_START_FIELD;
                    } else if (c == '\n') {
                        action = ACTION_FIELD_COMPLETE | ACTION_LINE_COMPLETE;
                        state = STATE_START_LINE;
                    } else {
                        throw new ParseException(line, column,
                                "Bad input: Character after field ended or unquoted '\"'.");
                    }
                    break;
                case STATE_INSIDE_UNQUOTED_FIELD:
                    if (eof) {
                        action = ACTION_FIELD_COMPLETE | ACTION_LINE_COMPLETE;
                        state = STATE_DONE;
                    } else if (c == ',') {
                        action = ACTION_FIELD_COMPLETE;
                        state = STATE_START_FIELD;
                    } else if (c == '\n') {
                        action = ACTION_FIELD_COMPLETE | ACTION_LINE_COMPLETE;
                        state = STATE_START_LINE;
                    } else {
                        action = ACTION_APPEND_CHAR;
                    }
                    break;
                default:
                    break;
            }

            if ((action & ACTION_APPEND_CHAR) != 0) {
                // Reallocate buffer if necessary. Hopefully not often because CHUNK_SIZE
                // is big.
                if (pos >= buf.length) {
                    final char[] old = buf;
                    buf = new char[old.length + CHUNK_SIZE];
                    System.arraycopy(old, 0, buf, 0, old.length);
                }
                // Store the character
                buf[pos] = (char) c;
                pos++;
            }

            if ((action & ACTION_FIELD_COMPLETE) != 0) {
                // A lot of the strings are duplicated, so pool them to reduce peak memory
                // usage. This could be made slightly better by having a custom key class
                // that does the lookup without making a new String that gets immediately
                // thrown away.
                String field = new String(buf, 0, pos);
                final String cached = stringPool.get(field);
                if (cached == null) {
                    stringPool.put(field, field);
                } else {
                    field = cached;
                }
                fields.add(field);
                pos = 0;
            }

            if ((action & ACTION_LINE_COMPLETE) != 0) {
                // Only report lines with any contents
                if (fields.size() > 0) {
                    // 'line' has not yet been advanced for the terminating newline, so
                    // the record is reported on the line where it ends.
                    result.add(new Line(line, fields));
                    fields = new ArrayList<>();
                }
            }

            // Position tracking for error reporting.
            if (c == '\n') {
                line++;
                column = 1;
            } else {
                column++;
            }
        }
        return result;
    }
}

View File

@ -0,0 +1,148 @@
/*
* Copyright (C) 2020 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.android.build.config;
import org.junit.Assert;
import org.junit.Test;
import java.io.StringReader;
import java.util.Arrays;
import java.util.List;
/**
* Test for CSV parser class.
*/
public class CsvParserTest {
    /**
     * Formats the expected and actual field lists side by side for failure messages.
     */
    public String listsToStrings(String[] expected, List<String> actual) {
        final String want = Arrays.toString(expected);
        final String got = Arrays.toString(actual.toArray());
        return "expected=" + want + " actual=" + got;
    }

    /**
     * Checks that a parsed line carries the given line number and exactly the given
     * fields, throwing a RuntimeException describing the first difference found.
     */
    public void assertLineEquals(CsvParser.Line actual, int lineno, String... fields) {
        final int gotLine = actual.getLine();
        if (gotLine != lineno) {
            throw new RuntimeException("lineno mismatch: expected=" + lineno
                    + " actual=" + gotLine);
        }

        final List<String> gotFields = actual.getFields();
        final int gotCount = gotFields.size();
        if (gotCount != fields.length) {
            throw new RuntimeException("getFields().size() mismatch: expected=" + fields.length
                    + " actual=" + gotCount
                    + " values: " + listsToStrings(fields, gotFields));
        }

        int index = 0;
        for (String want : fields) {
            final String have = gotFields.get(index);
            if (!want.equals(have)) {
                throw new RuntimeException("getFields().get(" + index + ") mismatch: expected="
                        + want + " actual=" + have
                        + " values: " + listsToStrings(fields, gotFields));
            }
            index++;
        }
    }

    /**
     * Runs the parser over an in-memory string.
     */
    private static List<CsvParser.Line> parseText(String text) throws Exception {
        return CsvParser.parse(new StringReader(text));
    }

    /** Empty input produces no lines. */
    @Test
    public void testEmptyString() throws Exception {
        final List<CsvParser.Line> parsed = parseText("");
        Assert.assertEquals(0, parsed.size());
    }

    /** A single character with no newline is one line with one field. */
    @Test
    public void testLexerOneCharacter() throws Exception {
        final List<CsvParser.Line> parsed = parseText("a");
        Assert.assertEquals(1, parsed.size());
        assertLineEquals(parsed.get(0), 1, "a");
    }

    /** Two fields are reported even when the input lacks a trailing newline. */
    @Test
    public void testLexerTwoFieldsNoNewline() throws Exception {
        final List<CsvParser.Line> parsed = parseText("a,b");
        Assert.assertEquals(1, parsed.size());
        assertLineEquals(parsed.get(0), 1, "a", "b");
    }

    /** A trailing newline does not add an extra line. */
    @Test
    public void testLexerTwoFieldsNewline() throws Exception {
        final List<CsvParser.Line> parsed = parseText("a,b\n");
        Assert.assertEquals(1, parsed.size());
        assertLineEquals(parsed.get(0), 1, "a", "b");
    }

    /** A blank line at the end of the input is not reported. */
    @Test
    public void testEndsWithTwoNewlines() throws Exception {
        final List<CsvParser.Line> parsed = parseText("a,b\n\n");
        Assert.assertEquals(1, parsed.size());
        assertLineEquals(parsed.get(0), 1, "a", "b");
    }

    /** Input made only of newlines produces no lines. */
    @Test
    public void testOnlyNewlines() throws Exception {
        final List<CsvParser.Line> parsed = parseText("\n\n\n\n");
        Assert.assertEquals(0, parsed.size());
    }

    /**
     * Mixes empty fields, a quoted field holding an escaped quote and a newline, a blank
     * line, and a final line without a newline.
     */
    @Test
    public void testLexerComplex() throws Exception {
        final String input = ",\"ab\"\"\nc\",,de\n"
                + "fg,\n"
                + "\n"
                + ",\n"
                + "hijk";
        final List<CsvParser.Line> parsed = parseText(input);
        Assert.assertEquals(4, parsed.size());
        assertLineEquals(parsed.get(0), 2, "", "ab\"\nc", "", "de");
        assertLineEquals(parsed.get(1), 3, "fg", "");
        assertLineEquals(parsed.get(2), 5, "", "");
        assertLineEquals(parsed.get(3), 6, "hijk");
    }

    /** Input that ends inside a quoted field must be rejected. */
    @Test
    public void testEndInsideQuoted() throws Exception {
        try {
            parseText("\"asd");
        } catch (CsvParser.ParseException ex) {
            System.out.println("Caught: " + ex);
            return;
        }
        throw new RuntimeException("Didn't throw ParseException");
    }

    /** A character directly after a closing quote must be rejected. */
    @Test
    public void testCharacterAfterQuotedField() throws Exception {
        try {
            parseText("\"\"a");
        } catch (CsvParser.ParseException ex) {
            System.out.println("Caught: " + ex);
            return;
        }
        throw new RuntimeException("Didn't throw ParseException");
    }
}

View File

@ -39,7 +39,8 @@ public class TestRunner {
System.out.println(failure.getTrace());
}
});
Result result = junit.run(ErrorReporterTest.class,
Result result = junit.run(CsvParserTest.class,
ErrorReporterTest.class,
OptionsTest.class);
if (!result.wasSuccessful()) {
System.out.println("\n*** FAILED ***");