Handle response charset detection and gzip decompression when recording scripts

This commit is contained in:
Tienan Chen 2013-12-09 20:51:00 +08:00
parent a1a572d7f0
commit 0a2cfcddd2
15 changed files with 354 additions and 110 deletions

17
Script/baidu.html Normal file
View File

@ -0,0 +1,17 @@
<html>
<head>
<title>Bench4Q Test Case</title>
<link href="style/bootstrap-cerulean.css" />
<link href="style/bootstrap-classic.css" />
<link href="style/bootstrap-cerulean.css" />
</head>
<body>
<img src="images/1.jpg" alt="No this one" />
<img src="images/2.jpg" alt="No this one" />
<img src="images/3.jpg" alt="No this one" />
<script src="script/agentTable.js" type="text/javascript"></script>
<script src="script/base.js" type="text/javascript"></script>
</body>
</html>

BIN
Script/gzipResponse.txt Normal file

Binary file not shown.

BIN
Script/gzipResponseBody.txt Normal file

Binary file not shown.

10
Script/requestHeader.txt Normal file
View File

@ -0,0 +1,10 @@
GET / HTTP/1.1
Host: www.baidu.com
Connection: keep-alive
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8
User-Agent: Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36
DNT: 1
Accept-Encoding: gzip,deflate,sdch
Accept-Language: en,zh-CN;q=0.8,zh;q=0.6
Cookie: BAIDUID=1D43A956BCED0A81B8340058134CD2F6:FG=1; BDUSS=EJMRWZ2eklMaERoQ344em5RZ2EyTVh0UjRDcWpiRmhnMjRlLTZnR3NZajBOS0JTQVFBQUFBJCQAAAAAAAAAAAEAAAA2JuwxZmVuZ3l1bjIwMTIzOQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAPSneFL0p3hSN; Hm_lvt_9f14aaa038bbba8b12ec2a4a3e51d254=1384429678; H_PS_PSSID=3784_4199_1432_4421_4414_4211_4264_4450_4503; BDRCVFR[feWj1Vr5u3D]=I67x6TjHwwYf0

View File

26
Script/simpleResponse.txt Normal file
View File

@ -0,0 +1,26 @@
HTTP/1.1 200 OK
Server: Apache-Coyote/1.1
Accept-Ranges: bytes
ETag: W/"532-1386125334307"
Last-Modified: Wed, 04 Dec 2013 02:48:54 GMT
Content-Type: text/html
Content-Length: 532
Date: Mon, 09 Dec 2013 07:03:15 GMT
<html>
<head>
<title>Bench4Q Test Case</title>
<link href="style/bootstrap-cerulean.css" />
<link href="style/bootstrap-classic.css" />
<link href="style/bootstrap-cerulean.css" />
</head>
<body>
<img src="images/1.jpg" alt="No this one" />
<img src="images/2.jpg" alt="No this one" />
<img src="images/3.jpg" alt="No this one" />
<script src="script/agentTable.js" type="text/javascript"></script>
<script src="script/base.js" type="text/javascript"></script>
</body>
</html>

10
pom.xml
View File

@ -52,11 +52,11 @@
<artifactId>commons-httpclient</artifactId>
<version>3.1</version>
</dependency>
<dependency>
<groupId>org.python</groupId>
<artifactId>jython</artifactId>
<version>2.7-b1</version>
</dependency>
<dependency>
<groupId>org.python</groupId>
<artifactId>jython</artifactId>
<version>2.7-b1</version>
</dependency>
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-test</artifactId>

View File

@ -1,14 +1,15 @@
package org.bench4q.master.scriptrecord.httpcapture.generator;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.StringTokenizer;
import java.util.Vector;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@ -318,15 +319,18 @@ public abstract class AbstractCodeGenerator implements IScriptGenerator,
this.ignoreNextResponse = false;
return;
}
// TODO:Get charset from responseHeader
ResponseParser parser = new ResponseParser();
String respStr = new String(response).toLowerCase();
String respCode = parseResponseCode(respStr);
String contentType = parseContentType(respStr);
String charset = parseCharset(respStr);
@SuppressWarnings("unused")
String contentEncoding = parseContentEncoding(respStr);
String respCode = parser.parseResponseCode(respStr);
String contentType = parser.parseContentType(respStr);
String charset = parser.parseCharset(respStr);
String contentEncoding = parser.parseContentEncoding(respStr);
ContentDecoder contentDecoder = ContentDecoder
.createDecoder(contentEncoding);
byte[] contentBodyAfterDecoded = contentDecoder.decodeContent(
new ByteArrayInputStream(parser.parseReponseBody(respStr)),
Charset.forName(charset));
doAssertResponse(respCode);
if ((contentType != null) && (mimeTypes.get(contentType) != null)
&& (respCode != null)) {
if (respCode.startsWith("200")) {
@ -334,10 +338,10 @@ public abstract class AbstractCodeGenerator implements IScriptGenerator,
setStruts(true);
else
setStruts(false);
if (contentType.toLowerCase().compareTo("text/html") == 0) {
doTidyCode(HttpTestCase.staticUrlDecode(header.url));
doParseHtmlContent(response, charset, header.url);
doParseHtmlContent(contentBodyAfterDecoded, charset,
header.url);
if (isCpRspToStdout())
doResponseForStdOut(HttpTestCase.staticUrlDecode(
header.url).trim());
@ -345,92 +349,16 @@ public abstract class AbstractCodeGenerator implements IScriptGenerator,
doResponseForFile();
}
}
doEndTransaction();
} else {
log.debug("Ignoring response because content type is not known: "
+ contentType);
}
if (isFirstRequest())
this.isFirstRequest = false;
}
private String parseContentEncoding(String respStr) {
int pos = respStr.indexOf("content-encoding:");
if (pos != -1) {
pos += 18;
int end = respStr.indexOf("/r/n", pos);
return respStr.substring(pos, end);
}
return null;
}
private String parseCharset(String response) {
String ret = null;
int pos = response.indexOf("content-type:");
if (pos > -1) {
pos += 14;
int end = response.indexOf("\r\n", pos);
int middle = response.indexOf(";", pos);
if (middle > -1 && middle < end) {
ret = response.substring(middle + 1, end);
}
}
if (ret != null) {
int begin = ret.indexOf("charset=");
ret = ret.substring(begin + 8);
}
return ret;
}
private String parseContentType(String response) {
String contentType = null;
int pos = response.indexOf("content-type:");
if (pos > -1) {
pos += 14;
int end = response.indexOf("\r\n", pos);
int end2 = response.indexOf(";", pos);
if ((end2 > -1) && (end2 < end))
end = end2;
if (end > -1)
contentType = response.substring(pos, end).trim();
log.debug(" Content-Type: " + contentType);
} else {
log.debug(" No content-type header! First few lines:");
StringTokenizer st = new StringTokenizer(response, "\n");
int i = 0;
while ((st.hasMoreTokens()) && (i < 5)) {
log.debug(st.nextToken());
++i;
}
}
return contentType;
}
private String parseResponseCode(String response) {
String respCode = null;
int pos = response.indexOf(" ");
if (pos != -1) {
int end = response.indexOf(" ", pos + 1);
int end2 = response.indexOf("\n", pos + 1);
if ((end2 != -1) && (end2 < end))
end = end2;
if (end != -1)
respCode = response.substring(pos + 1, end).trim();
}
log.debug("HTTP response code: " + respCode);
return respCode;
}
@SuppressWarnings("unused")
private String decodeContent(String response, String encode) {
// TODO:
return null;
}
// TODO: edit this type
public void run() {
this.outstandingInserts = new LinkedList<BehaviorBaseModel>();

View File

@ -204,13 +204,14 @@ public class Bench4qCodeGenerator extends AbstractCodeGenerator {
}
@Override
public void doParseHtmlContent(byte[] response, String charset,
public void doParseHtmlContent(byte[] responseBody, String charset,
String rootUrl) {
if (charset == null) {
charset = "ANSI";
charset = "ISO-8895-1";
}
String responseContent = new String(response, Charset.forName(charset));
int htmlStart = responseContent.indexOf("<html>");
String responseContent = new String(responseBody,
Charset.forName(charset));
int htmlStart = responseContent.indexOf("<html");
int htmlEnd = responseContent.indexOf("</html>");
if (htmlStart == -1 || htmlEnd == -1) {
return;

View File

@ -0,0 +1,40 @@
package org.bench4q.master.scriptrecord.httpcapture.generator;
import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.nio.charset.Charset;
import org.apache.log4j.Logger;
import org.bench4q.master.exception.DealWithLog;
/**
 * Decodes the body of a recorded HTTP response according to its
 * Content-Encoding.
 *
 * This base class is the identity decoder: it returns the body bytes
 * unchanged. Use {@link #createDecoder(String)} to obtain the decoder that
 * matches a Content-Encoding header value.
 */
public class ContentDecoder {
    protected Logger logger = Logger.getLogger(ContentDecoder.class);

    protected ContentDecoder() {
    }

    /**
     * Selects a decoder for the given Content-Encoding value.
     *
     * @param encodeType
     *            the Content-Encoding header value; may be null when the
     *            response carries no such header
     * @return a {@link GzipDecoder} when the value is "gzip" (case and
     *         surrounding whitespace ignored), otherwise the identity decoder
     */
    public static ContentDecoder createDecoder(String encodeType) {
        // A response without a Content-Encoding header yields null here, so
        // the comparison must be null-safe.
        if (encodeType != null && "gzip".equalsIgnoreCase(encodeType.trim())) {
            return new GzipDecoder();
        }
        return new ContentDecoder();
    }

    /**
     * Reads the whole stream and returns its bytes unchanged.
     *
     * @param inputStream
     *            the encoded body
     * @param charset
     *            the charset of the body text; not needed by the identity
     *            decoder, which deliberately avoids a bytes-to-String-to-bytes
     *            round trip because that would replace any byte sequence the
     *            charset cannot represent
     * @return the body bytes, or null if reading failed (the failure is
     *         logged)
     */
    public byte[] decodeContent(InputStream inputStream, Charset charset) {
        try {
            ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
            byte[] buf = new byte[1024];
            int len;
            // -1 is the end-of-stream marker for InputStream.read(byte[]).
            while ((len = inputStream.read(buf)) != -1) {
                outputStream.write(buf, 0, len);
            }
            return outputStream.toByteArray();
        } catch (Exception e) {
            // Best effort: callers get null instead of an exception.
            logger.error(DealWithLog.getExceptionStackTrace(e));
            return null;
        }
    }
}

View File

@ -1,5 +0,0 @@
package org.bench4q.master.scriptrecord.httpcapture.generator;
public class ContentDecompress {
}

View File

@ -0,0 +1,28 @@
package org.bench4q.master.scriptrecord.httpcapture.generator;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.zip.GZIPInputStream;
import org.bench4q.master.exception.DealWithLog;
public class GzipDecoder extends ContentDecoder {
public byte[] decodeContent(InputStream inputStream) {
try {
GZIPInputStream gzipInputStream = new GZIPInputStream(inputStream);
ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
byte[] buf = new byte[1024];
int len;
while ((len = gzipInputStream.read(buf)) > 0) {
outputStream.write(buf, 0, len);
}
return outputStream.toByteArray();
} catch (IOException e) {
logger.error(DealWithLog.getExceptionStackTrace(e));
return null;
}
}
}

View File

@ -0,0 +1,108 @@
package org.bench4q.master.scriptrecord.httpcapture.generator;
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.log4j.Logger;
/**
 * Extracts the status code, selected header values and the body from a raw
 * HTTP response held in a String.
 *
 * Header lookups are case-insensitive and are restricted to the header
 * section (everything before the first blank line) so that text in the body
 * cannot be mistaken for a header. All returned header values are
 * lower-cased and trimmed.
 */
public class ResponseParser {
    /** Blank line that separates the header section from the body. */
    private static final String HEADER_END = "\r\n\r\n";

    private static final Logger logger = Logger
            .getLogger(ResponseParser.class);

    /**
     * Lower-cases the response so header names can be matched regardless of
     * case. Locale.ROOT keeps the result independent of the platform locale.
     */
    private String preprocess(String respString) {
        return respString.toLowerCase(java.util.Locale.ROOT);
    }

    /** Returns the lower-cased header section of the response. */
    private String headerSection(String input) {
        String lowered = preprocess(input);
        int headerEnd = lowered.indexOf(HEADER_END);
        return headerEnd > -1 ? lowered.substring(0, headerEnd) : lowered;
    }

    /**
     * Returns the trimmed, lower-cased value of the first header with the
     * given lower-case name, or null when the header is absent. Whitespace
     * after the colon is optional and a missing line terminator is
     * tolerated.
     */
    private String headerValue(String input, String name) {
        if (input == null) {
            return null;
        }
        String headers = headerSection(input);
        String key = name + ":";
        int pos = headers.indexOf(key);
        // Only accept a match at the start of a line, so that e.g.
        // "x-content-type:" is not taken for "content-type:".
        while (pos > 0 && headers.charAt(pos - 1) != '\n') {
            pos = headers.indexOf(key, pos + 1);
        }
        if (pos == -1) {
            return null;
        }
        int start = pos + key.length();
        int end = headers.indexOf('\n', start);
        if (end == -1) {
            end = headers.length();
        }
        // trim() also drops the '\r' of a CRLF line ending.
        return headers.substring(start, end).trim();
    }

    /**
     * Returns the bytes of everything after the first blank line of the
     * response, or an empty array when there is no blank line.
     *
     * NOTE(review): the String is converted with the platform default
     * charset, mirroring how the visible caller builds the String from the
     * raw bytes; binary bodies only survive this if that charset maps every
     * byte one-to-one — confirm against the caller.
     *
     * @param respStr
     *            the raw response
     * @return the body bytes, never null
     * @throws IOException
     *             declared for callers; not thrown by this implementation
     */
    public byte[] parseReponseBody(String respStr) throws IOException {
        String content = "";
        if (respStr != null) {
            int pos = respStr.indexOf(HEADER_END);
            if (pos > -1) {
                content = respStr.substring(pos + HEADER_END.length());
            }
        }
        return content.getBytes();
    }

    /**
     * @param input
     *            the raw response
     * @return the Content-Length value as text, or null when absent
     */
    public String parseContentLength(String input) {
        return headerValue(input, "content-length");
    }

    /**
     * @param input
     *            the raw response
     * @return the lower-cased Content-Encoding value (e.g. "gzip"), or null
     *         when absent
     */
    public String parseContentEncoding(String input) {
        return headerValue(input, "content-encoding");
    }

    /**
     * @param input
     *            the raw response
     * @return the lower-cased charset parameter of the Content-Type header,
     *         or null when the header or its charset parameter is absent
     */
    public String parseCharset(String input) {
        String contentType = headerValue(input, "content-type");
        if (contentType == null) {
            return null;
        }
        String marker = "charset=";
        int begin = contentType.indexOf(marker);
        if (begin == -1) {
            // Parameters other than charset (e.g. boundary=...) are present.
            return null;
        }
        String charset = contentType.substring(begin + marker.length());
        int nextParam = charset.indexOf(';');
        if (nextParam > -1) {
            charset = charset.substring(0, nextParam);
        }
        charset = charset.trim();
        // The value may legally be a quoted string.
        if (charset.length() >= 2 && charset.startsWith("\"")
                && charset.endsWith("\"")) {
            charset = charset.substring(1, charset.length() - 1).trim();
        }
        return charset.length() == 0 ? null : charset;
    }

    /**
     * @param input
     *            the raw response
     * @return the lower-cased media type of the Content-Type header without
     *         parameters (e.g. "text/html"), or null when the header is
     *         absent
     */
    public String parseContentType(String input) {
        String contentType = headerValue(input, "content-type");
        if (contentType != null) {
            int params = contentType.indexOf(';');
            if (params > -1) {
                contentType = contentType.substring(0, params).trim();
            }
            logger.debug(" Content-Type: " + contentType);
        } else {
            logger.debug(" No content-type header! First few lines:");
            if (input != null) {
                StringTokenizer st = new StringTokenizer(preprocess(input),
                        "\n");
                int i = 0;
                while ((st.hasMoreTokens()) && (i < 5)) {
                    logger.debug(st.nextToken());
                    ++i;
                }
            }
        }
        return contentType;
    }

    /**
     * @param input
     *            the raw response
     * @return the second token of the status line (e.g. "200"), or null when
     *         the status line has no space-separated second token
     */
    public String parseResponseCode(String input) {
        String respCode = null;
        if (input != null) {
            String response = preprocess(input);
            int lineEnd = response.indexOf('\n');
            String statusLine = (lineEnd == -1 ? response : response
                    .substring(0, lineEnd)).trim();
            int pos = statusLine.indexOf(' ');
            if (pos != -1) {
                // The reason phrase is optional, so the code may run to the
                // end of the line.
                int end = statusLine.indexOf(' ', pos + 1);
                if (end == -1) {
                    end = statusLine.length();
                }
                respCode = statusLine.substring(pos + 1, end).trim();
            }
        }
        logger.debug("HTTP response code: " + respCode);
        return respCode;
    }
}

View File

@ -0,0 +1,100 @@
package org.bench4q.master.test.recordscript;
import static org.junit.Assert.*;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.zip.GZIPInputStream;
import org.apache.commons.io.FileUtils;
import org.bench4q.master.scriptrecord.httpcapture.Bench4qTestScriptAdapter;
import org.bench4q.master.scriptrecord.httpcapture.HttpRequestHeader;
import org.bench4q.master.scriptrecord.httpcapture.generator.Bench4qCodeGenerator;
import org.bench4q.master.scriptrecord.httpcapture.generator.ContentDecoder;
import org.bench4q.master.scriptrecord.httpcapture.generator.IScriptGenerator;
import org.bench4q.master.scriptrecord.httpcapture.generator.ResponseParser;
import org.bench4q.share.models.agent.RunScenarioModel;
import org.junit.Test;
/**
 * Tests for response parsing and content decoding during script recording.
 * The fixtures are read from the Script directory relative to the working
 * directory.
 */
public class TestCodeGenerator {
    /**
     * ISO-8859-1 maps every byte to exactly one char, so binary fixtures
     * survive a bytes-to-String-to-bytes round trip unchanged.
     */
    private static final String BINARY_SAFE_CHARSET = "ISO-8859-1";

    private Bench4qTestScriptAdapter scriptAdapter = new Bench4qTestScriptAdapter(
            new RunScenarioModel());
    private Bench4qCodeGenerator codeGenerator = new Bench4qCodeGenerator(
            this.scriptAdapter);

    /** A gzip-compressed HTML response must yield at least one child URL. */
    @Test
    public void testProcessResponse() throws Exception {
        FileInputStream requestStream = new FileInputStream(new File(
                "Script/requestHeader.txt"));
        try {
            HttpRequestHeader header = new HttpRequestHeader(requestStream);
            IScriptGenerator scriptGenerator = this.codeGenerator;
            scriptGenerator.processResponse(
                    header,
                    FileUtils.readFileToString(
                            new File("Script/gzipResponse.txt"),
                            BINARY_SAFE_CHARSET).getBytes(
                            BINARY_SAFE_CHARSET));
        } finally {
            // Kept open until processing is done, then always released.
            requestStream.close();
        }
        assertTrue(this.scriptAdapter.getChildrenUrls().size() > 0);
    }

    /** The body of an uncompressed response starts right after the headers. */
    @Test
    public void testGetContentBodyWithoutCompress() throws IOException {
        String contentString = new String(
                new ResponseParser().parseReponseBody(FileUtils
                        .readFileToString(new File(
                                "Script/simpleResponse.txt"),
                                BINARY_SAFE_CHARSET)));
        assertEquals(0, contentString.indexOf("<html"));
        assertEquals(532, contentString.length());
    }

    /** Content-Length is readable even when the body is binary. */
    @Test
    public void testParseContentLength() throws IOException {
        String contentLengthString = new ResponseParser()
                .parseContentLength(FileUtils.readFileToString(
                        new File("Script/gzipResponse.txt"),
                        BINARY_SAFE_CHARSET).toLowerCase());
        int contentLength = Integer.parseInt(contentLengthString);
        assertEquals(12852, contentLength);
    }

    /**
     * The gzip fixture keeps its length through an ISO-8859-1 round trip and
     * has a valid gzip header (the GZIPInputStream constructor reads it).
     */
    @Test
    public void testGetContentBody() throws IOException {
        String contentFromFile = FileUtils.readFileToString(new File(
                "Script/gzipResponseBody.txt"), BINARY_SAFE_CHARSET);
        byte[] contentBody1 = contentFromFile.getBytes(BINARY_SAFE_CHARSET);
        String responseString = new String(contentBody1,
                Charset.forName(BINARY_SAFE_CHARSET));
        System.out.println("testGetContentBody total length:"
                + responseString.getBytes(BINARY_SAFE_CHARSET).length);
        System.out.println("testGetContentBody :" + contentBody1.length);
        GZIPInputStream inputStream = new GZIPInputStream(
                new ByteArrayInputStream(contentBody1));
        try {
            assertEquals(
                    responseString.getBytes(BINARY_SAFE_CHARSET).length,
                    contentBody1.length);
            assertNotNull(inputStream);
        } finally {
            inputStream.close();
        }
    }

    /** The decoder chosen for "gzip" must return the decompressed HTML. */
    @Test
    public void testUncompressGzipContent() throws IOException {
        ContentDecoder contentDecoder = ContentDecoder.createDecoder("gzip");
        String contentFromFile = FileUtils.readFileToString(new File(
                "Script/gzipResponseBody.txt"), BINARY_SAFE_CHARSET);
        ByteArrayInputStream inputStream = new ByteArrayInputStream(
                contentFromFile.getBytes(BINARY_SAFE_CHARSET));
        String contentString = new String(contentDecoder.decodeContent(
                inputStream, Charset.forName(BINARY_SAFE_CHARSET)),
                Charset.forName(BINARY_SAFE_CHARSET));
        System.out.println(contentString);
        assertTrue(contentString.indexOf("<html") > -1);
    }

    /**
     * Once the fixture has been decoded as UTF-8, a further
     * encode/decode/encode cycle no longer changes the byte count.
     */
    @Test
    public void testFileInputStreamAndFileUtils() throws IOException {
        byte[] responseBodyBytes = FileUtils.readFileToString(
                new File("Script/gzipResponseBody.txt"), "UTF-8").getBytes(
                "UTF-8");
        int length1 = responseBodyBytes.length;
        String responseBodyString = new String(responseBodyBytes, "UTF-8");
        int length2 = responseBodyString.getBytes("UTF-8").length;
        assertEquals(length1, length2);
        System.out.println(length1);
    }
}

View File

@ -5,7 +5,6 @@ import static org.junit.Assert.*;
import java.io.File;
import java.io.IOException;
import java.net.URL;
import org.apache.commons.io.FileUtils;
import org.bench4q.master.scriptrecord.httpcapture.generator.ChildrenUrl;
import org.junit.Test;
@ -66,12 +65,4 @@ public class TestDomGenerator extends TestRecordBase {
.size() == 0);
}
public void testChildUrlWithNotProperCase() {
}
@Test
public void testUncompressGzipContent() {
}
}