Handle response charset detection and gzip decompression

2013-12-09 20:51:00 +08:00 · 2013-12-09 20:51:00 +08:00 · 0a2cfcddd2
parent a1a572d7f0
commit 0a2cfcddd2
15 changed files with 354 additions and 110 deletions
--- a/Script/baidu.html
+++ b/Script/baidu.html
@ -0,0 +1,17 @@
+<html>
+<head>
+    <title>Bench4Q Test Case</title>
+    <link href="style/bootstrap-cerulean.css" />
+    <link href="style/bootstrap-classic.css" />
+    <link href="style/bootstrap-cerulean.css" />
+</head>
+<body>
+    <img src="images/1.jpg" alt="No this one" />
+    <img src="images/2.jpg" alt="No this one" />
+    <img src="images/3.jpg" alt="No this one" />
+
+    <script src="script/agentTable.js" type="text/javascript"></script>
+    <script src="script/base.js" type="text/javascript"></script>
+</body>
+
+</html>
--- a/Script/gzipResponse.txt
+++ b/Script/gzipResponse.txt
--- a/Script/gzipResponseBody.txt
+++ b/Script/gzipResponseBody.txt
--- a/Script/requestHeader.txt
+++ b/Script/requestHeader.txt
@ -0,0 +1,10 @@
+GET / HTTP/1.1
+Host: www.baidu.com
+Connection: keep-alive
+Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8
+User-Agent: Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36
+DNT: 1
+Accept-Encoding: gzip,deflate,sdch
+Accept-Language: en,zh-CN;q=0.8,zh;q=0.6
+Cookie: BAIDUID=1D43A956BCED0A81B8340058134CD2F6:FG=1; BDUSS=EJMRWZ2eklMaERoQ344em5RZ2EyTVh0UjRDcWpiRmhnMjRlLTZnR3NZajBOS0JTQVFBQUFBJCQAAAAAAAAAAAEAAAA2JuwxZmVuZ3l1bjIwMTIzOQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAPSneFL0p3hSN; Hm_lvt_9f14aaa038bbba8b12ec2a4a3e51d254=1384429678; H_PS_PSSID=3784_4199_1432_4421_4414_4211_4264_4450_4503; BDRCVFR[feWj1Vr5u3D]=I67x6TjHwwYf0
+
--- a/Script/responsBodyAfterDeal.txt
+++ b/Script/responsBodyAfterDeal.txt
--- a/Script/simpleResponse.txt
+++ b/Script/simpleResponse.txt
@ -0,0 +1,26 @@
+HTTP/1.1 200 OK
+Server: Apache-Coyote/1.1
+Accept-Ranges: bytes
+ETag: W/"532-1386125334307"
+Last-Modified: Wed, 04 Dec 2013 02:48:54 GMT
+Content-Type: text/html
+Content-Length: 532
+Date: Mon, 09 Dec 2013 07:03:15 GMT
+
+<html>
+<head>
+    <title>Bench4Q Test Case</title>
+    <link href="style/bootstrap-cerulean.css" />
+    <link href="style/bootstrap-classic.css" />
+    <link href="style/bootstrap-cerulean.css" />
+</head>
+<body>
+    <img src="images/1.jpg" alt="No this one" />
+    <img src="images/2.jpg" alt="No this one" />
+    <img src="images/3.jpg" alt="No this one" />
+
+    <script src="script/agentTable.js" type="text/javascript"></script>
+    <script src="script/base.js" type="text/javascript"></script>
+</body>
+
+</html>
--- a/pom.xml
+++ b/pom.xml
@ -52,11 +52,11 @@
 			<artifactId>commons-httpclient</artifactId>
 			<version>3.1</version>
 		</dependency>
-	<dependency>
-		<groupId>org.python</groupId>
-		<artifactId>jython</artifactId>
-		<version>2.7-b1</version>
-	</dependency>
+		<dependency>
+			<groupId>org.python</groupId>
+			<artifactId>jython</artifactId>
+			<version>2.7-b1</version>
+		</dependency>
 		<dependency>
 			<groupId>org.springframework</groupId>
 			<artifactId>spring-test</artifactId>
--- a/src/main/java/org/bench4q/master/scriptrecord/httpcapture/generator/AbstractCodeGenerator.java
+++ b/src/main/java/org/bench4q/master/scriptrecord/httpcapture/generator/AbstractCodeGenerator.java
@ -1,14 +1,15 @@
 package org.bench4q.master.scriptrecord.httpcapture.generator;

+import java.io.ByteArrayInputStream;
 import java.io.File;
 import java.io.FileWriter;
 import java.io.IOException;
+import java.nio.charset.Charset;
 import java.util.Date;
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.List;
-import java.util.StringTokenizer;
 import java.util.Vector;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
@ -318,15 +319,18 @@ public abstract class AbstractCodeGenerator implements IScriptGenerator,
 			this.ignoreNextResponse = false;
 			return;
 		}
-		// TODO:Get charset from responseHeader
+		ResponseParser parser = new ResponseParser();
 		String respStr = new String(response).toLowerCase();
-		String respCode = parseResponseCode(respStr);
-		String contentType = parseContentType(respStr);
-		String charset = parseCharset(respStr);
-		@SuppressWarnings("unused")
-		String contentEncoding = parseContentEncoding(respStr);
+		String respCode = parser.parseResponseCode(respStr);
+		String contentType = parser.parseContentType(respStr);
+		String charset = parser.parseCharset(respStr);
+		String contentEncoding = parser.parseContentEncoding(respStr);
+		ContentDecoder contentDecoder = ContentDecoder
+				.createDecoder(contentEncoding);
+		byte[] contentBodyAfterDecoded = contentDecoder.decodeContent(
+				new ByteArrayInputStream(parser.parseReponseBody(respStr)),
+				Charset.forName(charset));
 		doAssertResponse(respCode);
-
 		if ((contentType != null) && (mimeTypes.get(contentType) != null)
 				&& (respCode != null)) {
 			if (respCode.startsWith("200")) {
@ -334,10 +338,10 @@ public abstract class AbstractCodeGenerator implements IScriptGenerator,
 					setStruts(true);
 				else
 					setStruts(false);
-
 				if (contentType.toLowerCase().compareTo("text/html") == 0) {
 					doTidyCode(HttpTestCase.staticUrlDecode(header.url));
-					doParseHtmlContent(response, charset, header.url);
+					doParseHtmlContent(contentBodyAfterDecoded, charset,
+							header.url);
 					if (isCpRspToStdout())
 						doResponseForStdOut(HttpTestCase.staticUrlDecode(
 								header.url).trim());
@ -345,92 +349,16 @@ public abstract class AbstractCodeGenerator implements IScriptGenerator,
 						doResponseForFile();
 				}
 			}
-
 			doEndTransaction();
 		} else {
 			log.debug("Ignoring response because content type is not known: "
 					+ contentType);
 		}
-
 		if (isFirstRequest())
 			this.isFirstRequest = false;
 	}

-	private String parseContentEncoding(String respStr) {
-		int pos = respStr.indexOf("content-encoding:");
-		if (pos != -1) {
-			pos += 18;
-			int end = respStr.indexOf("/r/n", pos);
-			return respStr.substring(pos, end);
-		}
-		return null;
-	}
-
-	private String parseCharset(String response) {
-		String ret = null;
-		int pos = response.indexOf("content-type:");
-		if (pos > -1) {
-			pos += 14;
-			int end = response.indexOf("\r\n", pos);
-			int middle = response.indexOf(";", pos);
-			if (middle > -1 && middle < end) {
-				ret = response.substring(middle + 1, end);
-			}
-		}
-		if (ret != null) {
-			int begin = ret.indexOf("charset=");
-			ret = ret.substring(begin + 8);
-		}
-		return ret;
-	}
-
-	private String parseContentType(String response) {
-		String contentType = null;
-
-		int pos = response.indexOf("content-type:");
-		if (pos > -1) {
-			pos += 14;
-			int end = response.indexOf("\r\n", pos);
-			int end2 = response.indexOf(";", pos);
-			if ((end2 > -1) && (end2 < end))
-				end = end2;
-			if (end > -1)
-				contentType = response.substring(pos, end).trim();
-
-			log.debug("  Content-Type: " + contentType);
-		} else {
-			log.debug("  No content-type header!  First few lines:");
-			StringTokenizer st = new StringTokenizer(response, "\n");
-			int i = 0;
-			while ((st.hasMoreTokens()) && (i < 5)) {
-				log.debug(st.nextToken());
-				++i;
-			}
-		}
-		return contentType;
-	}
-
-	private String parseResponseCode(String response) {
-		String respCode = null;
-		int pos = response.indexOf(" ");
-		if (pos != -1) {
-			int end = response.indexOf(" ", pos + 1);
-			int end2 = response.indexOf("\n", pos + 1);
-			if ((end2 != -1) && (end2 < end))
-				end = end2;
-			if (end != -1)
-				respCode = response.substring(pos + 1, end).trim();
-		}
-
-		log.debug("HTTP response code: " + respCode);
-		return respCode;
-	}
-
-	@SuppressWarnings("unused")
-	private String decodeContent(String response, String encode) {
-		// TODO:
-		return null;
-	}
+	// TODO: edit this type

 	public void run() {
 		this.outstandingInserts = new LinkedList<BehaviorBaseModel>();
--- a/src/main/java/org/bench4q/master/scriptrecord/httpcapture/generator/Bench4qCodeGenerator.java
+++ b/src/main/java/org/bench4q/master/scriptrecord/httpcapture/generator/Bench4qCodeGenerator.java
@ -204,13 +204,14 @@ public class Bench4qCodeGenerator extends AbstractCodeGenerator {
 	}

 	@Override
-	public void doParseHtmlContent(byte[] response, String charset,
+	public void doParseHtmlContent(byte[] responseBody, String charset,
 			String rootUrl) {
 		if (charset == null) {
-			charset = "ANSI";
+			// ISO-8859-1 maps every byte to exactly one character, which makes
+			// it a lossless fallback when the response declares no charset.
+			charset = "ISO-8859-1";
 		}
-		String responseContent = new String(response, Charset.forName(charset));
-		int htmlStart = responseContent.indexOf("<html>");
+		String responseContent = new String(responseBody,
+				Charset.forName(charset));
+		int htmlStart = responseContent.indexOf("<html");
 		int htmlEnd = responseContent.indexOf("</html>");
 		if (htmlStart == -1 || htmlEnd == -1) {
 			return;
--- a/src/main/java/org/bench4q/master/scriptrecord/httpcapture/generator/ContentDecoder.java
+++ b/src/main/java/org/bench4q/master/scriptrecord/httpcapture/generator/ContentDecoder.java
@ -0,0 +1,40 @@
+package org.bench4q.master.scriptrecord.httpcapture.generator;
+
+import java.io.ByteArrayOutputStream;
+import java.io.InputStream;
+import java.nio.charset.Charset;
+
+import org.apache.log4j.Logger;
+import org.bench4q.master.exception.DealWithLog;
+
+/**
+ * Decodes the body of a captured HTTP response according to its
+ * Content-Encoding. This base class handles the "no content coding" case by
+ * copying the stream unchanged; subclasses undo one specific content coding.
+ */
+public class ContentDecoder {
+	protected Logger logger = Logger.getLogger(ContentDecoder.class);
+
+	protected ContentDecoder() {
+	}
+
+	/**
+	 * Picks the decoder matching a Content-Encoding header value.
+	 * 
+	 * @param encodeType
+	 *            the header value; may be null or blank when the response
+	 *            carries no Content-Encoding header
+	 * @return a {@link GzipDecoder} for "gzip" / "x-gzip" (case-insensitive,
+	 *         surrounding whitespace ignored), otherwise a pass-through
+	 *         decoder
+	 */
+	public static ContentDecoder createDecoder(String encodeType) {
+		// A missing header is the common case and must not fail with an NPE.
+		String coding = (encodeType == null) ? "" : encodeType.trim();
+		if ("gzip".equalsIgnoreCase(coding)
+				|| "x-gzip".equalsIgnoreCase(coding)) {
+			return new GzipDecoder();
+		}
+		return new ContentDecoder();
+	}
+
+	/**
+	 * Reads the whole stream and returns its bytes unchanged.
+	 * 
+	 * @param inputStream
+	 *            the encoded response body
+	 * @param charset
+	 *            the charset of the decoded text; not needed by this
+	 *            pass-through implementation and therefore allowed to be null
+	 * @return the body bytes, or null if the stream is null or cannot be read
+	 */
+	public byte[] decodeContent(InputStream inputStream, Charset charset) {
+		if (inputStream == null) {
+			logger.error("Cannot decode content: the input stream is null");
+			return null;
+		}
+		try {
+			ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
+			byte[] buf = new byte[1024];
+			int len;
+			// read() signals end of stream with -1.
+			while ((len = inputStream.read(buf)) != -1) {
+				outputStream.write(buf, 0, len);
+			}
+			// Return the raw bytes: converting to a String and back can alter
+			// the payload (malformed sequences are replaced, some charsets
+			// add a byte order mark when encoding).
+			return outputStream.toByteArray();
+		} catch (java.io.IOException e) {
+			logger.error(DealWithLog.getExceptionStackTrace(e));
+			return null;
+		}
+
+	}
+
+}
--- a/src/main/java/org/bench4q/master/scriptrecord/httpcapture/generator/ContentDecompress.java
+++ b/src/main/java/org/bench4q/master/scriptrecord/httpcapture/generator/ContentDecompress.java
@ -1,5 +0,0 @@
-package org.bench4q.master.scriptrecord.httpcapture.generator;
-
-public class ContentDecompress {
-
-}
--- a/src/main/java/org/bench4q/master/scriptrecord/httpcapture/generator/GzipDecoder.java
+++ b/src/main/java/org/bench4q/master/scriptrecord/httpcapture/generator/GzipDecoder.java
@ -0,0 +1,28 @@
+package org.bench4q.master.scriptrecord.httpcapture.generator;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.zip.GZIPInputStream;
+
+import org.bench4q.master.exception.DealWithLog;
+
+/**
+ * Content decoder that inflates a gzip-compressed response body.
+ */
+public class GzipDecoder extends ContentDecoder {
+
+	/**
+	 * Inflates the gzip stream. Overrides the two-argument entry point that
+	 * callers use, so that a decoder obtained for "gzip" really decompresses
+	 * instead of falling back to the pass-through copy of the base class.
+	 * 
+	 * @param inputStream
+	 *            the gzip-compressed response body
+	 * @param charset
+	 *            unused here; decompression works on bytes only
+	 * @return the decompressed bytes, or null on failure
+	 */
+	@Override
+	public byte[] decodeContent(InputStream inputStream,
+			java.nio.charset.Charset charset) {
+		return decodeContent(inputStream);
+	}
+
+	/**
+	 * Inflates the gzip stream and returns the decompressed bytes. The given
+	 * stream is closed when this method returns.
+	 * 
+	 * @param inputStream
+	 *            the gzip-compressed response body
+	 * @return the decompressed bytes, or null if the stream is null, is not
+	 *         valid gzip data or cannot be read
+	 */
+	public byte[] decodeContent(InputStream inputStream) {
+		if (inputStream == null) {
+			logger.error("Cannot decode gzip content: the input stream is null");
+			return null;
+		}
+		GZIPInputStream gzipInputStream = null;
+		try {
+			gzipInputStream = new GZIPInputStream(inputStream);
+			ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
+
+			byte[] buf = new byte[1024];
+			int len;
+			// read() signals end of stream with -1.
+			while ((len = gzipInputStream.read(buf)) != -1) {
+				outputStream.write(buf, 0, len);
+			}
+			return outputStream.toByteArray();
+		} catch (IOException e) {
+			logger.error(DealWithLog.getExceptionStackTrace(e));
+			return null;
+		} finally {
+			// Closing releases the inflater held by the gzip stream.
+			if (gzipInputStream != null) {
+				try {
+					gzipInputStream.close();
+				} catch (IOException ignored) {
+					// Nothing useful can be done here: the data has already
+					// been read, or the failure has already been logged.
+				}
+			}
+		}
+	}
+}
--- a/src/main/java/org/bench4q/master/scriptrecord/httpcapture/generator/ResponseParser.java
+++ b/src/main/java/org/bench4q/master/scriptrecord/httpcapture/generator/ResponseParser.java
@ -0,0 +1,108 @@
+package org.bench4q.master.scriptrecord.httpcapture.generator;
+
+import java.io.IOException;
+import java.util.StringTokenizer;
+
+import org.apache.log4j.Logger;
+
+/**
+ * Extracts the status code, selected header values and the body from a raw
+ * HTTP response that is held in a String. Header names are matched
+ * case-insensitively and all returned header values are lower-cased.
+ */
+public class ResponseParser {
+	private Logger logger = Logger.getLogger(ResponseParser.class);
+
+	/** Lower-cases with a fixed locale so the result does not depend on the JVM default. */
+	private String preprocess(String respString) {
+		return respString.toLowerCase(java.util.Locale.ENGLISH);
+	}
+
+	/**
+	 * Looks up a header in the header section of the response.
+	 * 
+	 * @param input
+	 *            the raw response; may be null
+	 * @param headerName
+	 *            the lower-case header name without the colon
+	 * @return the trimmed, lower-cased header value, or null if the header is
+	 *         not present before the first blank line
+	 */
+	private String findHeaderValue(String input, String headerName) {
+		if (input == null) {
+			return null;
+		}
+		String response = preprocess(input);
+		String prefix = headerName + ":";
+		int lineStart = 0;
+		while (lineStart < response.length()) {
+			int lineEnd = response.indexOf('\n', lineStart);
+			if (lineEnd == -1) {
+				lineEnd = response.length();
+			}
+			// trim() also drops the '\r' of a CRLF line ending.
+			String line = response.substring(lineStart, lineEnd).trim();
+			if (line.length() == 0 && lineStart > 0) {
+				// Blank line: the header section is over, never look into
+				// the body.
+				return null;
+			}
+			if (line.startsWith(prefix)) {
+				return line.substring(prefix.length()).trim();
+			}
+			lineStart = lineEnd + 1;
+		}
+		return null;
+	}
+
+	/**
+	 * Returns everything after the first empty line (CRLF CRLF) as bytes.
+	 * 
+	 * NOTE(review): the bytes are produced with the platform default charset,
+	 * so they match the wire bytes only if the caller built respStr the same
+	 * way; binary bodies need a one-to-one charset on both sides.
+	 * 
+	 * @param respStr
+	 *            the raw response; may be null
+	 * @return the body bytes; empty if there is no header/body separator
+	 * @throws IOException
+	 *             declared for callers, not thrown by this implementation
+	 */
+	public byte[] parseReponseBody(String respStr) throws IOException {
+		String content = "";
+		if (respStr != null) {
+			int pos = respStr.indexOf("\r\n\r\n");
+			if (pos > -1) {
+				content = respStr.substring(pos + 4);
+			}
+		}
+		return content.getBytes();
+	}
+
+	/**
+	 * @param input
+	 *            the raw response
+	 * @return the Content-Length value as text, or null if the header is
+	 *         missing
+	 */
+	public String parseContentLength(String input) {
+		return findHeaderValue(input, "content-length");
+	}
+
+	/**
+	 * @param input
+	 *            the raw response
+	 * @return the Content-Encoding value (for example "gzip"), or null if the
+	 *         header is missing
+	 */
+	public String parseContentEncoding(String input) {
+		return findHeaderValue(input, "content-encoding");
+	}
+
+	/**
+	 * Reads the charset parameter of the Content-Type header.
+	 * 
+	 * @param input
+	 *            the raw response
+	 * @return the charset name without quotes, or null if the header or its
+	 *         charset parameter is missing
+	 */
+	public String parseCharset(String input) {
+		String value = findHeaderValue(input, "content-type");
+		if (value == null) {
+			return null;
+		}
+		// Walk the ';'-separated parameters that follow the media type.
+		int paramStart = value.indexOf(';');
+		while (paramStart > -1) {
+			int paramEnd = value.indexOf(';', paramStart + 1);
+			String param = (paramEnd > -1) ? value.substring(paramStart + 1,
+					paramEnd) : value.substring(paramStart + 1);
+			param = param.trim();
+			if (param.startsWith("charset=")) {
+				String charset = param.substring("charset=".length()).trim();
+				if (charset.length() >= 2 && charset.startsWith("\"")
+						&& charset.endsWith("\"")) {
+					charset = charset.substring(1, charset.length() - 1)
+							.trim();
+				}
+				return (charset.length() == 0) ? null : charset;
+			}
+			paramStart = paramEnd;
+		}
+		return null;
+	}
+
+	/**
+	 * Reads the media type of the Content-Type header, without parameters.
+	 * 
+	 * @param input
+	 *            the raw response
+	 * @return the media type (for example "text/html"), or null if the
+	 *         header is missing
+	 */
+	public String parseContentType(String input) {
+		String value = findHeaderValue(input, "content-type");
+		if (value == null) {
+			logger.debug("  No content-type header!  First few lines:");
+			if (input != null) {
+				StringTokenizer st = new StringTokenizer(preprocess(input),
+						"\n");
+				int i = 0;
+				while ((st.hasMoreTokens()) && (i < 5)) {
+					logger.debug(st.nextToken());
+					++i;
+				}
+			}
+			return null;
+		}
+		int semicolon = value.indexOf(';');
+		String contentType = (semicolon > -1) ? value.substring(0, semicolon)
+				.trim() : value;
+		logger.debug("  Content-Type: " + contentType);
+		return contentType;
+	}
+
+	/**
+	 * Reads the status code from the status line, i.e. the token between the
+	 * first space and the next space or line end.
+	 * 
+	 * @param input
+	 *            the raw response
+	 * @return the status code as text (for example "200"), or null if it
+	 *         cannot be found
+	 */
+	public String parseResponseCode(String input) {
+		String respCode = null;
+		if (input != null) {
+			String response = preprocess(input);
+			int pos = response.indexOf(" ");
+			if (pos != -1) {
+				int end = response.indexOf(" ", pos + 1);
+				int lineEnd = response.indexOf("\n", pos + 1);
+				// The code ends at whichever delimiter comes first; a status
+				// line without a reason phrase has no second space.
+				if ((lineEnd != -1) && ((end == -1) || (lineEnd < end)))
+					end = lineEnd;
+				if (end == -1)
+					end = response.length();
+				String candidate = response.substring(pos + 1, end).trim();
+				if (candidate.length() > 0)
+					respCode = candidate;
+			}
+		}
+
+		logger.debug("HTTP response code: " + respCode);
+		return respCode;
+	}
+}
--- a/src/test/java/org/bench4q/master/test/recordscript/TestCodeGenerator.java
+++ b/src/test/java/org/bench4q/master/test/recordscript/TestCodeGenerator.java
@ -0,0 +1,100 @@
+package org.bench4q.master.test.recordscript;
+
+import static org.junit.Assert.*;
+
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.nio.charset.Charset;
+import java.util.zip.GZIPInputStream;
+
+import org.apache.commons.io.FileUtils;
+import org.bench4q.master.scriptrecord.httpcapture.Bench4qTestScriptAdapter;
+import org.bench4q.master.scriptrecord.httpcapture.HttpRequestHeader;
+import org.bench4q.master.scriptrecord.httpcapture.generator.Bench4qCodeGenerator;
+import org.bench4q.master.scriptrecord.httpcapture.generator.ContentDecoder;
+import org.bench4q.master.scriptrecord.httpcapture.generator.IScriptGenerator;
+import org.bench4q.master.scriptrecord.httpcapture.generator.ResponseParser;
+import org.bench4q.share.models.agent.RunScenarioModel;
+import org.junit.Test;
+
+/**
+ * Tests for the script recording support code: response header parsing, body
+ * extraction and content decoding, driven by the fixture files under
+ * {@code Script/}.
+ */
+public class TestCodeGenerator {
+	/**
+	 * Charset that maps bytes to chars one-to-one, so binary fixtures survive
+	 * being read into a String and converted back.
+	 */
+	private static final String BINARY_SAFE_CHARSET = "ISO-8859-1";
+
+	private Bench4qTestScriptAdapter scriptAdapter = new Bench4qTestScriptAdapter(
+			new RunScenarioModel());
+	private Bench4qCodeGenerator codeGenerator = new Bench4qCodeGenerator(
+			this.scriptAdapter);
+
+	/** A gzip-compressed HTML response must yield at least one child URL. */
+	@Test
+	public void testProcessResponse() throws Exception {
+		FileInputStream headerStream = new FileInputStream(new File(
+				"Script/requestHeader.txt"));
+		try {
+			HttpRequestHeader header = new HttpRequestHeader(headerStream);
+			IScriptGenerator scriptGenerator = this.codeGenerator;
+			scriptGenerator.processResponse(
+					header,
+					FileUtils.readFileToString(
+							new File("Script/gzipResponse.txt"),
+							BINARY_SAFE_CHARSET).getBytes(
+							BINARY_SAFE_CHARSET));
+		} finally {
+			// Keep the stream open until processing is done, then release it.
+			headerStream.close();
+		}
+		assertTrue(this.scriptAdapter.getChildrenUrls().size() > 0);
+	}
+
+	/** The body of an uncompressed response starts right after the headers. */
+	@Test
+	public void testGetContentBodyWithoutCompress() throws IOException {
+		String contentString = new String(
+				new ResponseParser().parseReponseBody(FileUtils
+						.readFileToString(
+								new File("Script/simpleResponse.txt"),
+								BINARY_SAFE_CHARSET)));
+		assertEquals(0, contentString.indexOf("<html"));
+		assertEquals(532, contentString.length());
+	}
+
+	/** Content-Length is read from the headers of the gzip fixture. */
+	@Test
+	public void testParseContentLength() throws IOException {
+		// No toLowerCase() here: the parser normalises case itself.
+		String contentLengthString = new ResponseParser()
+				.parseContentLength(FileUtils.readFileToString(new File(
+						"Script/gzipResponse.txt"), BINARY_SAFE_CHARSET));
+		int contentLength = Integer.parseInt(contentLengthString);
+		assertEquals(12852, contentLength);
+	}
+
+	/**
+	 * The gzip body fixture keeps its length through a String round trip and
+	 * is accepted by GZIPInputStream.
+	 */
+	@Test
+	public void testGetContentBody() throws IOException {
+		String contentFromFile = FileUtils.readFileToString(new File(
+				"Script/gzipResponseBody.txt"), BINARY_SAFE_CHARSET);
+		byte[] contentBody1 = contentFromFile.getBytes(BINARY_SAFE_CHARSET);
+		String responseString = new String(contentBody1,
+				Charset.forName(BINARY_SAFE_CHARSET));
+		System.out.println("testGetContentBody total length:"
+				+ responseString.getBytes(BINARY_SAFE_CHARSET).length);
+		System.out.println("testGetContentBody :" + contentBody1.length);
+		// The constructor reads the gzip header and throws if it is invalid.
+		GZIPInputStream inputStream = new GZIPInputStream(
+				new ByteArrayInputStream(contentBody1));
+		try {
+			assertEquals(
+					responseString.getBytes(BINARY_SAFE_CHARSET).length,
+					contentBody1.length);
+			assertNotNull(inputStream);
+		} finally {
+			inputStream.close();
+		}
+	}
+
+	/** Decoding the gzip body fixture yields HTML. */
+	@Test
+	public void testUncompressGzipContent() throws IOException {
+		ContentDecoder contentDecoder = ContentDecoder.createDecoder("gzip");
+		String contentFromFile = FileUtils.readFileToString(new File(
+				"Script/gzipResponseBody.txt"), BINARY_SAFE_CHARSET);
+		ByteArrayInputStream inputStream = new ByteArrayInputStream(
+				contentFromFile.getBytes(BINARY_SAFE_CHARSET));
+		String contentString = new String(contentDecoder.decodeContent(
+				inputStream, Charset.forName(BINARY_SAFE_CHARSET)),
+				Charset.forName(BINARY_SAFE_CHARSET));
+		System.out.println(contentString);
+		assertTrue(contentString.indexOf("<html") > -1);
+	}
+
+	/**
+	 * Bytes obtained by reading the fixture as UTF-8 keep their length when
+	 * decoded and encoded as UTF-8 once more.
+	 */
+	@Test
+	public void testFileInputStreamAndFileUtils() throws IOException {
+		byte[] responseBodyBytes = FileUtils.readFileToString(
+				new File("Script/gzipResponseBody.txt"), "UTF-8").getBytes(
+				"UTF-8");
+		int length1 = responseBodyBytes.length;
+		String responseBodyString = new String(responseBodyBytes, "UTF-8");
+		int length2 = responseBodyString.getBytes("UTF-8").length;
+		assertEquals(length1, length2);
+		System.out.println(length1);
+	}
+}
--- a/src/test/java/org/bench4q/master/test/recordscript/TestDomGenerator.java
+++ b/src/test/java/org/bench4q/master/test/recordscript/TestDomGenerator.java
@ -5,7 +5,6 @@ import static org.junit.Assert.*;
 import java.io.File;
 import java.io.IOException;
 import java.net.URL;
-
 import org.apache.commons.io.FileUtils;
 import org.bench4q.master.scriptrecord.httpcapture.generator.ChildrenUrl;
 import org.junit.Test;
@ -66,12 +65,4 @@ public class TestDomGenerator extends TestRecordBase {
 				.size() == 0);
 	}

-	public void testChildUrlWithNotProperCase() {
-
-	}
-
-	@Test
-	public void testUncompressGzipContent() {
-		
-	}
 }