diff --git a/.gitignore b/.gitignore
index ebc0014..40dad18 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,4 +7,5 @@ chapter1/crawler/datas/comments1/
 chapter1/crawler/datas/products/
 chapter1/crawler/taobao/settings.py
 chapter2/mysite/mysite/settings.py
-.env
\ No newline at end of file
+.env
+chapter4/results.csv
diff --git a/chapter1/crawler/taobao/__pycache__/middlewares.cpython-37.pyc b/chapter1/crawler/taobao/__pycache__/middlewares.cpython-37.pyc
index a0323b1..f5c6419 100644
Binary files a/chapter1/crawler/taobao/__pycache__/middlewares.cpython-37.pyc and b/chapter1/crawler/taobao/__pycache__/middlewares.cpython-37.pyc differ
diff --git a/chapter1/crawler/taobao/items.py b/chapter1/crawler/taobao/items.py
index aa8f4c5..5d4a5df 100644
--- a/chapter1/crawler/taobao/items.py
+++ b/chapter1/crawler/taobao/items.py
@@ -13,6 +13,17 @@ class ZhihuItem(scrapy.Item):
     # name = scrapy.Field()
     pass
 
+class ProductSpecItem(scrapy.Item):
+    # product id; doubles as the HBase row key in JDHbasePipeline
+    id = scrapy.Field()
+    cpu = scrapy.Field()
+    rom = scrapy.Field()
+    ram = scrapy.Field()
+    resolution = scrapy.Field()
+    charge = scrapy.Field()
+    weight = scrapy.Field()
+    brand = scrapy.Field()
+
 class ProxyItem(scrapy.Item):
     ip = scrapy.Field()
     port = scrapy.Field()
diff --git a/chapter1/crawler/taobao/middlewares.py b/chapter1/crawler/taobao/middlewares.py
index fc129aa..0eae133 100644
--- a/chapter1/crawler/taobao/middlewares.py
+++ b/chapter1/crawler/taobao/middlewares.py
@@ -6,12 +6,14 @@
 # https://doc.scrapy.org/en/latest/topics/spider-middleware.html
 import pdb
 import time
+import json
+import requests
 
 from scrapy.http import HtmlResponse
 from selenium.common.exceptions import TimeoutException
 from selenium import webdriver
 from selenium.webdriver.support.wait import WebDriverWait
-
+from scrapy_splash import SplashRequest
 
 from scrapy import signals
 
@@ -144,3 +146,27 @@ class ZhihuChromeMiddleware(object):
     def spider_closed(self, spider, reason):
        print('驱动关闭')
        self.driver.close()
+
+
+class RandomProxyMiddleware(object):
+    # cache of proxies fetched from the remote pool
+    _ip_lists = []
+
+    def get_ip(self):
+        # refill the cache from the proxy API whenever it runs dry
+        if len(self._ip_lists) <= 0:
+            r = requests.get('http://bigdata1.educoder.net/myapp/api/proxys.json')
+            self._ip_lists = json.loads(r.text)
+
+        data = self._ip_lists.pop()
+        return "http://%s:%s" % (data['ip'], data['port'])
+
+
+    def process_request(self, request, spider):
+        # Splash fetches the page itself, so the proxy has to go into the
+        # Splash args rather than the usual request.meta['proxy']
+        if isinstance(request, SplashRequest):
+            ip = self.get_ip()
+            print(ip)
+            request.meta['splash']['args']['proxy'] = ip
+            # request.meta['proxy'] = ip
\ No newline at end of file
diff --git a/chapter1/crawler/taobao/pipelines.py b/chapter1/crawler/taobao/pipelines.py
index 71ce326..2c49775 100644
--- a/chapter1/crawler/taobao/pipelines.py
+++ b/chapter1/crawler/taobao/pipelines.py
@@ -12,7 +12,7 @@
 import happybase
 from scrapy.conf import settings
 from scrapy.pipelines.images import ImagesPipeline
-from taobao.items import JDProductItem, JDCommentItem, JDCommentSummary
+from taobao.items import JDProductItem, JDCommentItem, JDCommentSummary, ProductSpecItem
 from taobao.utils import check_alive_proxy
 from scrapy.exceptions import DropItem
 
@@ -129,5 +129,18 @@
                 "comment:default_good_count": str(item["default_good_count"]),
                 "comment:good_rate": str(item["good_rate"])
                 })
+        elif isinstance(item, ProductSpecItem):
+            # the row key is the product id filled in by JdSpider.parse_spec
+            self.product_table.put(item["id"],
+                {
+                    "spec:cpu": str(item["cpu"]),
+                    "spec:rom": str(item["rom"]),
+                    "spec:ram": str(item["ram"]),
+                    "spec:resolution": str(item["resolution"]),
+                    "spec:charge": str(item["charge"]),
+                    "spec:weight": str(item["weight"]),
+                    "spec:brand": str(item["brand"]),
+                }
+            )
         return item
diff --git a/chapter1/crawler/taobao/spiders/jd.py b/chapter1/crawler/taobao/spiders/jd.py
index 5ddef03..86f3a42 100644
--- a/chapter1/crawler/taobao/spiders/jd.py
+++ b/chapter1/crawler/taobao/spiders/jd.py
@@ -7,7 +7,7 @@
 from scrapy import Request
 from scrapy.conf import settings
 from scrapy_splash import SplashRequest
 import pdb
-from taobao.items import JDProductItem, JDCommentItem, JDCommentSummary
+from taobao.items import JDProductItem, JDCommentItem, JDCommentSummary, ProductSpecItem
 import re
 from taobao.utils import now_time
@@ -21,11 +21,19 @@ class JdSpider(scrapy.Spider):
     ]
 
     custom_settings = {
-        'LOG_LEVEL': "WARN",
+        'LOG_LEVEL': "INFO",
         'ITEM_PIPELINES': {
             'taobao.pipelines.JDCleanDataPipeline': 300,
-            'taobao.pipelines.JDHbasePipeline': 400,
-        }
+            #'taobao.pipelines.JDHbasePipeline': 400,
+        },
+        'DOWNLOADER_MIDDLEWARES': {
+            'scrapy.downloadermiddlewares.retry.RetryMiddleware': 90,
+            #'taobao.middlewares.RandomProxyMiddleware': 100,
+            # 'scrapy.downloadermiddlewares.httpproxy.HttpProxyMiddleware': 110,
+            'scrapy_splash.SplashCookiesMiddleware': 723,
+            'scrapy_splash.SplashMiddleware': 725,
+            'scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware': 810,
+        },
     }
 
     def make_comment_url(self, product_id, page):
@@ -38,10 +46,35 @@
         for url in self.start_urls:
             yield SplashRequest(url, self.parse,
+                                method='GET',
                                 args={})
 
 
+    def parse_spec(self, response):
+        # scrape the 规格参数 (spec) table on the product detail page
+        # pdb.set_trace()
+        cpu = response.xpath('//dt[text()="CPU型号"]/following-sibling::dd/text()').extract_first()
+        rom = response.xpath('//dt[text()="ROM"]/following-sibling::dd[2]/text()').extract_first()
+        ram = response.xpath('//dt[text()="RAM"]/following-sibling::dd[2]/text()').extract_first()
+        resolution = response.xpath('//dt[text()="分辨率"]/following-sibling::dd/text()').extract_first()
+        charge = response.xpath('//dt[text()="电池容量(mAh)"]/following-sibling::dd/text()').extract_first()
+        weight = response.xpath('//dt[text()="机身重量(g)"]/following-sibling::dd/text()').extract_first()
+        brand = response.xpath('//dt[text()="品牌"]/following-sibling::dd/text()').extract_first()
+
+        print(cpu, rom, ram, resolution, charge, weight, brand)
+
+        yield ProductSpecItem(
+            id=response.meta.get('product_id'),
+            cpu=cpu,
+            rom=rom,
+            ram=ram,
+            resolution=resolution,
+            charge=charge,
+            weight=weight,
+            brand=brand,
+        )
+
+
     def parse_comment(self, response):
-        # pdb.set_trace()
         product_id = response.meta["product_id"]
         page = response.meta["page"]
         print("抓取评论, %s, 当前 %d页" % (product_id, page))
@@ -128,9 +161,13 @@
             if m:
                 product_id = m.group(1)
                 comment_url = self.make_comment_url(product_id, 0)
-                yield Request(comment_url, self.parse_comment,
-                              meta={'product_id': product_id, 'page': 0},
-                              priority=100)
+                # yield Request(comment_url, self.parse_comment,
+                #               meta={'product_id': product_id, 'page': 0},
+                #               priority=100)
+
+                if url:
+                    # carry the product id so parse_spec can fill the item's row key
+                    yield SplashRequest(url='https:' + url, callback=self.parse_spec, meta={'product_id': product_id})
 
         # 获取下一页
         next_page = response.xpath('//a[@class="pn-next"]/@href').extract_first()
diff --git a/chapter4/check_proxy.py b/chapter4/check_proxy.py
new file mode 100644
index 0000000..8eb6294
--- /dev/null
+++ b/chapter4/check_proxy.py
@@ -0,0 +1,34 @@
+import requests
+import time
+import re
+
+def check_alive_proxy(ip, port):
+    begin_time = int(time.time())
+    proxies = {
+        "http": "http://%s:%s" % (ip, port),
+        "https": "https://%s:%s" % (ip, port),
+    }
+    response = requests.get(
+        'http://2019.ip138.com/ic.asp', proxies=proxies, timeout=3)
+
+    check_time = int(time.time()) - begin_time
+
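+    # ip138 echoes the visitor's IP between square brackets; decode the
+    # gb2312 page and check that the address it saw matches the proxy's.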
+    response.encoding = 'gb2312'
+    m = re.search(r'.+\[((\d+\.){3}\d+)\].+', response.text)
+    if m:
+        if m.group(1) == ip:
+            return check_time
+    raise RuntimeError("连接出错")
+
+# results.csv is expected to hold one proxy IP per line, all on port 9999
+if __name__ == "__main__":
+    with open('results.csv') as f:
+        for line in f:
+            try:
+                check_time = check_alive_proxy(line.strip(), 9999)
+                if check_time < 5:
+                    print("%s:%d %d" % (line.strip(), 9999, check_time))
+            except Exception as e:
+                print(e)
\ No newline at end of file