From 5639278337d3b41daee5089ccff6cc1e161c1688 Mon Sep 17 00:00:00 2001
From: guange <8863824@gmail.com>
Date: Mon, 21 Jan 2019 10:17:09 +0800
Subject: [PATCH] Reformat JD crawler, add percent chart, move requirements.txt to repo root

---
 .gitignore                                    |  1 +
 .vscode/settings.json                         |  3 +
 chapter1/crawler/demo.py                      | 61 +++++++++++--------
 chapter1/crawler/taobao/items.py              |  3 +-
 chapter1/crawler/taobao/spiders/jd1.py        | 43 +++++++------
 chapter2/mysite/myapp/static/scripts/index.js |  3 +
 .../mysite/myapp/static/styles/common.css     | 41 +++++++++++++
 .../mysite/myapp/templates/myapp/crawl.html   | 24 +++++++-
 chapter3/spark.py                             |  1 +
 chapter1/requirements.txt => requirements.txt |  1 +
 10 files changed, 136 insertions(+), 45 deletions(-)
 create mode 100644 .vscode/settings.json
 rename chapter1/requirements.txt => requirements.txt (98%)

diff --git a/.gitignore b/.gitignore
index 46e2435..ebc0014 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,3 +7,4 @@ chapter1/crawler/datas/comments1/
 chapter1/crawler/datas/products/
 chapter1/crawler/taobao/settings.py
 chapter2/mysite/mysite/settings.py
+.env
\ No newline at end of file
diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 0000000..4a5e2f5
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,3 @@
+{
+    "python.pythonPath": ".env/bin/python"
+}
\ No newline at end of file
diff --git a/chapter1/crawler/demo.py b/chapter1/crawler/demo.py
index 31968ab..46f57d6 100644
--- a/chapter1/crawler/demo.py
+++ b/chapter1/crawler/demo.py
@@ -1,31 +1,44 @@
-import re,requests,json
+import re
+import requests
+import json
 
 s = requests.session()
-url = 'https://club.jd.com/comment/productPageComments.action'
+url = "https://club.jd.com/comment/productPageComments.action"
 data = {
-'callback':'fetchJSON_comment98vv61',
-'productId':'3888284',
-'score':0,
-'sortType':5,
-'pageSize':10,
-'isShadowSku':0,
-'page':0
+    'callback': 'fetchJSON_comment98vv61',
+    'productId': '3888284',
+    'score': 0,
+    'sortType': 5,
+    'pageSize': 10,
+    'isShadowSku': 0,
+    'page': 0
 }
 
-while True:
-    t = s.get(url,params = data).text
-    try:
-        t = re.search(r'(?<=fetchJSON_comment98vv61\().*(?=\);)',t).group(0)
-    except Exception as e:
-        break
 
-    j = json.loads(t)
-    commentSummary = j['comments']
-    for comment in commentSummary:
-        c_content = comment['content']
-        c_time = comment['referenceTime']
-        c_name = comment['nickname']
-        c_client = comment['userClientShow']
-        print('{} {} {}\n{}\n'.format(c_name,c_time,c_client,c_content))
+def main():
+    """Print each comment page of the product until no more are returned."""
+    while True:
+        t = s.get(url, params=data).text
+        m = re.search(r'(?<=fetchJSON_comment98vv61\().*(?=\);)', t)
+        if m is None:
+            break  # no JSONP wrapper in the reply: nothing left to parse
+        comments = json.loads(m.group(0))["comments"]
 
-    data['page'] += 1
\ No newline at end of file
+        if not comments:
+            break  # empty page: nothing more to print, do not loop forever
+        for comment in comments:
+            c_content = comment["content"]
+            c_time = comment['referenceTime']
+            c_name = comment['nickname']
+            c_client = comment['userClientShow']
+            print('{} {} {}\n{}\n'.format(c_name, c_time, c_client, c_content))
+
+        data['page'] += 1
+
+
+if __name__ == "__main__":
+    # Run the crawl only when this file is executed directly; importing the
+    # module should define main() without fetching anything. All request
+    # state (session, URL, query parameters) lives at module level, so
+    # main() takes no arguments.
+    main()
diff --git a/chapter1/crawler/taobao/items.py b/chapter1/crawler/taobao/items.py
index d03dbfd..6ff862b 100644
--- a/chapter1/crawler/taobao/items.py
+++ b/chapter1/crawler/taobao/items.py
@@ -22,7 +22,7 @@ class ProductItem(scrapy.Item):
     author = scrapy.Field()
     shop = scrapy.Field()
     pubtime = scrapy.Field()
-    publisher   = scrapy.Field()
+    publisher = scrapy.Field()
     url = scrapy.Field()
     detail = scrapy.Field()
     comment_num = scrapy.Field()
@@ -67,4 +67,3 @@ class JDCommentSummary(scrapy.Item):
     comment_count = scrapy.Field()
     default_good_count = scrapy.Field()
     good_rate = scrapy.Field()
-
diff --git a/chapter1/crawler/taobao/spiders/jd1.py b/chapter1/crawler/taobao/spiders/jd1.py
index 2a03084..df18422 100644
--- a/chapter1/crawler/taobao/spiders/jd1.py
+++ b/chapter1/crawler/taobao/spiders/jd1.py
@@ -55,7 +55,8 @@ class Jd1Spider(scrapy.Spider):
                 c_name = comment['nickname']
                 c_client = comment['userClientShow']
                 c_id = "%s_%s" % (product_id, comment['id'])
-                print('{}, {} {} {}\n{}\n'.format(c_id, c_name, c_time, c_client, c_content))
+                print('{}, {} {} {}\n{}\n'.format(
+                    c_id, c_name, c_time, c_client, c_content))
                 yield JDCommentItem(
                     id=c_id,
                     user_name=c_name,
@@ -67,27 +68,26 @@ class Jd1Spider(scrapy.Spider):
             if page == 0:
                 productCommentSummary = j['productCommentSummary']
                 yield JDCommentSummary(
-                    id = product_id,
-                    good_count = productCommentSummary['goodCount'],
-                    general_count= productCommentSummary['generalCount'],
-                    poor_count= productCommentSummary['poorCount'],
-                    comment_count= productCommentSummary['commentCount'],
-                    default_good_count= productCommentSummary['defaultGoodCount'],
-                    good_rate= productCommentSummary['goodRate']
+                    id=product_id,
+                    good_count=productCommentSummary['goodCount'],
+                    general_count=productCommentSummary['generalCount'],
+                    poor_count=productCommentSummary['poorCount'],
+                    comment_count=productCommentSummary['commentCount'],
+                    default_good_count=productCommentSummary['defaultGoodCount'],
+                    good_rate=productCommentSummary['goodRate']
                 )
 
-
             page = page + 1
 
             next_comment_url = self.make_comment_url(product_id, page)
 
             # time.sleep(10)
 
-            if page>settings['COMMENT_MAX_PAGE']:
+            if page > settings['COMMENT_MAX_PAGE']:
                 print("评论抓取达到最大深度, %s, 页数: %d" % (product_id, page))
             else:
                 yield Request(next_comment_url, self.parse_comment,
-                          meta={'product_id': product_id, 'page': page})
+                              meta={'product_id': product_id, 'page': page})
         else:
             print("评论抓取完成, %s, 页数: %d" % (product_id, page))
 
@@ -96,20 +96,26 @@ class Jd1Spider(scrapy.Spider):
 
         for item in items:
             # pdb.set_trace()
-            name = item.xpath('.//div[contains(@class, "p-name")]/a/em/text()').extract_first()
+            name = item.xpath(
+                './/div[contains(@class, "p-name")]/a/em/text()').extract_first()
             print(name)
             price = item.xpath(
                 './/div[contains(@class, "p-price")]/strong/i/text()').extract_first()
             print(price)
-            url = item.xpath('.//div[contains(@class, "p-name")]/a/@href').extract_first()
+            url = item.xpath(
+                './/div[contains(@class, "p-name")]/a/@href').extract_first()
             print('https:' + url)
-            img = item.xpath('.//div[@class="p-img"]/a/img/@data-lazy-img').extract_first()
+            img = item.xpath(
+                './/div[@class="p-img"]/a/img/@data-lazy-img').extract_first()
             if not img:
-                img = item.xpath('.//div[@class="p-img"]/a/img/@src').extract_first()
+                img = item.xpath(
+                    './/div[@class="p-img"]/a/img/@src').extract_first()
 
-            comment_num = item.xpath('.//div[@class="p-commit"]/strong/a/text()').extract_first()
+            comment_num = item.xpath(
+                './/div[@class="p-commit"]/strong/a/text()').extract_first()
             print(comment_num)
-            shop = item.xpath('.//div[@class="p-shop"]/span/a/@title').extract_first()
+            shop = item.xpath(
+                './/div[@class="p-shop"]/span/a/@title').extract_first()
             print(shop)
 
             yield JDProductItem(
@@ -132,7 +138,8 @@ class Jd1Spider(scrapy.Spider):
                               priority=100)
 
         # 获取下一页
-        next_page = response.xpath('//a[@class="pn-next"]/@href').extract_first()
+        next_page = response.xpath(
+            '//a[@class="pn-next"]/@href').extract_first()
         if next_page:
             page = 1
             m = re.match(r'.+page=(\d+).+', next_page)
diff --git a/chapter2/mysite/myapp/static/scripts/index.js b/chapter2/mysite/myapp/static/scripts/index.js
index 629a227..cb24aa9 100755
--- a/chapter2/mysite/myapp/static/scripts/index.js
+++ b/chapter2/mysite/myapp/static/scripts/index.js
@@ -158,6 +158,7 @@ function init(){
       ]
     });
 
+    if(document.getElementById('histogramChart')){
     var histogramChart = echarts.init(document.getElementById('histogramChart'));
     histogramChart.setOption({
 
@@ -255,6 +256,8 @@ function init(){
       ]
    });
 
+    }
+
    var lineChart2 = echarts.init(document.getElementById('lineChart2'));
    lineChart2.setOption({
 
diff --git a/chapter2/mysite/myapp/static/styles/common.css b/chapter2/mysite/myapp/static/styles/common.css
index 6f97d87..6aafd2a 100755
--- a/chapter2/mysite/myapp/static/styles/common.css
+++ b/chapter2/mysite/myapp/static/styles/common.css
@@ -1042,3 +1042,44 @@ border-radius: 3px;
 .popBox .ttBox{height: 30px; line-height: 30px; padding: 14px 30px; border-bottom: solid 1px #eef0f1;text-align: center;-webkit-box-sizing: content-box;  -moz-box-sizing: content-box;box-sizing: content-box;}
 .popBox .ttBox .tt{font-size: 20px; display: inline-block;  height: 30px;}
 .popBox .txtBox{height: calc(100% - 80px);overflow: auto;padding: 10px 0;}
+
+.percentChart{
+    font-size: 14px;
+    padding: 90px 40px 0px 0px;
+}
+.percentChart li{
+    margin-bottom: 25px;
+    clear:both;
+    line-height: 35px;
+    display: flex;
+}
+.percentChart li span.leftTitle{
+    text-align: right;
+    display: block;
+    color: #ffffff;
+    width: 130px;
+}
+.percentChart li .rightPercent{
+    flex:1;
+    position: relative;
+    background-color: #32CD32;
+    height: 35px;
+    margin-left: 10px;
+}
+.percentChart li .rightPercent span{
+    position: absolute;
+    color: #fff;
+    right: 5px;
+    font-size: 12px;
+    line-height: 35px
+}
+.percentChart li .rightPercent p{
+    background-color: #FF7F50;
+    width: 50%;
+    text-align: right;
+    color: #ffffff;
+    line-height: 35px;
+    font-size: 12px;
+    padding-right: 5px;
+    box-sizing:border-box;
+}
\ No newline at end of file
diff --git a/chapter2/mysite/myapp/templates/myapp/crawl.html b/chapter2/mysite/myapp/templates/myapp/crawl.html
index 24cb49c..385d2fd 100644
--- a/chapter2/mysite/myapp/templates/myapp/crawl.html
+++ b/chapter2/mysite/myapp/templates/myapp/crawl.html
@@ -139,7 +139,29 @@
             </div>
             <div class="div_any_child">
                 <div class="div_any_title"><img src="{% static "images/title_4.png" %}">商品数据采集(当日)</div>
-                <p id="lineChart2" class="p_chart"></p>
+                <ul class="percentChart">{# list parent required for the li rows #}
+                    <li>
+                     <span class="leftTitle">京东Android</span>
+                     <div class="rightPercent">
+                      <span>120</span>
+                      <p style="width:30%">40</p>
+                     </div>
+                    </li>
+                    <li>
+                     <span class="leftTitle">京东iPhone</span>
+                     <div class="rightPercent">
+                      <span>500</span>
+                      <p style="width:60%">300</p>
+                     </div>
+                    </li>
+                    <li>
+                     <span class="leftTitle">微信购物</span>
+                     <div class="rightPercent">
+                      <span>300</span>
+                      <p style="width:50%">150</p>
+                     </div>
+                    </li>
+                </ul>
             </div>
         </div>
     </div>
diff --git a/chapter3/spark.py b/chapter3/spark.py
index 959e9d8..4ffc4de 100644
--- a/chapter3/spark.py
+++ b/chapter3/spark.py
@@ -89,6 +89,7 @@ def collect_crawl_info(spark):
     
 
 def collect_news(spark):
+    """获取最新的20条采集"""
     df = spark.sql("select * from jd_comment order by created_at desc limit 20")
     
     for row in df.rdd.collect():
diff --git a/chapter1/requirements.txt b/requirements.txt
similarity index 98%
rename from chapter1/requirements.txt
rename to requirements.txt
index a0fc317..abf0ad8 100644
--- a/chapter1/requirements.txt
+++ b/requirements.txt
@@ -33,6 +33,7 @@ jupyter-core==4.4.0
 lxml==4.3.0
 MarkupSafe==1.1.0
 mistune==0.8.4
+mysqlclient==1.3.14
 nbconvert==5.4.0
 nbformat==4.4.0
 notebook==5.7.4