.

2019-01-18 11:31:19 +08:00 · 2019-01-18 11:31:19 +08:00 · 406fab63fe
parent 1beabd758e
commit 406fab63fe
6 changed files with 48 additions and 40 deletions
--- a/.gitignore
+++ b/.gitignore
@ -6,3 +6,4 @@ chapter1/crawler/datas/comments/
 chapter1/crawler/datas/comments1/
 chapter1/crawler/datas/products/
 chapter1/crawler/taobao/settings.py
+chapter2/mysite/mysite/settings.py
--- a/chapter1/crawler/taobao/spiders/jd1.py
+++ b/chapter1/crawler/taobao/spiders/jd1.py
@ -9,7 +9,6 @@ from scrapy_splash import SplashRequest
 import pdb
 from taobao.items import JDProductItem, JDCommentItem, JDCommentSummary
 import re
-
 from taobao.utils import now_time


--- a/chapter2/init.sql
+++ b/chapter2/init.sql
@ -42,3 +42,19 @@ comment_id varchar(255) comment '评论ID',

  PRIMARY KEY (`id`)
 )comment='最新抓取的20条信息';
+
+-- Daily collection volume for the last five days, one row per day offset.
+-- NOTE(review): last_day presumably means "N days ago" and product_c /
+-- comment_c the product / comment row counts -- confirm against the writer.
+CREATE TABLE last_day_counts (
+  `id` int(11) NOT NULL AUTO_INCREMENT,
+  last_day int DEFAULT 0,
+  product_c int DEFAULT 0,
+  comment_c int DEFAULT 0,
+  PRIMARY KEY (`id`)
+) COMMENT = '最后5天历史采集量';
+
+-- Seed one zeroed row for each of the offsets 1..5.
+INSERT INTO last_day_counts (last_day, product_c, comment_c)
+VALUES (1, 0, 0), (2, 0, 0), (3, 0, 0), (4, 0, 0), (5, 0, 0);
+
+
--- a/chapter2/mysite/myapp/static/scripts/index.js
+++ b/chapter2/mysite/myapp/static/scripts/index.js
@ -304,7 +304,7 @@ function init(){
             color : '#ffffff',

         },
-          data : ['厦门第一医院','厦门中山医院','厦门中医院','厦门第五医院',],
+          data : ['手机','笔记本',],
     },
     calculable : false,
     tooltip : {
@ -340,7 +340,7 @@ function init(){
       xAxis: [
           {
               type: 'category',
-               data : ['8:00','10:00','12:00','14:00','16:00','18:00'],
+               data : ['四天前','三天前','二天前','昨天','今天'],
               axisLine:{
                   lineStyle:{
                       color: '#034c6a'
@ -373,7 +373,7 @@ function init(){
       },
       series : [
         {
-             name:'厦门第一医院',
+             name:'手机',
             type:'line',
             smooth:true,
             itemStyle: {
@ -383,10 +383,10 @@ function init(){
                     }
                 }
             },
-             data:[15, 0, 20, 45, 22.1, 25,].reverse()
+             data:[15, 0, 20, 45, 22.1,]
         },
         {
-             name:'厦门中山医院',
+             name:'笔记本',
             type:'line',
             smooth:true,
             itemStyle: {
@ -396,34 +396,9 @@ function init(){
                     }
                 }
             },
-             data:[25, 10, 30, 55, 32.1, 35, ].reverse()
+             data:[25, 10, 30, 55, 32.1, ]
         },
-         {
-             name:'厦门中医院',
-             type:'line',
-             smooth:true,
-             itemStyle: {
-                 normal: {
-                     lineStyle: {
-                         shadowColor : 'rgba(0,0,0,0.4)'
-                     }
-                 }
-             },
-             data:[35, 20, 40, 65, 42.1, 45, ].reverse()
-         },
-         {
-             name:'厦门第五医院',
-             type:'line',
-             smooth:true,
-             itemStyle: {
-                 normal: {
-                     lineStyle: {
-                         shadowColor : 'rgba(0,0,0,0.4)'
-                     }
-                 }
-             },
-             data:[45, 30, 50, 75, 52.1, 55, 6].reverse()
-         }
+         
     ]
   });

--- a/chapter2/mysite/myapp/templates/myapp/crawl.html
+++ b/chapter2/mysite/myapp/templates/myapp/crawl.html
@ -131,11 +131,11 @@
        </div>
        <div class="right div_any01">
            <div class="div_any_child">
-                <div class="div_any_title"><img src="{% static "images/title_3.png" %}">数据采集条数(当日)</div>
+                <div class="div_any_title"><img src="{% static "images/title_3.png" %}">评论数据采集条数(当日)</div>
                <p id="lineChart" class="p_chart"></p>
            </div>
            <div class="div_any_child">
-                <div class="div_any_title"><img src="{% static "images/title_4.png" %}"> 采集人数(当日)</div>
+                <div class="div_any_title"><img src="{% static "images/title_4.png" %}">商品数据采集(当日)</div>
                <p id="lineChart2" class="p_chart"></p>
            </div>
        </div>
--- a/chapter3/spark.py
+++ b/chapter3/spark.py
@ -3,6 +3,7 @@ from pyspark.sql import Row
 # $example off:spark_hive$
 import os
 import pymysql
+import datetime
 import time

 def mysql_query(sql):
@ -29,6 +30,12 @@ def mysql_execute(sql):
 def today():
    return time.strftime('%Y-%m-%d')

+def getYesterday(day):
+    """Return the local date `day` days before today, formatted 'YYYY-MM-DD'.
+
+    Despite the name, any offset accepted by ``datetime.timedelta(days=...)``
+    works: ``day=1`` is yesterday, ``day=0`` is today.
+    """
+    offset = datetime.timedelta(days=day)
+    return (datetime.date.today() - offset).strftime('%Y-%m-%d')
+

 def collect_crawl_info(spark):
    df = spark.sql("select count(*) as N from jd_comment")
@ -57,6 +64,16 @@ def collect_news(spark):
        mysql_execute("insert into news (comment_time, content, comment_id) values ('{}', '{}', '{}')".format(
            row["comment_time"], row["content"], row["id"]))

+def get_last_day_count(spark):
+    """Record the crawl volume of each of the previous five days in MySQL.
+
+    For every offset 1..5 (1 = yesterday) this counts the rows of the Spark
+    tables ``jd`` and ``jd_comment`` whose ``created_at`` starts with that
+    date, then writes both counts to the ``last_day_counts`` row whose
+    ``last_day`` equals the offset.
+
+    Args:
+        spark: object exposing ``sql()``; used to query both tables.
+    """
+    count_sql = "select count(*) as N from {} where created_at like '{}%'"
+    for days_ago in range(1, 6):
+        # Resolve the date once so both counts refer to the same day.
+        day = getYesterday(days_ago)
+
+        df = spark.sql(count_sql.format("jd", day))
+        jd_last_count = df.rdd.collect()[0]["N"]
+
+        df = spark.sql(count_sql.format("jd_comment", day))
+        jd_comment_last_count = df.rdd.collect()[0]["N"]
+
+        # Persist the counts; without this write the queries have no effect.
+        # The target rows (last_day 1..5) are seeded by chapter2/init.sql.
+        mysql_execute(
+            "update last_day_counts set product_c = {}, comment_c = {} "
+            "where last_day = {}".format(
+                jd_last_count, jd_comment_last_count, days_ago))
+
+

 if __name__ == "__main__":
    # $example on:spark_hive$