This commit is contained in:
guange 2019-01-18 11:31:19 +08:00
parent 1beabd758e
commit 406fab63fe
6 changed files with 48 additions and 40 deletions

1
.gitignore vendored
View File

@ -6,3 +6,4 @@ chapter1/crawler/datas/comments/
chapter1/crawler/datas/comments1/
chapter1/crawler/datas/products/
chapter1/crawler/taobao/settings.py
chapter2/mysite/mysite/settings.py

View File

@ -9,7 +9,6 @@ from scrapy_splash import SplashRequest
import pdb
from taobao.items import JDProductItem, JDCommentItem, JDCommentSummary
import re
from taobao.utils import now_time

View File

@ -42,3 +42,19 @@ comment_id varchar(255) comment '评论ID',
PRIMARY KEY (`id`)
)comment='最新抓取的20条信息';
-- Table holding one row per recent day with two integer counters.
-- The table comment below reads "collection volume history for the last 5 days".
-- NOTE(review): presumably last_day is "days ago" (1 = yesterday), product_c the
-- number of products collected and comment_c the number of comments collected --
-- confirm against the code that updates this table.
create table last_day_counts (
`id` int(11) NOT NULL AUTO_INCREMENT,
last_day int default 0,
product_c int default 0,
comment_c int default 0,
PRIMARY KEY (`id`)
)comment ='最后5天历史采集量';
-- Seed five rows (last_day 1 through 5) with both counters set to zero.
insert into last_day_counts(last_day, product_c, comment_c) values(1, 0, 0);
insert into last_day_counts(last_day, product_c, comment_c) values(2, 0, 0);
insert into last_day_counts(last_day, product_c, comment_c) values(3, 0, 0);
insert into last_day_counts(last_day, product_c, comment_c) values(4, 0, 0);
insert into last_day_counts(last_day, product_c, comment_c) values(5, 0, 0);

View File

@ -304,7 +304,7 @@ function init(){
color : '#ffffff',
},
data : ['厦门第一医院','厦门中山医院','厦门中医院','厦门第五医院',],
data : ['手机','笔记本',],
},
calculable : false,
tooltip : {
@ -340,7 +340,7 @@ function init(){
xAxis: [
{
type: 'category',
data : ['8:00','10:00','12:00','14:00','16:00','18:00'],
data : ['四天前','三天前','二天前','昨天','今天'],
axisLine:{
lineStyle:{
color: '#034c6a'
@ -373,7 +373,7 @@ function init(){
},
series : [
{
name:'厦门第一医院',
name:'手机',
type:'line',
smooth:true,
itemStyle: {
@ -383,10 +383,10 @@ function init(){
}
}
},
data:[15, 0, 20, 45, 22.1, 25,].reverse()
data:[15, 0, 20, 45, 22.1,]
},
{
name:'厦门中山医院',
name:'笔记本',
type:'line',
smooth:true,
itemStyle: {
@ -396,34 +396,9 @@ function init(){
}
}
},
data:[25, 10, 30, 55, 32.1, 35, ].reverse()
data:[25, 10, 30, 55, 32.1, ]
},
{
name:'厦门中医院',
type:'line',
smooth:true,
itemStyle: {
normal: {
lineStyle: {
shadowColor : 'rgba(0,0,0,0.4)'
}
}
},
data:[35, 20, 40, 65, 42.1, 45, ].reverse()
},
{
name:'厦门第五医院',
type:'line',
smooth:true,
itemStyle: {
normal: {
lineStyle: {
shadowColor : 'rgba(0,0,0,0.4)'
}
}
},
data:[45, 30, 50, 75, 52.1, 55, 6].reverse()
}
]
});

View File

@ -131,11 +131,11 @@
</div>
<div class="right div_any01">
<div class="div_any_child">
<div class="div_any_title"><img src="{% static "images/title_3.png" %}">数据采集条数(当日)</div>
<div class="div_any_title"><img src="{% static "images/title_3.png" %}">评论数据采集条数(当日)</div>
<p id="lineChart" class="p_chart"></p>
</div>
<div class="div_any_child">
<div class="div_any_title"><img src="{% static "images/title_4.png" %}"> 采集人数(当日)</div>
<div class="div_any_title"><img src="{% static "images/title_4.png" %}">商品数据采集(当日)</div>
<p id="lineChart2" class="p_chart"></p>
</div>
</div>

View File

@ -3,6 +3,7 @@ from pyspark.sql import Row
# $example off:spark_hive$
import os
import pymysql
import datetime
import time
def mysql_query(sql):
@ -29,6 +30,12 @@ def mysql_execute(sql):
def today():
    """Return the current local date formatted as ``YYYY-MM-DD``."""
    date_format = '%Y-%m-%d'
    # Passing the local time explicitly matches strftime's default behaviour.
    return time.strftime(date_format, time.localtime())
def getYesterday(day):
    """Return the local date ``day`` days before today as ``YYYY-MM-DD``.

    Args:
        day: Number of days to subtract from today's date. ``0`` yields
            today's date and ``1`` yields yesterday's.

    Returns:
        The resulting date as a ``YYYY-MM-DD`` string.
    """
    # Named ``current_date`` so the module-level ``today()`` function is not
    # shadowed inside this function.
    current_date = datetime.date.today()
    target_date = current_date - datetime.timedelta(days=day)
    return target_date.strftime('%Y-%m-%d')
def collect_crawl_info(spark):
df = spark.sql("select count(*) as N from jd_comment")
@ -57,6 +64,16 @@ def collect_news(spark):
mysql_execute("insert into news (comment_time, content, comment_id) values ('{}', '{}', '{}')".format(
row["comment_time"], row["content"], row["id"]))
def get_last_day_count(spark):
    """Count the rows collected on each of the previous five days.

    For each offset from 1 to 5 days back, counts the rows of the ``jd`` and
    ``jd_comment`` tables whose ``created_at`` value starts with that day's
    ``YYYY-MM-DD`` string.

    Args:
        spark: Object exposing ``sql(query)`` that returns a DataFrame.

    Returns:
        A list of ``(days_ago, jd_count, jd_comment_count)`` tuples, ordered
        from 1 to 5 days ago.
    """
    # NOTE(review): the previous version computed these counts and discarded
    # them; they are returned so the caller can use or persist them.
    counts = []
    for days_ago in range(1, 6):
        # Compute the date prefix once so both queries target the same day,
        # even if the loop runs across midnight.
        day_prefix = getYesterday(days_ago)
        df = spark.sql(
            "select count(*) as N from jd where created_at like '{}%'".format(day_prefix))
        jd_last_count = df.rdd.collect()[0]["N"]
        df = spark.sql(
            "select count(*) as N from jd_comment where created_at like '{}%'".format(day_prefix))
        jd_comment_last_count = df.rdd.collect()[0]["N"]
        counts.append((days_ago, jd_last_count, jd_comment_last_count))
    return counts
if __name__ == "__main__":
# $example on:spark_hive$