This commit is contained in:
parent
1beabd758e
commit
406fab63fe
|
@ -6,3 +6,4 @@ chapter1/crawler/datas/comments/
|
|||
chapter1/crawler/datas/comments1/
|
||||
chapter1/crawler/datas/products/
|
||||
chapter1/crawler/taobao/settings.py
|
||||
chapter2/mysite/mysite/settings.py
|
||||
|
|
|
@ -9,7 +9,6 @@ from scrapy_splash import SplashRequest
|
|||
import pdb
|
||||
from taobao.items import JDProductItem, JDCommentItem, JDCommentSummary
|
||||
import re
|
||||
|
||||
from taobao.utils import now_time
|
||||
|
||||
|
||||
|
|
|
@ -34,11 +34,27 @@ create table platform_infos (
|
|||
|
||||
|
||||
create table news (
|
||||
`id` int(11) NOT NULL AUTO_INCREMENT,
|
||||
`id` int(11) NOT NULL AUTO_INCREMENT,
|
||||
|
||||
comment_time varchar(255) comment '评论时间',
|
||||
content varchar(1024) comment '评论内容',
|
||||
comment_id varchar(255) comment '评论ID',
|
||||
comment_time varchar(255) comment '评论时间',
|
||||
content varchar(1024) comment '评论内容',
|
||||
comment_id varchar(255) comment '评论ID',
|
||||
|
||||
PRIMARY KEY (`id`)
|
||||
PRIMARY KEY (`id`)
|
||||
)comment='最新抓取的20条信息';
|
||||
|
||||
create table last_day_counts (
|
||||
`id` int(11) NOT NULL AUTO_INCREMENT,
|
||||
last_day int default 0,
|
||||
product_c int default 0,
|
||||
comment_c int default 0,
|
||||
PRIMARY KEY (`id`)
|
||||
)comment ='最后5天历史采集量';
|
||||
|
||||
insert into last_day_counts(last_day, product_c, comment_c) values(1, 0, 0);
|
||||
insert into last_day_counts(last_day, product_c, comment_c) values(2, 0, 0);
|
||||
insert into last_day_counts(last_day, product_c, comment_c) values(3, 0, 0);
|
||||
insert into last_day_counts(last_day, product_c, comment_c) values(4, 0, 0);
|
||||
insert into last_day_counts(last_day, product_c, comment_c) values(5, 0, 0);
|
||||
|
||||
|
||||
|
|
|
@ -304,7 +304,7 @@ function init(){
|
|||
color : '#ffffff',
|
||||
|
||||
},
|
||||
data : ['厦门第一医院','厦门中山医院','厦门中医院','厦门第五医院',],
|
||||
data : ['手机','笔记本',],
|
||||
},
|
||||
calculable : false,
|
||||
tooltip : {
|
||||
|
@ -340,7 +340,7 @@ function init(){
|
|||
xAxis: [
|
||||
{
|
||||
type: 'category',
|
||||
data : ['8:00','10:00','12:00','14:00','16:00','18:00'],
|
||||
data : ['四天前','三天前','二天前','昨天','今天'],
|
||||
axisLine:{
|
||||
lineStyle:{
|
||||
color: '#034c6a'
|
||||
|
@ -373,7 +373,7 @@ function init(){
|
|||
},
|
||||
series : [
|
||||
{
|
||||
name:'厦门第一医院',
|
||||
name:'手机',
|
||||
type:'line',
|
||||
smooth:true,
|
||||
itemStyle: {
|
||||
|
@ -383,10 +383,10 @@ function init(){
|
|||
}
|
||||
}
|
||||
},
|
||||
data:[15, 0, 20, 45, 22.1, 25,].reverse()
|
||||
data:[15, 0, 20, 45, 22.1,]
|
||||
},
|
||||
{
|
||||
name:'厦门中山医院',
|
||||
name:'笔记本',
|
||||
type:'line',
|
||||
smooth:true,
|
||||
itemStyle: {
|
||||
|
@ -396,34 +396,9 @@ function init(){
|
|||
}
|
||||
}
|
||||
},
|
||||
data:[25, 10, 30, 55, 32.1, 35, ].reverse()
|
||||
data:[25, 10, 30, 55, 32.1, ]
|
||||
},
|
||||
{
|
||||
name:'厦门中医院',
|
||||
type:'line',
|
||||
smooth:true,
|
||||
itemStyle: {
|
||||
normal: {
|
||||
lineStyle: {
|
||||
shadowColor : 'rgba(0,0,0,0.4)'
|
||||
}
|
||||
}
|
||||
},
|
||||
data:[35, 20, 40, 65, 42.1, 45, ].reverse()
|
||||
},
|
||||
{
|
||||
name:'厦门第五医院',
|
||||
type:'line',
|
||||
smooth:true,
|
||||
itemStyle: {
|
||||
normal: {
|
||||
lineStyle: {
|
||||
shadowColor : 'rgba(0,0,0,0.4)'
|
||||
}
|
||||
}
|
||||
},
|
||||
data:[45, 30, 50, 75, 52.1, 55, 6].reverse()
|
||||
}
|
||||
|
||||
]
|
||||
});
|
||||
|
||||
|
|
|
@ -131,11 +131,11 @@
|
|||
</div>
|
||||
<div class="right div_any01">
|
||||
<div class="div_any_child">
|
||||
<div class="div_any_title"><img src="{% static "images/title_3.png" %}">数据采集条数(当日)</div>
|
||||
<div class="div_any_title"><img src="{% static "images/title_3.png" %}">评论数据采集条数(当日)</div>
|
||||
<p id="lineChart" class="p_chart"></p>
|
||||
</div>
|
||||
<div class="div_any_child">
|
||||
<div class="div_any_title"><img src="{% static "images/title_4.png" %}"> 采集人数(当日)</div>
|
||||
<div class="div_any_title"><img src="{% static "images/title_4.png" %}">商品数据采集(当日)</div>
|
||||
<p id="lineChart2" class="p_chart"></p>
|
||||
</div>
|
||||
</div>
|
||||
|
|
|
@ -3,6 +3,7 @@ from pyspark.sql import Row
|
|||
# $example off:spark_hive$
|
||||
import os
|
||||
import pymysql
|
||||
import datetime
|
||||
import time
|
||||
|
||||
def mysql_query(sql):
|
||||
|
@ -29,6 +30,12 @@ def mysql_execute(sql):
|
|||
def today():
|
||||
return time.strftime('%Y-%m-%d')
|
||||
|
||||
def getYesterday(day):
|
||||
today=datetime.date.today()
|
||||
oneday=datetime.timedelta(days=day)
|
||||
yesterday=today-oneday
|
||||
return yesterday.strftime('%Y-%m-%d')
|
||||
|
||||
|
||||
def collect_crawl_info(spark):
|
||||
df = spark.sql("select count(*) as N from jd_comment")
|
||||
|
@ -57,6 +64,16 @@ def collect_news(spark):
|
|||
mysql_execute("insert into news (comment_time, content, comment_id) values ('{}', '{}', '{}')".format(
|
||||
row["comment_time"], row["content"], row["id"]))
|
||||
|
||||
def get_last_day_count(spark):
|
||||
"""获取过去几天的采集量"""
|
||||
for i in range(5):
|
||||
df = spark.sql("select count(*) as N from jd where created_at like '"+getYesterday(i+1)+"%'")
|
||||
jd_last_count = df.rdd.collect()[0]["N"]
|
||||
|
||||
df = spark.sql("select count(*) as N from jd_comment where created_at like '"+getYesterday(i+1)+"%'")
|
||||
jd_comment_last_count = df.rdd.collect()[0]["N"]
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# $example on:spark_hive$
|
||||
|
|
Loading…
Reference in New Issue