This commit is contained in:
parent
1beabd758e
commit
406fab63fe
@@ -6,3 +6,4 @@ chapter1/crawler/datas/comments/
 chapter1/crawler/datas/comments1/
 chapter1/crawler/datas/products/
 chapter1/crawler/taobao/settings.py
+chapter2/mysite/mysite/settings.py
@@ -9,7 +9,6 @@ from scrapy_splash import SplashRequest
 import pdb
 from taobao.items import JDProductItem, JDCommentItem, JDCommentSummary
 import re
-
 from taobao.utils import now_time
 
 
@@ -34,11 +34,27 @@ create table platform_infos (
 
 
 create table news (
 `id` int(11) NOT NULL AUTO_INCREMENT,
 
 comment_time varchar(255) comment '评论时间',
 content varchar(1024) comment '评论内容',
 comment_id varchar(255) comment '评论ID',
 
 PRIMARY KEY (`id`)
 )comment='最新抓取的20条信息';
+
+create table last_day_counts (
+`id` int(11) NOT NULL AUTO_INCREMENT,
+last_day int default 0,
+product_c int default 0,
+comment_c int default 0,
+PRIMARY KEY (`id`)
+)comment ='最后5天历史采集量';
+
+insert into last_day_counts(last_day, product_c, comment_c) values(1, 0, 0);
+insert into last_day_counts(last_day, product_c, comment_c) values(2, 0, 0);
+insert into last_day_counts(last_day, product_c, comment_c) values(3, 0, 0);
+insert into last_day_counts(last_day, product_c, comment_c) values(4, 0, 0);
+insert into last_day_counts(last_day, product_c, comment_c) values(5, 0, 0);
+
+
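Note: the five seed rows give last_day_counts one fixed slot per day (last_day 1-5, i.e. one to five days back), so a downstream job is expected to overwrite product_c/comment_c in place rather than insert new rows. A minimal sketch of that refresh, assuming pymysql-style access like the rest of the project; connection parameters are placeholders, not values from this commit:

    import pymysql

    # Placeholder credentials -- adjust to the project's real MySQL settings.
    conn = pymysql.connect(host="localhost", user="root", password="",
                           db="mysite", charset="utf8mb4")

    def refresh_last_day_counts(counts):
        """counts: list of (last_day, product_c, comment_c) tuples, one per slot 1..5."""
        with conn.cursor() as cur:
            for last_day, product_c, comment_c in counts:
                cur.execute(
                    "update last_day_counts set product_c=%s, comment_c=%s where last_day=%s",
                    (product_c, comment_c, last_day))
        conn.commit()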
@@ -304,7 +304,7 @@ function init(){
 color : '#ffffff',
 
 },
-data : ['厦门第一医院','厦门中山医院','厦门中医院','厦门第五医院',],
+data : ['手机','笔记本',],
 },
 calculable : false,
 tooltip : {
@@ -340,7 +340,7 @@ function init(){
 xAxis: [
 {
 type: 'category',
-data : ['8:00','10:00','12:00','14:00','16:00','18:00'],
+data : ['四天前','三天前','二天前','昨天','今天'],
 axisLine:{
 lineStyle:{
 color: '#034c6a'
@@ -373,7 +373,7 @@ function init(){
 },
 series : [
 {
-name:'厦门第一医院',
+name:'手机',
 type:'line',
 smooth:true,
 itemStyle: {
@@ -383,10 +383,10 @@ function init(){
 }
 }
 },
-data:[15, 0, 20, 45, 22.1, 25,].reverse()
+data:[15, 0, 20, 45, 22.1,]
 },
 {
-name:'厦门中山医院',
+name:'笔记本',
 type:'line',
 smooth:true,
 itemStyle: {
@@ -396,34 +396,9 @@ function init(){
 }
 }
 },
-data:[25, 10, 30, 55, 32.1, 35, ].reverse()
+data:[25, 10, 30, 55, 32.1, ]
 },
-{
-name:'厦门中医院',
-type:'line',
-smooth:true,
-itemStyle: {
-normal: {
-lineStyle: {
-shadowColor : 'rgba(0,0,0,0.4)'
-}
-}
-},
-data:[35, 20, 40, 65, 42.1, 45, ].reverse()
-},
-{
-name:'厦门第五医院',
-type:'line',
-smooth:true,
-itemStyle: {
-normal: {
-lineStyle: {
-shadowColor : 'rgba(0,0,0,0.4)'
-}
-}
-},
-data:[45, 30, 50, 75, 52.1, 55, 6].reverse()
-}
 ]
 });
 
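The remaining data arrays are still hard-coded five-point samples, matching the new five-day x-axis; presumably the real values come from the last_day_counts table added above. A hedged sketch of reading that table oldest-first so the rows line up with the axis order — table and column names come from this commit, while the connection details, the exact slot-to-label mapping (whether last_day=1 means yesterday or today), and the 手机/笔记本 split (which that table does not store) are all assumptions:

    import pymysql

    # Placeholder connection settings; a real Django view would use the project's DB config.
    conn = pymysql.connect(host="localhost", user="root", password="",
                           db="mysite", charset="utf8mb4")

    def last_five_days_series():
        """Return (product_counts, comment_counts) ordered oldest-first for the line charts."""
        with conn.cursor() as cur:
            cur.execute("select last_day, product_c, comment_c from last_day_counts "
                        "order by last_day desc")
            rows = cur.fetchall()  # last_day=5 (oldest slot) first, last_day=1 last
        product_series = [r[1] for r in rows]
        comment_series = [r[2] for r in rows]
        return product_series, comment_series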
@@ -131,11 +131,11 @@
 </div>
 <div class="right div_any01">
 <div class="div_any_child">
-<div class="div_any_title"><img src="{% static "images/title_3.png" %}">数据采集条数(当日)</div>
+<div class="div_any_title"><img src="{% static "images/title_3.png" %}">评论数据采集条数(当日)</div>
 <p id="lineChart" class="p_chart"></p>
 </div>
 <div class="div_any_child">
-<div class="div_any_title"><img src="{% static "images/title_4.png" %}"> 采集人数(当日)</div>
+<div class="div_any_title"><img src="{% static "images/title_4.png" %}">商品数据采集(当日)</div>
 <p id="lineChart2" class="p_chart"></p>
 </div>
 </div>
@@ -3,6 +3,7 @@ from pyspark.sql import Row
 # $example off:spark_hive$
 import os
 import pymysql
+import datetime
 import time
 
 def mysql_query(sql):
@@ -29,6 +30,12 @@ def mysql_execute(sql):
 def today():
     return time.strftime('%Y-%m-%d')
 
+def getYesterday(day):
+    today=datetime.date.today()
+    oneday=datetime.timedelta(days=day)
+    yesterday=today-oneday
+    return yesterday.strftime('%Y-%m-%d')
+
 
 def collect_crawl_info(spark):
     df = spark.sql("select count(*) as N from jd_comment")
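For reference, the string getYesterday() builds is what the new get_last_day_count() below matches against created_at. A standalone sketch of the same arithmetic (function name here is illustrative, not from the patch):

    import datetime

    def get_yesterday(day):
        # Same logic as the patch's getYesterday(): today minus `day` days, formatted 'YYYY-MM-DD'.
        return (datetime.date.today() - datetime.timedelta(days=day)).strftime('%Y-%m-%d')

    print(get_yesterday(1))  # yesterday's date, used as: created_at like '<this value>%'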
@@ -57,6 +64,16 @@ def collect_news(spark):
         mysql_execute("insert into news (comment_time, content, comment_id) values ('{}', '{}', '{}')".format(
             row["comment_time"], row["content"], row["id"]))
 
+
+def get_last_day_count(spark):
+    """获取过去几天的采集量"""
+    for i in range(5):
+        df = spark.sql("select count(*) as N from jd where created_at like '"+getYesterday(i+1)+"%'")
+        jd_last_count = df.rdd.collect()[0]["N"]
+
+        df = spark.sql("select count(*) as N from jd_comment where created_at like '"+getYesterday(i+1)+"%'")
+        jd_comment_last_count = df.rdd.collect()[0]["N"]
+
 
 if __name__ == "__main__":
     # $example on:spark_hive$
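As added here, get_last_day_count() computes jd_last_count and jd_comment_last_count but does not yet store them anywhere; presumably a later change writes them into the last_day_counts table created in this commit. A hedged sketch of that missing step, meant to live in the same module so it can reuse the existing mysql_execute() and getYesterday() helpers — the function name and the slot mapping last_day = i + 1 are assumptions:

    # Hypothetical continuation of get_last_day_count(); not part of this commit.
    def store_last_day_counts(spark):
        for i in range(5):
            day = getYesterday(i + 1)
            product_c = spark.sql(
                "select count(*) as N from jd where created_at like '" + day + "%'"
            ).rdd.collect()[0]["N"]
            comment_c = spark.sql(
                "select count(*) as N from jd_comment where created_at like '" + day + "%'"
            ).rdd.collect()[0]["N"]
            # Overwrite the fixed slot for "i+1 days ago" (seeded by the SQL added in this commit).
            mysql_execute("update last_day_counts set product_c={}, comment_c={} where last_day={}".format(
                product_c, comment_c, i + 1))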