This commit is contained in:
parent
da3f9324b6
commit
411e14684f
|
@ -13,7 +13,7 @@ def crawl(request):
|
||||||
# info = scrapy_client.get_scrapy_info()
|
# info = scrapy_client.get_scrapy_info()
|
||||||
crawl_info = CrawlInfos.objects.order_by('-id').first()
|
crawl_info = CrawlInfos.objects.order_by('-id').first()
|
||||||
platform_info = get_platform_info()
|
platform_info = get_platform_info()
|
||||||
news = News.objects.all()
|
news = News.objects.order_by('-id')[0:20]
|
||||||
return render(request, 'myapp/crawl.html', {"crawl_info": crawl_info,
|
return render(request, 'myapp/crawl.html', {"crawl_info": crawl_info,
|
||||||
"platform_info":json.dumps(platform_info),
|
"platform_info":json.dumps(platform_info),
|
||||||
"news": news})
|
"news": news})
|
||||||
|
|
|
@ -30,6 +30,33 @@ def today():
|
||||||
return time.strftime('%Y-%m-%d')
|
return time.strftime('%Y-%m-%d')
|
||||||
|
|
||||||
|
|
||||||
|
def collect_crawl_info(spark):
    """Snapshot crawl statistics from the Hive tables into MySQL.

    Counts total and today's rows in the ``jd`` (products) and
    ``jd_comment`` (comments) tables via Spark SQL, then inserts one
    summary row into ``crawl_infos``.

    :param spark: an active SparkSession with the Hive tables registered.
    :returns: None (side effect: one INSERT via ``mysql_execute``).
    """

    def _count(sql):
        # Run a "select count(*) as N ..." query and return the scalar.
        # collect()[0]["N"] assumes the query always yields exactly one row,
        # which count(*) guarantees.
        return spark.sql(sql).rdd.collect()[0]["N"]

    # today() returns 'YYYY-MM-DD'; the LIKE prefix match selects rows
    # created today. Value comes from our own clock, not user input,
    # so the string concatenation here is not an injection vector.
    today_prefix = today() + "%"

    jd_comment_count = _count("select count(*) as N from jd_comment")
    jd_comment_today_count = _count(
        "select count(*) as N from jd_comment where created_at like '" + today_prefix + "'")

    jd_count = _count("select count(*) as N from jd")
    jd_today_count = _count(
        "select count(*) as N from jd where created_at like '" + today_prefix + "'")

    total_count = jd_comment_count + jd_count
    today_total_count = jd_comment_today_count + jd_today_count

    # All six values are integers produced by count(*), so .format into
    # the VALUES list cannot inject; still, prefer a parameterized query
    # if mysql_execute supports placeholders.
    mysql_execute("insert into crawl_infos (total_count, today_total_count, product_count, today_product_count, comment_count, today_comment_count) values ({},{},{},{},{},{})".format(
        total_count, today_total_count, jd_count, jd_today_count, jd_comment_count, jd_comment_today_count))
|
||||||
|
|
||||||
|
|
||||||
|
def collect_news(spark):
    """Copy the 20 most recent JD comments into the MySQL ``news`` table.

    :param spark: an active SparkSession with the ``jd_comment`` table
        registered.
    :returns: None (side effect: up to 20 INSERTs via ``mysql_execute``).
    """
    df = spark.sql("select * from jd_comment order by created_at desc limit 20")
    for row in df.rdd.collect():
        # BUG FIX: original template ended with '{}'' — a stray extra
        # quote that made every generated INSERT a SQL syntax error.
        # NOTE(review): comment content is external/untrusted text; a
        # single quote inside row["content"] will still break or inject
        # into this statement. Switch to a parameterized query if
        # mysql_execute supports placeholders.
        mysql_execute("insert into news (comment_time, content, comment_id) values ('{}', '{}', '{}')".format(
            row["comment_time"], row["content"], row["id"]))
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
# $example on:spark_hive$
|
# $example on:spark_hive$
|
||||||
# warehouse_location points to the default location for managed databases and tables
|
# warehouse_location points to the default location for managed databases and tables
|
||||||
|
@ -43,24 +70,8 @@ if __name__ == "__main__":
|
||||||
.getOrCreate()
|
.getOrCreate()
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
df = spark.sql("select count(*) as N from jd_comment")
|
collect_crawl_info(spark)
|
||||||
jd_comment_count = df.rdd.collect()[0]["N"]
|
collect_news(spark)
|
||||||
|
|
||||||
df = spark.sql("select count(*) as N from jd_comment where created_at like '"+today()+"%'")
|
|
||||||
jd_comment_today_count = df.rdd.collect()[0]["N"]
|
|
||||||
|
|
||||||
df = spark.sql("select count(*) as N from jd")
|
|
||||||
jd_count = df.rdd.collect()[0]["N"]
|
|
||||||
|
|
||||||
df = spark.sql("select count(*) as N from jd where created_at like '"+today()+"%'")
|
|
||||||
jd_today_count = df.rdd.collect()[0]["N"]
|
|
||||||
|
|
||||||
total_count = jd_comment_count + jd_count
|
|
||||||
today_total_count = jd_comment_today_count + jd_today_count
|
|
||||||
|
|
||||||
mysql_execute("insert into crawl_infos (total_count, today_total_count, product_count, today_product_count, comment_count, today_comment_count) values ({},{},{},{},{},{})".format(
|
|
||||||
total_count, today_total_count, jd_count,jd_today_count, jd_comment_count, jd_comment_today_count) )
|
|
||||||
|
|
||||||
|
|
||||||
time.sleep(10)
|
time.sleep(10)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue