This commit is contained in:
guange 2019-01-18 19:34:11 +08:00
parent 6c988d5534
commit 3712b43343
1 changed file with 3 additions and 1 deletion

View File

@@ -90,11 +90,13 @@ def collect_crawl_info(spark):
def collect_news(spark):
    """Refresh the MySQL ``news`` table from the newest ``jd_comment`` rows.

    Queries the 20 most recently created rows of ``jd_comment``, clears the
    ``news`` table, inserts one ``news`` row per comment and finally deletes
    every ``news`` row except the 20 with the highest ``id``.

    Args:
        spark: Object exposing ``sql(query)`` whose result provides
            ``rdd.collect()`` yielding rows indexable by column name
            (presumably a ``SparkSession`` -- confirm against the caller).
    """

    def _sql_literal(value):
        # Render ``value`` as a single-quoted SQL string literal. Backslashes
        # and single quotes are escaped so that comment text containing them
        # can neither break the statement nor inject SQL.
        # NOTE(review): assumes the MySQL default sql_mode (backslash escapes
        # enabled) -- confirm. If mysql_execute can take bound parameters,
        # prefer those over manual escaping.
        escaped = str(value).replace("\\", "\\\\").replace("'", "\\'")
        return "'{}'".format(escaped)

    df = spark.sql("select * from jd_comment order by created_at desc limit 20")

    # NOTE(review): the table is emptied here and trimmed to 20 rows again at
    # the end; both statements are kept as found -- confirm both are intended.
    mysql_execute("delete from news")

    for row in df.rdd.collect():
        mysql_execute(
            "insert into news (comment_time, content, comment_id) values ({}, {}, {})".format(
                _sql_literal(row["comment_time"]),
                _sql_literal(row["content"]),
                _sql_literal(row["id"]),
            )
        )

    # Keep only the 20 newest rows. The derived table ``x`` is needed because
    # MySQL rejects a subquery that selects from the table being deleted from.
    mysql_execute("delete from news where id not in ( select x.id from (select id from news order by id desc limit 20) as x);")
def get_last_day_count(spark):
"""获取过去几天的采集量"""
for i in range(5):