This commit is contained in:
parent
6c988d5534
commit
3712b43343
|
@@ -90,11 +90,13 @@ def collect_crawl_info(spark):
|
||||||
|
|
||||||
def collect_news(spark):
|
def collect_news(spark):
|
||||||
df = spark.sql("select * from jd_comment order by created_at desc limit 20")
|
df = spark.sql("select * from jd_comment order by created_at desc limit 20")
|
||||||
mysql_execute("delete from news")
|
|
||||||
for row in df.rdd.collect():
|
for row in df.rdd.collect():
|
||||||
mysql_execute("insert into news (comment_time, content, comment_id) values ('{}', '{}', '{}')".format(
|
mysql_execute("insert into news (comment_time, content, comment_id) values ('{}', '{}', '{}')".format(
|
||||||
row["comment_time"], row["content"], row["id"]))
|
row["comment_time"], row["content"], row["id"]))
|
||||||
|
|
||||||
|
mysql_execute("delete from news where id not in ( select x.id from (select id from news order by id desc limit 20) as x);")
|
||||||
|
|
||||||
def get_last_day_count(spark):
|
def get_last_day_count(spark):
|
||||||
"""获取过去几天的采集量"""
|
"""获取过去几天的采集量"""
|
||||||
for i in range(5):
|
for i in range(5):
|
||||||
|
|
Loading…
Reference in New Issue