diff --git a/chapter3/spark.py b/chapter3/spark.py index d7e763d..8445a88 100644 --- a/chapter3/spark.py +++ b/chapter3/spark.py @@ -67,10 +67,10 @@ def collect_news(spark): def get_last_day_count(spark): """获取过去几天的采集量""" for i in range(5): - df = spark.sql("select count(*) as N from jd where created_at like '"+getYesterday(i+1)+"%'") + df = spark.sql("select count(*) as N from jd where created_at like '"+getYesterday(i)+"%'") jd_last_count = df.rdd.collect()[0]["N"] - df = spark.sql("select count(*) as N from jd_comment where created_at like '"+getYesterday(i+1)+"%'") + df = spark.sql("select count(*) as N from jd_comment where created_at like '"+getYesterday(i)+"%'") jd_comment_last_count = df.rdd.collect()[0]["N"] mysql_execute("update last_day_counts set product_c = {}, comment_c = {} where last_day = {}".format(