diff --git a/chapter3/spark.py b/chapter3/spark.py index 4452d59..a31777f 100644 --- a/chapter3/spark.py +++ b/chapter3/spark.py @@ -42,22 +42,26 @@ if __name__ == "__main__": .enableHiveSupport() \ .getOrCreate() - df = spark.sql("select count(*) as N from jd_comment") - jd_comment_count = df.rdd.collect()[0]["N"] + while True: + df = spark.sql("select count(*) as N from jd_comment") + jd_comment_count = df.rdd.collect()[0]["N"] - df = spark.sql("select count(*) as N from jd_comment where created_at like '"+today()+"%'") - jd_comment_today_count = df.rdd.collect()[0]["N"] + df = spark.sql("select count(*) as N from jd_comment where created_at like '"+today()+"%'") + jd_comment_today_count = df.rdd.collect()[0]["N"] - df = spark.sql("select count(*) as N from jd") - jd_count = df.rdd.collect()[0]["N"] + df = spark.sql("select count(*) as N from jd") + jd_count = df.rdd.collect()[0]["N"] - df = spark.sql("select count(*) as N from jd where created_at like '"+today()+"%'") - jd_today_count = df.rdd.collect()[0]["N"] + df = spark.sql("select count(*) as N from jd where created_at like '"+today()+"%'") + jd_today_count = df.rdd.collect()[0]["N"] - total_count = jd_comment_count + jd_count - today_total_count = jd_comment_today_count + jd_today_count + total_count = jd_comment_count + jd_count + today_total_count = jd_comment_today_count + jd_today_count - mysql_execute("insert into crawl_infos (total_count, today_total_count, product_count, today_product_count, comment_count, today_comment_count) values ({},{},{},{},{},{})".format( - total_count, today_total_count, jd_count,jd_today_count, jd_comment_count, jd_comment_today_count) ) + mysql_execute("insert into crawl_infos (total_count, today_total_count, product_count, today_product_count, comment_count, today_comment_count) values ({},{},{},{},{},{})".format( + total_count, today_total_count, jd_count,jd_today_count, jd_comment_count, jd_comment_today_count) ) + + time.sleep(10) + spark.stop() \ No newline at end of file