From b94235fb7515c11d93ee5de4afd769204815c4db Mon Sep 17 00:00:00 2001 From: guange <8863824@gmail.com> Date: Thu, 17 Jan 2019 23:16:48 +0800 Subject: [PATCH] . --- chapter3/spark.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/chapter3/spark.py b/chapter3/spark.py index 4452d59..a31777f 100644 --- a/chapter3/spark.py +++ b/chapter3/spark.py @@ -42,22 +42,26 @@ if __name__ == "__main__": .enableHiveSupport() \ .getOrCreate() - df = spark.sql("select count(*) as N from jd_comment") - jd_comment_count = df.rdd.collect()[0]["N"] + while True: + df = spark.sql("select count(*) as N from jd_comment") + jd_comment_count = df.rdd.collect()[0]["N"] - df = spark.sql("select count(*) as N from jd_comment where created_at like '"+today()+"%'") - jd_comment_today_count = df.rdd.collect()[0]["N"] + df = spark.sql("select count(*) as N from jd_comment where created_at like '"+today()+"%'") + jd_comment_today_count = df.rdd.collect()[0]["N"] - df = spark.sql("select count(*) as N from jd") - jd_count = df.rdd.collect()[0]["N"] + df = spark.sql("select count(*) as N from jd") + jd_count = df.rdd.collect()[0]["N"] - df = spark.sql("select count(*) as N from jd where created_at like '"+today()+"%'") - jd_today_count = df.rdd.collect()[0]["N"] + df = spark.sql("select count(*) as N from jd where created_at like '"+today()+"%'") + jd_today_count = df.rdd.collect()[0]["N"] - total_count = jd_comment_count + jd_count - today_total_count = jd_comment_today_count + jd_today_count + total_count = jd_comment_count + jd_count + today_total_count = jd_comment_today_count + jd_today_count - mysql_execute("insert into crawl_infos (total_count, today_total_count, product_count, today_product_count, comment_count, today_comment_count) values ({},{},{},{},{},{})".format( - total_count, today_total_count, jd_count,jd_today_count, jd_comment_count, jd_comment_today_count) ) + mysql_execute("insert into crawl_infos (total_count, today_total_count, product_count, today_product_count, comment_count, today_comment_count) values ({},{},{},{},{},{})".format( + total_count, today_total_count, jd_count,jd_today_count, jd_comment_count, jd_comment_today_count) ) + + time.sleep(10) + spark.stop() \ No newline at end of file