This commit is contained in:
guange 2019-01-17 23:16:48 +08:00
parent 51c41dd0d1
commit b94235fb75
1 changed file with 16 additions and 12 deletions

View File

@@ -42,22 +42,26 @@ if __name__ == "__main__":
.enableHiveSupport() \ .enableHiveSupport() \
.getOrCreate() .getOrCreate()
df = spark.sql("select count(*) as N from jd_comment") while True:
jd_comment_count = df.rdd.collect()[0]["N"] df = spark.sql("select count(*) as N from jd_comment")
jd_comment_count = df.rdd.collect()[0]["N"]
df = spark.sql("select count(*) as N from jd_comment where created_at like '"+today()+"%'") df = spark.sql("select count(*) as N from jd_comment where created_at like '"+today()+"%'")
jd_comment_today_count = df.rdd.collect()[0]["N"] jd_comment_today_count = df.rdd.collect()[0]["N"]
df = spark.sql("select count(*) as N from jd") df = spark.sql("select count(*) as N from jd")
jd_count = df.rdd.collect()[0]["N"] jd_count = df.rdd.collect()[0]["N"]
df = spark.sql("select count(*) as N from jd where created_at like '"+today()+"%'") df = spark.sql("select count(*) as N from jd where created_at like '"+today()+"%'")
jd_today_count = df.rdd.collect()[0]["N"] jd_today_count = df.rdd.collect()[0]["N"]
total_count = jd_comment_count + jd_count total_count = jd_comment_count + jd_count
today_total_count = jd_comment_today_count + jd_today_count today_total_count = jd_comment_today_count + jd_today_count
mysql_execute("insert into crawl_infos (total_count, today_total_count, product_count, today_product_count, comment_count, today_comment_count) values ({},{},{},{},{},{})".format( mysql_execute("insert into crawl_infos (total_count, today_total_count, product_count, today_product_count, comment_count, today_comment_count) values ({},{},{},{},{},{})".format(
total_count, today_total_count, jd_count,jd_today_count, jd_comment_count, jd_comment_today_count) ) total_count, today_total_count, jd_count,jd_today_count, jd_comment_count, jd_comment_today_count) )
time.sleep(10)
spark.stop() spark.stop()