This commit is contained in:
parent
6392531b09
commit
4a8a5288f4
|
@ -25,6 +25,9 @@ def mysql_execute(sql):
|
|||
finally:
|
||||
db.close()
|
||||
|
||||
def today():
    """Return the current local date formatted as ``YYYY-MM-DD``.

    The string is used as a prefix when matching ``created_at`` values,
    so the format must stay year-month-day with zero padding.
    """
    return time.strftime('%Y-%m-%d')
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# $example on:spark_hive$
|
||||
|
@ -38,6 +41,22 @@ if __name__ == "__main__":
|
|||
.enableHiveSupport() \
|
||||
.getOrCreate()
|
||||
|
||||
# Collect crawl statistics from the Hive tables `jd` (products) and
# `jd_comment` (comments), then record one summary row in MySQL.
#
# NOTE(review): queries are issued through `spark`, the session this block
# already stops at the end. The earlier `sqlContext` name has no definition in
# the reviewed part of the script -- confirm both refer to the same catalog.
def _count(query):
    """Run a ``select count(*) as N ...`` query and return the value of N."""
    # An aggregate without GROUP BY always yields exactly one row.
    return spark.sql(query).first()["N"]


try:
    # Resolve the date prefix once so both "today" queries look at the same
    # day even if the job happens to run across midnight.
    day_prefix = today()

    jd_comment_count = _count("select count(*) as N from jd_comment")
    jd_comment_today_count = _count(
        "select count(*) as N from jd_comment where created_at like '" + day_prefix + "%'")

    jd_count = _count("select count(*) as N from jd")
    jd_today_count = _count(
        "select count(*) as N from jd where created_at like '" + day_prefix + "%'")

    total_count = jd_comment_count + jd_count
    today_total_count = jd_comment_today_count + jd_today_count

    # All interpolated values are integers produced by count(*), so plain
    # formatting into the statement is safe here; mysql_execute only accepts
    # a finished SQL string.
    mysql_execute("insert into crawl_infos (total_count, today_total_count, product_count, today_product_count, comment_count, today_comment_count) values ({},{},{},{},{},{})".format(
        total_count, today_total_count, jd_count, jd_today_count, jd_comment_count, jd_comment_today_count))
finally:
    # Release the Spark session even when a query or the insert fails.
    spark.stop()
|
Loading…
Reference in New Issue