diff --git a/chapter3/spark.py b/chapter3/spark.py
index 8445a88..c47fb98 100644
--- a/chapter3/spark.py
+++ b/chapter3/spark.py
@@ -89,12 +89,17 @@ if __name__ == "__main__":
         .enableHiveSupport() \
         .getOrCreate()
 
+    count = 0
     while True:
         collect_crawl_info(spark)
         collect_news(spark)
-        get_last_day_count(spark)
+        # Throttle: refresh the last-day count on the first pass, then once every 100 passes.
+        if count == 0 or count > 100:
+            get_last_day_count(spark)
+            count = 1
 
         time.sleep(10)
 
+        count += 1
 
     spark.stop()
\ No newline at end of file