From 7b669f833bb5125c5da4e4c44ef18919a0aca98a Mon Sep 17 00:00:00 2001 From: guange <8863824@gmail.com> Date: Fri, 18 Jan 2019 10:26:32 +0800 Subject: [PATCH] . --- chapter2/mysite/myapp/views.py | 2 +- chapter3/spark.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/chapter2/mysite/myapp/views.py b/chapter2/mysite/myapp/views.py index 9193f07..9ba8c08 100644 --- a/chapter2/mysite/myapp/views.py +++ b/chapter2/mysite/myapp/views.py @@ -13,7 +13,7 @@ def crawl(request): # info = scrapy_client.get_scrapy_info() crawl_info = CrawlInfos.objects.order_by('-id').first() platform_info = get_platform_info() - news = News.objects.order_by('-id')[0:20] + news = News.objects.order_by('-id').all()[0:20] return render(request, 'myapp/crawl.html', {"crawl_info": crawl_info, "platform_info":json.dumps(platform_info), "news": news}) diff --git a/chapter3/spark.py b/chapter3/spark.py index d51df35..b391f8f 100644 --- a/chapter3/spark.py +++ b/chapter3/spark.py @@ -52,6 +52,7 @@ def collect_crawl_info(spark): def collect_news(spark): df = spark.sql("select * from jd_comment order by created_at desc limit 20") + mysql_execute("delete from news") for row in df.rdd.collect(): mysql_execute("insert into news (comment_time, content, comment_id) values ('{}', '{}', '{}')".format( row["comment_time"], row["content"], row["id"]))