From 41e092beccd7a4c797a375121b815fe27559fcca Mon Sep 17 00:00:00 2001
From: guange <8863824@gmail.com>
Date: Thu, 17 Jan 2019 21:23:22 +0800
Subject: [PATCH] .

---
 .../{settings.py => settings.py.template}      |  0
 chapter1/crawler/taobao/spiders/jd.py          |  4 +-
 chapter2/mysite/myapp/scrapy_client.py         | 56 +++++++++----------
 3 files changed, 30 insertions(+), 30 deletions(-)
 rename chapter1/crawler/taobao/{settings.py => settings.py.template} (100%)

diff --git a/chapter1/crawler/taobao/settings.py b/chapter1/crawler/taobao/settings.py.template
similarity index 100%
rename from chapter1/crawler/taobao/settings.py
rename to chapter1/crawler/taobao/settings.py.template
diff --git a/chapter1/crawler/taobao/spiders/jd.py b/chapter1/crawler/taobao/spiders/jd.py
index 4f97a33..3f12d53 100644
--- a/chapter1/crawler/taobao/spiders/jd.py
+++ b/chapter1/crawler/taobao/spiders/jd.py
@@ -85,12 +85,12 @@ class JdSpider(scrapy.Spider):
             # time.sleep(10)
 
             if page>settings['COMMENT_MAX_PAGE']:
-                print("评论抓取达到最大深度, %s, 页数: %d", product_id, page)
+                print("评论抓取达到最大深度, %s, 页数: %d" % (product_id, page))
             else:
                 yield Request(next_comment_url, self.parse_comment,
                               meta={'product_id': product_id, 'page': page})
         else:
-            print("评论抓取完成, %s, 页数: %d", product_id, page)
+            print("评论抓取完成, %s, 页数: %d" % (product_id, page))
 
     def parse(self, response):
         items = response.xpath('//div[@id="plist"]//li[@class="gl-item"]')
diff --git a/chapter2/mysite/myapp/scrapy_client.py b/chapter2/mysite/myapp/scrapy_client.py
index 5499a21..d71a032 100644
--- a/chapter2/mysite/myapp/scrapy_client.py
+++ b/chapter2/mysite/myapp/scrapy_client.py
@@ -7,42 +7,42 @@ import happybase
 
 
 def get_crawl_data_info():
-	connection = happybase.Connection('106.75.85.84', port=40009)
-	table = connection.table('jd')
-	num = 0
-	for i in table.scan(scan_batching=True):
-		num += 1
-	return num
-
+    connection = happybase.Connection('106.75.85.84', port=40009)
+    table = connection.table('jd')
+    num = 0
+    for i in table.scan(scan_batching=True):
+        num += 1
+    return num
+
 
 def do_telnet(Host, finish):
-	'''Telnet远程登录:Windows客户端连接Linux服务器'''
+    '''Telnet远程登录:Windows客户端连接Linux服务器'''
 
-	# 连接Telnet服务器
-	tn = telnetlib.Telnet(Host, port=6023, timeout=10)
-	tn.set_debuglevel(2)
+    # 连接Telnet服务器
+    tn = telnetlib.Telnet(Host, port=6023, timeout=10)
+    tn.set_debuglevel(2)
 
-	# 输入登录用户名
-	out = tn.read_until(finish)
-	tn.write(b'est()\n')
+    # 输入登录用户名
+    out = tn.read_until(finish)
+    tn.write(b'est()\n')
 
-	# 输入登录密码
-	out = tn.read_until(finish)
+    # 输入登录密码
+    out = tn.read_until(finish)
 
-	tn.close()  # tn.write('exit\n')
+    tn.close()  # tn.write('exit\n')
 
-	return out.decode('utf8')
+    return out.decode('utf8')
 
 
 def get_scrapy_info():
-	try:
-		response = do_telnet('127.0.0.1', b'>>> ')
-		mm = re.findall(r'(.+?)\s+?:\s+?(.+?)\s+', response)
-		info = {}
-		for m in mm:
-			info[m[0]] = m[1]
-		return info
-	except:
-		return {}
+    try:
+        response = do_telnet('127.0.0.1', b'>>> ')
+        mm = re.findall(r'(.+?)\s+?:\s+?(.+?)\s+', response)
+        info = {}
+        for m in mm:
+            info[m[0]] = m[1]
+        return info
+    except:
+        return {}
 
 if __name__ == "__main__":
-	print(get_crawl_data_info())
\ No newline at end of file
+    print(get_crawl_data_info())
\ No newline at end of file
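
Notes on the changes above:

* jd.py: print() does not interpolate a format string against extra
  arguments the way C's printf or logging.info() do; it prints each
  argument separated by spaces, so the old calls emitted the literal
  "%s" and "%d" placeholders. The fix applies the % operator
  explicitly. A minimal before/after sketch (the product id and page
  values are made up for illustration; the Chinese message reads
  "comment crawl finished, <product_id>, pages: <n>"):

      # Buggy form: prints the raw format string, then the arguments.
      print("评论抓取完成, %s, 页数: %d", "12345", 3)
      # -> 评论抓取完成, %s, 页数: %d 12345 3

      # Patched form: %-interpolation builds the intended message.
      print("评论抓取完成, %s, 页数: %d" % ("12345", 3))
      # -> 评论抓取完成, 12345, 页数: 3

* scrapy_client.py: this hunk is a pure re-indentation (apparently
  tabs to four spaces); behaviour is unchanged. The module polls a
  running Scrapy process through its telnet console, where est()
  prints the engine-status report that get_scrapy_info() parses into
  a dict, and counts crawled rows by scanning the 'jd' HBase table
  with happybase. The Chinese comments in do_telnet ("connect to the
  telnet server", "enter login username", "enter login password") are
  leftovers from a generic telnet login example; the function only
  sends est() and reads until the prompt. A hedged sketch of how a
  Django view in chapter2/mysite/myapp might consume these helpers
  (the view name and URL wiring are assumptions, not part of this
  commit):

      # myapp/views.py (hypothetical consumer of scrapy_client)
      from django.http import JsonResponse

      from . import scrapy_client

      def crawler_status(request):
          # Telnet to the Scrapy console on 127.0.0.1:6023, send est(),
          # and parse the "name : value" lines into a dict; the helper
          # returns {} if the crawler is not running.
          info = scrapy_client.get_scrapy_info()
          # Full-table scan of the 'jd' HBase table to count rows; this
          # is slow on large tables, so consider caching the result.
          info['crawled_items'] = scrapy_client.get_crawl_data_info()
          return JsonResponse(info)

* The settings.py => settings.py.template rename keeps a tracked
  template in the repository, presumably so that a real settings.py
  holding deployment-specific values (such as the HBase address seen
  above) can stay out of version control.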