This commit is contained in:
guange 2019-01-17 21:23:22 +08:00
parent 26b2e9efab
commit 41e092becc
3 changed files with 30 additions and 30 deletions

View File

@ -85,12 +85,12 @@ class JdSpider(scrapy.Spider):
# time.sleep(10) # time.sleep(10)
if page>settings['COMMENT_MAX_PAGE']: if page>settings['COMMENT_MAX_PAGE']:
print("评论抓取达到最大深度, %s, 页数: %d", product_id, page) print("评论抓取达到最大深度, %s, 页数: %d" % (product_id, page))
else: else:
yield Request(next_comment_url, self.parse_comment, yield Request(next_comment_url, self.parse_comment,
meta={'product_id': product_id, 'page': page}) meta={'product_id': product_id, 'page': page})
else: else:
print("评论抓取完成, %s, 页数: %d", product_id, page) print("评论抓取完成, %s, 页数: %d" % (product_id, page))
def parse(self, response): def parse(self, response):
items = response.xpath('//div[@id="plist"]//li[@class="gl-item"]') items = response.xpath('//div[@id="plist"]//li[@class="gl-item"]')

View File

@ -7,42 +7,42 @@ import happybase
def get_crawl_data_info(host='106.75.85.84', port=40009, table_name='jd'):
    """Count the rows stored in an HBase table.

    Opens a happybase connection, scans the whole table and counts the rows
    the scan yields. The connection is always closed afterwards, including
    when the scan raises.

    Args:
        host: Address of the HBase Thrift server to connect to.
        port: Port of the HBase Thrift server.
        table_name: Name of the table whose rows are counted.

    Returns:
        The number of rows yielded by the scan, as an int.
    """
    connection = happybase.Connection(host, port=port)
    try:
        table = connection.table(table_name)
        # Only the number of rows matters, so the row contents are discarded.
        return sum(1 for _ in table.scan(scan_batching=True))
    finally:
        # Release the connection even when the scan fails part-way through.
        connection.close()
def do_telnet(Host, finish, port=6023, timeout=10):
    """Run ``est()`` on a telnet console and return what it printed.

    Connects to ``Host``, waits for the prompt ``finish``, sends the
    ``est()`` command and reads everything up to the next prompt. No login
    is performed. The default port 6023 is the Scrapy telnet console's
    default, where ``est()`` prints the engine status report.

    Args:
        Host: Host name or address of the telnet console.
        finish: Prompt bytes marking the end of a response (e.g. ``b'>>> '``).
        port: TCP port of the telnet console.
        timeout: Connection timeout in seconds.

    Returns:
        The text received after sending ``est()``, up to and including the
        next prompt, decoded as UTF-8.

    Raises:
        OSError: If the connection cannot be established.
        EOFError: If the peer closes the connection before the prompt is seen.
    """
    tn = telnetlib.Telnet(Host, port=port, timeout=timeout)
    try:
        # Debug level 2 makes telnetlib trace the session on stdout.
        tn.set_debuglevel(2)
        # Skip the banner: wait until the console shows its first prompt.
        tn.read_until(finish)
        tn.write(b'est()\n')
        # Everything up to the next prompt is the command's output.
        out = tn.read_until(finish)
    finally:
        # Close the socket even if reading or writing fails.
        tn.close()
    return out.decode('utf8')
def get_scrapy_info():
    """Fetch the local telnet console's ``est()`` report as a dict.

    Calls ``do_telnet`` against 127.0.0.1 with the ``>>> `` prompt and parses
    the ``name : value`` pairs out of the returned text. This is best-effort:
    any failure to obtain the report yields an empty dict instead of raising.

    Returns:
        A dict mapping each reported name to its value, both as strings.
        Empty if the console could not be queried. If a name occurs more
        than once, the last value wins.
    """
    try:
        response = do_telnet('127.0.0.1', b'>>> ')
    except Exception:
        # Best-effort: an unreachable console means "no info", but
        # KeyboardInterrupt and SystemExit must still propagate.
        return {}
    # Each match is a (name, value) tuple taken from a "name : value" pair.
    pairs = re.findall(r'(.+?)\s+?:\s+?(.+?)\s+', response)
    return dict(pairs)
if __name__ == "__main__":
    # Script entry point: print the count returned by get_crawl_data_info().
    row_total = get_crawl_data_info()
    print(row_total)