guange 2019-01-17 21:23:22 +08:00
parent 26b2e9efab
commit 41e092becc
3 changed files with 30 additions and 30 deletions


@@ -85,12 +85,12 @@ class JdSpider(scrapy.Spider):
             # time.sleep(10)
             if page > settings['COMMENT_MAX_PAGE']:
-                print("Comment crawl reached max depth, %s, pages: %d", product_id, page)
+                print("Comment crawl reached max depth, %s, pages: %d" % (product_id, page))
             else:
                 yield Request(next_comment_url, self.parse_comment,
                               meta={'product_id': product_id, 'page': page})
         else:
-            print("Comment crawl finished, %s, pages: %d", product_id, page)
+            print("Comment crawl finished, %s, pages: %d" % (product_id, page))

     def parse(self, response):
         items = response.xpath('//div[@id="plist"]//li[@class="gl-item"]')
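The fix above matters because print() does not interpolate %-style
placeholders in its first argument; it simply writes every argument
separated by spaces. A minimal before/after demonstration (the values
are illustrative):

product_id, page = "12345", 7

# Before the fix: the raw template is printed, followed by the values.
print("Comment crawl finished, %s, pages: %d", product_id, page)
# -> Comment crawl finished, %s, pages: %d 12345 7

# After the fix: %-formatting builds the message before it is printed.
print("Comment crawl finished, %s, pages: %d" % (product_id, page))
# -> Comment crawl finished, 12345, pages: 7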


@@ -7,42 +7,42 @@ import happybase
def get_crawl_data_info():
    connection = happybase.Connection('106.75.85.84', port=40009)
    table = connection.table('jd')
    num = 0
    for i in table.scan(scan_batching=True):
        num += 1
    return num
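get_crawl_data_info() counts rows by scanning the whole 'jd' table,
which pulls every cell over the Thrift connection. A key-only scan
cuts the transfer down to roughly the row keys; a sketch using the
same happybase API (count_rows_key_only is a hypothetical helper, and
the filter string assumes the HBase server supports the standard
KeyOnlyFilter/FirstKeyOnlyFilter):

def count_rows_key_only(host='106.75.85.84', port=40009, name='jd'):
    # Hypothetical helper, not part of this commit. KeyOnlyFilter drops
    # cell values and FirstKeyOnlyFilter returns one cell per row, so
    # the scan streams little more than the row keys.
    connection = happybase.Connection(host, port=port)
    try:
        table = connection.table(name)
        num = 0
        for _key, _data in table.scan(
                filter="KeyOnlyFilter() AND FirstKeyOnlyFilter()",
                scan_batching=True):
            num += 1
        return num
    finally:
        connection.close()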
def do_telnet(Host, finish):
    '''Telnet remote login: a Windows client talking to a Linux server'''

    # Connect to the telnet server (Scrapy's telnet console listens
    # on port 6023 by default)
    tn = telnetlib.Telnet(Host, port=6023, timeout=10)
    tn.set_debuglevel(2)

    # Wait for the interactive prompt, then send est() to dump
    # the engine status
    out = tn.read_until(finish)
    tn.write(b'est()\n')

    # Wait for the prompt again so the full response is captured
    out = tn.read_until(finish)

    tn.close()  # tn.write('exit\n')
    return out.decode('utf8')
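do_telnet() leaves the connection open if a read or write raises along
the way. A slightly safer variant using the same telnetlib calls
(do_telnet_safe is a hypothetical name, not part of this commit):

def do_telnet_safe(host, finish):
    out = b''
    tn = telnetlib.Telnet(host, port=6023, timeout=10)
    try:
        tn.read_until(finish, timeout=10)  # wait for the first prompt
        tn.write(b'est()\n')               # est() prints Scrapy's engine status
        out = tn.read_until(finish, timeout=10)
    finally:
        tn.close()  # closed even if the reads come back empty or raise
    return out.decode('utf8')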
def get_scrapy_info():
    try:
        response = do_telnet('127.0.0.1', b'>>> ')
        mm = re.findall(r'(.+?)\s+?:\s+?(.+?)\s+', response)
        info = {}
        for m in mm:
            info[m[0]] = m[1]
        return info
    except:
        return {}
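The regex in get_scrapy_info() expects the "name : value" lines that
est() prints. A quick check against a sample response (the sample text
is illustrative of Scrapy's engine-status format):

sample = ("time()-engine.start_time                : 21.31\n"
          "engine.has_capacity()                   : False\n"
          "len(engine.downloader.active)           : 16\n")
mm = re.findall(r'(.+?)\s+?:\s+?(.+?)\s+', sample)
print(dict(mm))
# -> {'time()-engine.start_time': '21.31',
#     'engine.has_capacity()': 'False',
#     'len(engine.downloader.active)': '16'}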
if __name__ == "__main__":
    print(get_crawl_data_info())