This commit is contained in:
parent
26b2e9efab
commit
41e092becc
|
@ -85,12 +85,12 @@ class JdSpider(scrapy.Spider):
|
|||
# time.sleep(10)
|
||||
|
||||
if page>settings['COMMENT_MAX_PAGE']:
|
||||
print("评论抓取达到最大深度, %s, 页数: %d", product_id, page)
|
||||
print("评论抓取达到最大深度, %s, 页数: %d" % (product_id, page))
|
||||
else:
|
||||
yield Request(next_comment_url, self.parse_comment,
|
||||
meta={'product_id': product_id, 'page': page})
|
||||
else:
|
||||
print("评论抓取完成, %s, 页数: %d", product_id, page)
|
||||
print("评论抓取完成, %s, 页数: %d" % (product_id, page))
|
||||
|
||||
def parse(self, response):
|
||||
items = response.xpath('//div[@id="plist"]//li[@class="gl-item"]')
|
||||
|
|
|
@ -7,42 +7,42 @@ import happybase
|
|||
|
||||
|
||||
def get_crawl_data_info():
    """Count the rows currently stored in the HBase ``jd`` table.

    Opens a happybase connection, runs a full scan of the ``jd`` table and
    counts the entries the scan yields.

    Returns:
        int: Number of entries yielded by the table scan.
    """
    connection = happybase.Connection('106.75.85.84', port=40009)
    try:
        table = connection.table('jd')
        # Only the number of rows matters; the row contents are discarded.
        return sum(1 for _ in table.scan(scan_batching=True))
    finally:
        # Release the connection even when the scan fails part-way, so
        # repeated calls do not leak sockets.
        connection.close()
|
||||
|
||||
|
||||
def do_telnet(Host, finish):
    """Send ``est()`` to a telnet console on ``Host`` and return its reply.

    Connects to port 6023, waits for the prompt, sends the ``est()``
    command and reads until the prompt appears again. No username or
    password is exchanged.

    NOTE(review): port 6023 and ``est()`` match the defaults of Scrapy's
    telnet console (engine status report) -- confirm that is the target.

    Args:
        Host: Host name or address of the telnet console.
        finish: Bytes marker that terminates each read (the console
            prompt, e.g. ``b'>>> '``).

    Returns:
        str: The bytes read after sending ``est()``, decoded as UTF-8.
    """
    # Connect to the telnet console.
    tn = telnetlib.Telnet(Host, port=6023, timeout=10)
    try:
        tn.set_debuglevel(2)

        # Consume the banner up to the first prompt; its content is unused.
        tn.read_until(finish)
        tn.write(b'est()\n')

        # Everything up to the next prompt is the command's output.
        out = tn.read_until(finish)
    finally:
        # Always release the socket, including when a read or write fails.
        tn.close()

    return out.decode('utf8')
|
||||
|
||||
def get_scrapy_info():
    """Fetch the local telnet console report and parse it into a dict.

    Calls ``do_telnet`` against ``127.0.0.1`` with the ``b'>>> '`` prompt
    and extracts every ``name : value`` pair from the returned text.

    Returns:
        dict: Mapping of the text before each colon to the text after it;
        when a name repeats, the last value wins. An empty dict is
        returned if the console cannot be queried.
    """
    try:
        response = do_telnet('127.0.0.1', b'>>> ')
    except Exception:
        # Best effort: an unreachable console yields no info rather than a
        # crash. KeyboardInterrupt/SystemExit are deliberately not caught.
        return {}

    # findall returns (name, value) tuples, which dict() consumes directly.
    return dict(re.findall(r'(.+?)\s+?:\s+?(.+?)\s+', response))
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: print the value returned by get_crawl_data_info().
    row_count = get_crawl_data_info()
    print(row_count)
|
Loading…
Reference in New Issue