This commit is contained in:
parent
26b2e9efab
commit
41e092becc
|
@ -85,12 +85,12 @@ class JdSpider(scrapy.Spider):
|
||||||
# time.sleep(10)
|
# time.sleep(10)
|
||||||
|
|
||||||
if page>settings['COMMENT_MAX_PAGE']:
|
if page>settings['COMMENT_MAX_PAGE']:
|
||||||
print("评论抓取达到最大深度, %s, 页数: %d", product_id, page)
|
print("评论抓取达到最大深度, %s, 页数: %d" % (product_id, page))
|
||||||
else:
|
else:
|
||||||
yield Request(next_comment_url, self.parse_comment,
|
yield Request(next_comment_url, self.parse_comment,
|
||||||
meta={'product_id': product_id, 'page': page})
|
meta={'product_id': product_id, 'page': page})
|
||||||
else:
|
else:
|
||||||
print("评论抓取完成, %s, 页数: %d", product_id, page)
|
print("评论抓取完成, %s, 页数: %d" % (product_id, page))
|
||||||
|
|
||||||
def parse(self, response):
|
def parse(self, response):
|
||||||
items = response.xpath('//div[@id="plist"]//li[@class="gl-item"]')
|
items = response.xpath('//div[@id="plist"]//li[@class="gl-item"]')
|
||||||
|
|
|
@ -7,42 +7,42 @@ import happybase
|
||||||
|
|
||||||
|
|
||||||
def get_crawl_data_info(host='106.75.85.84', port=40009, table_name='jd'):
    """Return the number of rows stored in an HBase table.

    Opens a happybase connection, scans the whole table and counts the
    rows the scan yields. The connection is always closed afterwards,
    even when the scan fails part-way through.

    Args:
        host: Address passed to ``happybase.Connection``. Defaults to the
            address the original code hard-coded.
        port: Port passed to ``happybase.Connection``.
        table_name: Name of the table to count.

    Returns:
        The row count as an int (0 for an empty table).

    Raises:
        Whatever happybase raises when the connection or scan fails;
        nothing is caught here.
    """
    connection = happybase.Connection(host, port=port)
    try:
        table = connection.table(table_name)
        # Only the number of rows matters, so the scanned row data is
        # discarded as soon as it arrives.
        return sum(1 for _ in table.scan(scan_batching=True))
    finally:
        # The original never released the connection.
        connection.close()
|
||||||
|
|
||||||
|
|
||||||
def do_telnet(Host, finish):
    """Run ``est()`` on a telnet console and return its output as text.

    Connects to ``Host`` on port 6023, waits for the prompt, sends
    ``est()`` and reads everything up to the next prompt.
    NOTE(review): port 6023 and ``est()`` look like the Scrapy telnet
    console and its engine-status command -- confirm against the
    deployment.

    Args:
        Host: Host name or address of the telnet console.
        finish: Prompt marker as bytes (for example ``b'>>> '``); reading
            stops each time this marker is seen.

    Returns:
        The bytes read after sending ``est()``, up to and including the
        next prompt, decoded as UTF-8.

    Raises:
        OSError: If the connection cannot be established.
        EOFError: If the peer closes the connection before the prompt
            is seen.
        UnicodeDecodeError: If the reply is not valid UTF-8.
    """
    # The 10 second timeout is the one given to the Telnet constructor;
    # read_until() below is called without its own timeout.
    tn = telnetlib.Telnet(Host, port=6023, timeout=10)
    try:
        # Level 2 makes telnetlib print its protocol trace to stdout.
        tn.set_debuglevel(2)

        # Skip everything up to the first prompt; this output is unused.
        tn.read_until(finish)
        tn.write(b'est()\n')

        # The reply to est() ends with the next prompt.
        out = tn.read_until(finish)
    finally:
        # Close the socket even when a read or write fails.
        tn.close()

    return out.decode('utf8')
|
||||||
|
|
||||||
def get_scrapy_info():
    """Return the local telnet console's ``est()`` report as a dict.

    Calls ``do_telnet('127.0.0.1', b'>>> ')`` and extracts every
    ``name : value`` pair from the returned text. This is best-effort:
    if the console cannot be queried for any ordinary reason, an empty
    dict is returned instead of raising.

    Returns:
        A dict mapping each reported name to its value, both as strings.
        When a name occurs more than once the last value wins. Returns
        ``{}`` if querying the console fails.
    """
    try:
        response = do_telnet('127.0.0.1', b'>>> ')
    except Exception:
        # Deliberately broad so status polling never crashes the caller,
        # but unlike a bare ``except:`` it lets KeyboardInterrupt and
        # SystemExit propagate.
        return {}

    # Each match is a (name, value) tuple, so the list converts directly
    # into a dict.
    return dict(re.findall(r'(.+?)\s+?:\s+?(.+?)\s+', response))
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Script entry point: print the value returned by
    # get_crawl_data_info().
    row_count = get_crawl_data_info()
    print(row_count)
|
Loading…
Reference in New Issue