# bigdata/chapter1/crawler/demo.py
# (scraped listing metadata: 31 lines, 778 B, Python)

import re,requests,json
# Shared session keeps the TCP connection to JD alive across page requests.
s = requests.session()

# JD.com product-comment endpoint. It answers with JSONP: the JSON payload
# wrapped in the callback named in data['callback'] below.
url = 'https://club.jd.com/comment/productPageComments.action'

data = {
    'callback': 'fetchJSON_comment98vv61',  # JSONP wrapper name echoed back by the server
    'productId': '3888284',                 # product whose reviews are crawled
    'score': 0,                             # 0 presumably means "all ratings" -- TODO confirm
    'sortType': 5,
    'pageSize': 10,                         # comments per page
    'isShadowSku': 0,
    'page': 0,                              # current page; incremented at the bottom of the loop
}

# Strip the JSONP wrapper "<callback>( ... );" to obtain the raw JSON text.
# Derived from data['callback'] (the original hard-coded the same name twice)
# and compiled once, outside the loop.
_jsonp = re.compile(r'(?<=' + re.escape(data['callback'] + '(') + r').*(?=\);)')

while True:
    # timeout so a stalled connection cannot hang the crawler forever
    t = s.get(url, params=data, timeout=10).text
    m = _jsonp.search(t)
    if m is None:
        # Response is not the expected JSONP (error page, block, etc.) -> stop.
        # The original reached this via a broad `except Exception` around
        # `.group(0)` on a None match; the explicit check says what we mean.
        break
    j = json.loads(m.group(0))
    commentSummary = j['comments']
    if not commentSummary:
        # NOTE(review): past the last page the server appears to keep
        # returning valid JSONP with an empty comment list, which made the
        # original loop forever; stopping on an empty page fixes that.
        break
    for comment in commentSummary:
        c_content = comment['content']
        c_time = comment['referenceTime']
        c_name = comment['nickname']
        c_client = comment['userClientShow']
        print('{} {} {}\n{}\n'.format(c_name, c_time, c_client, c_content))
    data['page'] += 1