# bigdata/chapter1/crawler/demo.py
# (scraped listing metadata: 31 lines, 778 B, Python)

import re,requests,json
# Shared session keeps the TCP connection to JD alive across page requests.
s = requests.session()

# JD.com product-comment endpoint. It answers with JSONP: the JSON payload
# wrapped in the callback named in data['callback'] below.
url = 'https://club.jd.com/comment/productPageComments.action'

data = {
    'callback': 'fetchJSON_comment98vv61',  # JSONP wrapper name echoed back by the server
    'productId': '3888284',                 # product whose reviews are crawled
    'score': 0,                             # 0 presumably means "all ratings" -- TODO confirm
    'sortType': 5,
    'pageSize': 10,                         # comments per page
    'isShadowSku': 0,
    'page': 0,                              # current page; incremented at the bottom of the loop
}

# Strip the JSONP wrapper "<callback>( ... );" to obtain the raw JSON text.
# Derived from data['callback'] (the original hard-coded the same name twice)
# and compiled once, outside the loop.
_jsonp = re.compile(r'(?<=' + re.escape(data['callback'] + '(') + r').*(?=\);)')

while True:
    # timeout so a stalled connection cannot hang the crawler forever
    t = s.get(url, params=data, timeout=10).text
    m = _jsonp.search(t)
    if m is None:
        # Response is not the expected JSONP (error page, block, etc.) -> stop.
        # The original reached this via a broad `except Exception` around
        # `.group(0)` on a None match; the explicit check says what we mean.
        break
    j = json.loads(m.group(0))
    commentSummary = j['comments']
    if not commentSummary:
        # NOTE(review): past the last page the server appears to keep
        # returning valid JSONP with an empty comment list, which made the
        # original loop forever; stopping on an empty page fixes that.
        break
    for comment in commentSummary:
        c_content = comment['content']
        c_time = comment['referenceTime']
        c_name = comment['nickname']
        c_client = comment['userClientShow']
        print('{} {} {}\n{}\n'.format(c_name, c_time, c_client, c_content))
    data['page'] += 1