bigdata/chapter1/crawler/demo.py
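
"""Demo crawler: fetch product comments from JD.com.

Pages through club.jd.com's JSONP comment endpoint for one product,
strips the callback wrapper, and prints each reviewer's nickname,
timestamp, client, and comment text.
"""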

import re
import json
import time

import requests

# One shared session keeps cookies and reuses the connection across
# page requests.
s = requests.session()
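# JD's endpoints may reject the default requests User-Agent, so a
# browser-like one is set here; the exact string is an assumption and
# not part of the original demo.
s.headers.update({'User-Agent': 'Mozilla/5.0'})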
url = "https://club.jd.com/comment/productPageComments.action"
# Query parameters for JD's comment endpoint. The callback name doubles
# as the JSONP wrapper that main() strips off.
data = {
    'callback': 'fetchJSON_comment98vv61',
    'productId': '3888284',
    'score': 0,       # 0 = comments of every rating
    'sortType': 5,    # 5 = sort by recommendation
    'pageSize': 10,
    'isShadowSku': 0,
    'page': 0,
}
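
# The endpoint answers with JSONP shaped roughly like the following
# (illustrative, not a captured response):
#   fetchJSON_comment98vv61({"comments": [...], ...});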


def main():
    while True:
        t = s.get(url, params=data).text
        try:
            # Strip the JSONP wrapper "fetchJSON_comment98vv61(...);"
            # so only the JSON payload remains.
            t = re.search(
                r'(?<=fetchJSON_comment98vv61\().*(?=\);)', t).group(0)
        except AttributeError:
            # re.search() found no match: the response is not the expected
            # JSONP, so stop instead of feeding raw text to json.loads().
            print('unexpected response for page', data['page'])
            break
        j = json.loads(t)
        comment_summary = j["comments"]
        if not comment_summary:
            break  # an empty page means the comments are exhausted
        for comment in comment_summary:
            c_content = comment["content"]
            c_time = comment['referenceTime']
            c_name = comment['nickname']
            c_client = comment['userClientShow']
            print('{} {} {}\n{}\n'.format(c_name, c_time, c_client,
                                          c_content))
        data['page'] += 1
        time.sleep(1)  # pause briefly between pages


if __name__ == "__main__":
    import datetime

    # Print a one-day timedelta and the timestamp from 24 hours ago,
    # then start crawling.
    oldtime = datetime.timedelta(days=1)
    print(oldtime)
    print(datetime.datetime.now() - oldtime)
    main()
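
# Example run (assumes the third-party `requests` package is installed):
#   $ pip install requests
#   $ python demo.py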