diff --git a/chapter1/crawler/taobao/utils.py b/chapter1/crawler/taobao/utils.py
index 3660494..b5d8f8d 100644
--- a/chapter1/crawler/taobao/utils.py
+++ b/chapter1/crawler/taobao/utils.py
@@ -1,10 +1,11 @@
 # coding=utf-8
-import time
-import requests
-import re
-import time
 import json
+import re
+import sys
+import time
+
 import pymysql
+import requests
 
 
 def mysql_query(sql):
@@ -17,13 +18,13 @@ def mysql_query(sql):
     return data
 
 
-def mysql_execute(sql):
+def mysql_execute(sql, port=3306):
     print("execute: %s" % sql)
     db = pymysql.connect(host="127.0.0.1",
                          user="root",
                          password="123456789",
                          database="sparkproject",
-                         port=12346)
+                         port=port)
     cursor = db.cursor()
 
     try:
@@ -59,7 +60,7 @@ def check_alive_proxy(ip, port):
         raise RuntimeError("连接出错")
 
 
-if __name__ == "__main__":
+def proxy(port):
     r = requests.get(
         'https://raw.githubusercontent.com/fate0/proxylist/master/proxy.list')
     print(r.status_code)
@@ -73,7 +74,17 @@ if __name__ == "__main__":
             print("%s:%d" % (o["host"], o["port"]))
             mysql_execute("insert into proxys (ip,port,checktime) values ('{}', {}, {})".format(
                 o['host'], o['port'], check_time
-            ))
+            ), port)
         except Exception as e:
             # print(e)
             pass
+
+
+if __name__ == "__main__":
+    port = 3306
+    if len(sys.argv) > 1:
+        port = int(sys.argv[1])
+
+    while True:
+        proxy(port)
+        time.sleep(15*60)