From ee0aea16aa6aec9e141b3258df01850f086ac143 Mon Sep 17 00:00:00 2001 From: guange <8863824@gmail.com> Date: Sun, 13 Jan 2019 11:34:47 +0800 Subject: [PATCH] =?UTF-8?q?=E6=8A=93=E5=8F=96=E8=AF=84=E8=AE=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 4 + chapter1/.idea/chapter1.iml | 22 + chapter1/.idea/encodings.xml | 4 + chapter1/.idea/misc.xml | 7 + chapter1/.idea/modules.xml | 8 + chapter1/.idea/vcs.xml | 6 + chapter1/.idea/workspace.xml | 661 ++++++++ chapter1/crawler/datas/kind.json | 1508 ----------------- .../__pycache__/__init__.cpython-37.pyc | Bin 144 -> 149 bytes .../taobao/__pycache__/items.cpython-37.pyc | Bin 631 -> 991 bytes .../__pycache__/middlewares.cpython-37.pyc | Bin 4345 -> 4326 bytes .../__pycache__/pipelines.cpython-37.pyc | Bin 704 -> 2258 bytes .../__pycache__/settings.cpython-37.pyc | Bin 569 -> 578 bytes chapter1/crawler/taobao/items.py | 10 + chapter1/crawler/taobao/merge.py | 48 + chapter1/crawler/taobao/pipelines.py | 10 + .../__pycache__/__init__.cpython-37.pyc | Bin 152 -> 157 bytes .../__pycache__/dangdang.cpython-37.pyc | Bin 2374 -> 2381 bytes .../__pycache__/httpbin.cpython-37.pyc | Bin 1950 -> 1955 bytes .../spiders/__pycache__/meitu.cpython-37.pyc | Bin 4891 -> 0 bytes .../spiders/__pycache__/porn.cpython-37.pyc | Bin 701 -> 0 bytes .../spiders/__pycache__/users.cpython-37.pyc | Bin 1397 -> 1402 bytes chapter1/crawler/taobao/spiders/category.py | 100 ++ chapter1/crawler/taobao/spiders/comment.py | 117 ++ 24 files changed, 997 insertions(+), 1508 deletions(-) create mode 100644 chapter1/.idea/chapter1.iml create mode 100644 chapter1/.idea/encodings.xml create mode 100644 chapter1/.idea/misc.xml create mode 100644 chapter1/.idea/modules.xml create mode 100644 chapter1/.idea/vcs.xml create mode 100644 chapter1/.idea/workspace.xml delete mode 100644 chapter1/crawler/datas/kind.json create mode 100644 chapter1/crawler/taobao/merge.py delete mode 100644 chapter1/crawler/taobao/spiders/__pycache__/meitu.cpython-37.pyc delete mode 100644 chapter1/crawler/taobao/spiders/__pycache__/porn.cpython-37.pyc create mode 100644 chapter1/crawler/taobao/spiders/category.py create mode 100644 chapter1/crawler/taobao/spiders/comment.py diff --git a/.gitignore b/.gitignore index ee26f5d..59daf2a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,7 @@ SprakProject/ chapter1/env *.ipynb +*.pyc +chapter1/crawler/datas/comments/ +chapter1/crawler/datas/comments1/ +chapter1/crawler/datas/products/ diff --git a/chapter1/.idea/chapter1.iml b/chapter1/.idea/chapter1.iml new file mode 100644 index 0000000..ee1e345 --- /dev/null +++ b/chapter1/.idea/chapter1.iml @@ -0,0 +1,22 @@ + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/chapter1/.idea/encodings.xml b/chapter1/.idea/encodings.xml new file mode 100644 index 0000000..15a15b2 --- /dev/null +++ b/chapter1/.idea/encodings.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/chapter1/.idea/misc.xml b/chapter1/.idea/misc.xml new file mode 100644 index 0000000..92d7ed4 --- /dev/null +++ b/chapter1/.idea/misc.xml @@ -0,0 +1,7 @@ + + + + + + \ No newline at end of file diff --git a/chapter1/.idea/modules.xml b/chapter1/.idea/modules.xml new file mode 100644 index 0000000..daf6423 --- /dev/null +++ b/chapter1/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/chapter1/.idea/vcs.xml b/chapter1/.idea/vcs.xml new file mode 100644 index 0000000..6c0b863 --- /dev/null +++ b/chapter1/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/chapter1/.idea/workspace.xml b/chapter1/.idea/workspace.xml new file mode 100644 index 0000000..238f824 --- /dev/null +++ b/chapter1/.idea/workspace.xml @@ -0,0 +1,661 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 日本可爱毛球熊猫束发带 + 美式风 + + 皮床 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +