Compare commits
4 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
d20cd9b093 | ||
|
2a5c032013 | ||
|
48ea3536cf | ||
|
c33ad58d34 |
BIN
DoubanSpider/douban.db
Normal file
BIN
DoubanSpider/douban.db
Normal file
Binary file not shown.
@@ -4,5 +4,5 @@ idna==2.8
|
|||||||
requests==2.22.0
|
requests==2.22.0
|
||||||
SQLAlchemy==1.3.8
|
SQLAlchemy==1.3.8
|
||||||
sqlit==0.1.6
|
sqlit==0.1.6
|
||||||
urllib3==1.25.8
|
urllib3==1.25.3
|
||||||
wincertstore==0.2
|
wincertstore==0.2
|
||||||
|
@@ -4,14 +4,14 @@
|
|||||||
```shell
|
```shell
|
||||||
git clone https://github.com/nyaasuki/DoubanBookSpider.git
|
git clone https://github.com/nyaasuki/DoubanBookSpider.git
|
||||||
cd ./DoubanBookSpider
|
cd ./DoubanBookSpider
|
||||||
python3 main.py
|
python3 Spider.py
|
||||||
```
|
```
|
||||||
## Window
|
## Window
|
||||||
配置好环境后
|
配置好环境后
|
||||||
下载并解压https://github.com/nyaasuki/DoubanBookSpider/archive/master.zip
|
下载并解压https://github.com/nyaasuki/DoubanBookSpider/archive/master.zip
|
||||||
使用cmd命令运行
|
使用cmd命令运行
|
||||||
```DOS
|
```DOS
|
||||||
python path\DoubanBookSpider\main.py #path为文件存放路径
|
python path\DoubanBookSpider\Spider.py #path为文件存放路径
|
||||||
```
|
```
|
||||||
|
|
||||||
或者
|
或者
|
||||||
@@ -22,5 +22,4 @@ __将已经写好的脚本文件拖拽到当前光标位置,然后敲回车运
|
|||||||
|
|
||||||
|
|
||||||
# 重置程序
|
# 重置程序
|
||||||
删除DoubanBookSpider目录下的douban.db即可
|
删除DoubanBookSpider目录下的Spider.db即可
|
||||||
|
|
4
main.py
4
main.py
@@ -19,6 +19,7 @@ class SpiderMain(Cmd):
|
|||||||
print('[Help] tag all - 爬取所有标签下的书籍')
|
print('[Help] tag all - 爬取所有标签下的书籍')
|
||||||
print('[Help] quit - 退出程序')
|
print('[Help] quit - 退出程序')
|
||||||
|
|
||||||
|
|
||||||
def do_start(self, arg):
|
def do_start(self, arg):
|
||||||
for row in url_pool():
|
for row in url_pool():
|
||||||
douban.get_data(row)
|
douban.get_data(row)
|
||||||
@@ -42,8 +43,7 @@ class SpiderMain(Cmd):
|
|||||||
|
|
||||||
|
|
||||||
def url_pool():
|
def url_pool():
|
||||||
m = douban.session.query(Douban.url, Douban.tag).all()
|
if not n:
|
||||||
if not m:
|
|
||||||
print('[Spider]你需要先获取tag数据!')
|
print('[Spider]你需要先获取tag数据!')
|
||||||
else:
|
else:
|
||||||
for row in douban.session.query(Douban.url, Douban.tag, Douban.id).all():
|
for row in douban.session.query(Douban.url, Douban.tag, Douban.id).all():
|
||||||
|
Loading…
Reference in New Issue
Block a user