2019-09-08 22:41:58 +08:00
|
|
|
import csv
|
|
|
|
import logging
|
|
|
|
import random
|
|
|
|
import time
|
2019-09-09 18:55:21 +08:00
|
|
|
from cmd import Cmd
|
2019-09-08 22:41:58 +08:00
|
|
|
|
2019-09-09 18:55:21 +08:00
|
|
|
from DoubanSpider.db import Douban
|
2019-09-08 22:41:58 +08:00
|
|
|
from DoubanSpider.Spider import DoubanBook
|
|
|
|
|
2019-09-09 18:55:21 +08:00
|
|
|
|
|
|
|
class SpiderMain(Cmd):
    """Command-shell skeleton for the spider, built on ``cmd.Cmd``.

    Every command handler defined here is currently a placeholder that
    performs no work and returns ``None``.
    """

    def __init__(self):
        """Set up the underlying ``Cmd`` machinery with its default arguments."""
        super().__init__()

    def do_help(self, arg):
        """Placeholder for ``help``; replaces the inherited handler and does nothing."""
        # A falsy return value tells Cmd's command loop to keep running.
        return None

    def do_start(self, arg):
        """Placeholder for the ``start`` command; ignores ``arg`` and does nothing."""
        return None

    def do_tag(self, arg):
        """Placeholder for the ``tag`` command; ignores ``arg`` and does nothing."""
        return None
|
|
|
|
|
|
|
|
def url_pool():
    """Yield each row of the ``(Douban.url, Douban.tag)`` query, one at a time.

    Relies on the module-level name ``douban``, which this file only binds
    inside its ``__main__`` block, and on ``douban.session`` providing a
    ``query(...)`` whose result has ``.all()``.
    NOTE(review): presumably a SQLAlchemy session -- confirm against
    ``DoubanSpider.Spider``.
    """
    # .all() materialises the full result before the first row is yielded.
    url_tag_query = douban.session.query(Douban.url, Douban.tag)
    for record in url_tag_query.all():
        yield record
|
|
|
|
|
|
|
|
|
2019-09-08 22:41:58 +08:00
|
|
|
if __name__ == '__main__':
    # Script entry point: append a run header to results.csv, then start
    # the spider.
    logger = logging.getLogger("PAPA")
    # Random integer in [0, 3] (inclusive), kept as a module-level name.
    sleeptime = random.randint(0, 3)

    # newline='' hands line-ending control to the csv module; without it
    # every row gains an extra '\r' on platforms that translate '\n'.
    with open("results.csv", "a", encoding='utf-8', newline='') as f:
        writer = csv.writer(f)
        # writerow() iterates its argument, so a bare string would be split
        # into one column per character. Wrap the timestamp in a list so it
        # is written as a single cell.
        writer.writerow([time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())])
        writer.writerow(["书名", "作者", "上市时间", "价格", "评分", "书籍分类", "内容简介"])

    # NOTE(review): the original indentation is not recoverable from this
    # view. The spider is started after the file is closed so the header
    # rows are already on disk before crawling begins -- confirm that
    # DoubanBook does not rely on the handle above staying open.
    douban = DoubanBook()
    douban.main()
|