Release 2.0 #1

Merged
nyaasuki merged 4 commits from Dev into master 2019-09-10 21:12:35 +08:00
3 changed files with 24 additions and 16 deletions
Showing only changes of commit 48ea3536cf - Show all commits

View File

@ -124,19 +124,3 @@ class DoubanBook(object):
else: else:
print('[Spider]检测到现有TAG数据开始抓取...') print('[Spider]检测到现有TAG数据开始抓取...')
self.get_data() self.get_data()
def url_pool():
    """Yield each ``(url, tag)`` row queried from the ``Douban`` model.

    The query runs through ``douban.session``; ``douban`` is not a local
    name, so it must already be bound at module level when the generator
    is first advanced.
    """
    stored_rows = douban.session.query(Douban.url, Douban.tag).all()
    for entry in stored_rows:
        yield entry
if __name__ == '__main__':
    # Script entry point: set up logging and the CSV output file, then run
    # the spider.
    # `logger` and `sleeptime` are bound at module level here; they are not
    # read inside this block, but are kept because code elsewhere in the file
    # may rely on them as globals.
    logger = logging.getLogger("PAPA")
    sleeptime = random.randint(0, 3)
    # newline='' is what the csv module requires for files handed to
    # csv.writer; without it extra blank rows appear on Windows.
    with open("results.csv", "a", encoding='utf-8', newline='') as f:
        writer = csv.writer(f)
        # writerow() takes a sequence of fields. Passing the bare timestamp
        # string made every character its own column, so wrap it in a list
        # to write the timestamp as a single cell.
        writer.writerow([time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())])
        writer.writerow(["书名", "作者", "上市时间", "价格", "评分", "书籍分类", "内容简介"])
        # NOTE(review): the crawl is started while the CSV file is still open
        # on the assumption that the spider writes rows through `writer` --
        # confirm against the DoubanBook implementation.
        douban = DoubanBook()
        douban.main()

View File

@ -15,11 +15,13 @@ class Douban(Base):
def __repr__(self): def __repr__(self):
return "<Douban(id='%d', tag='%s',url='%s')>" % (self.id, self.tag, self.url) return "<Douban(id='%d', tag='%s',url='%s')>" % (self.id, self.tag, self.url)
class Recording(Base): class Recording(Base):
__tablename__ = 'Recording' __tablename__ = 'Recording'
id = Column(Integer, primary_key=True) id = Column(Integer, primary_key=True)
data = Column(Integer, unique=True, nullable=False) data = Column(Integer, unique=True, nullable=False)
if os.path.isfile('douban.db') is False: if os.path.isfile('douban.db') is False:
print('正在创建数据库...') print('正在创建数据库...')
Base.metadata.create_all() Base.metadata.create_all()

22
main.py
View File

@ -2,9 +2,31 @@ import csv
import logging import logging
import random import random
import time import time
from cmd import Cmd
from DoubanSpider.db import Douban
from DoubanSpider.Spider import DoubanBook from DoubanSpider.Spider import DoubanBook
class SpiderMain(Cmd):
    """Interactive command shell skeleton built on :class:`cmd.Cmd`.

    Every command handler defined here is currently an empty placeholder.
    """

    def __init__(self):
        """Initialise the underlying ``Cmd`` with its default arguments."""
        super().__init__()

    def do_help(self, arg):
        """Placeholder for ``help``; overrides ``Cmd.do_help`` and does nothing."""

    def do_start(self, arg):
        """Placeholder for the ``start`` command; does nothing yet."""

    def do_tag(self, arg):
        """Placeholder for the ``tag`` command; does nothing yet."""
def url_pool():
    """Generate the ``(url, tag)`` rows returned by a query on ``Douban``.

    Uses the session of the module-level ``douban`` object, which has to be
    bound before the first row is requested.
    """
    fetched = douban.session.query(Douban.url, Douban.tag).all()
    for record in fetched:
        yield record
if __name__ == '__main__': if __name__ == '__main__':
logger = logging.getLogger("PAPA") logger = logging.getLogger("PAPA")
sleeptime = random.randint(0, 3) sleeptime = random.randint(0, 3)