Release 2.0 #1
@ -124,19 +124,3 @@ class DoubanBook(object):
|
|||||||
else:
|
else:
|
||||||
print('[Spider]检测到现有TAG数据,开始抓取...')
|
print('[Spider]检测到现有TAG数据,开始抓取...')
|
||||||
self.get_data()
|
self.get_data()
|
||||||
|
|
||||||
|
|
||||||
def url_pool():
|
|
||||||
for row in douban.session.query(Douban.url, Douban.tag).all():
|
|
||||||
yield row
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
logger = logging.getLogger("PAPA")
|
|
||||||
sleeptime = random.randint(0, 3)
|
|
||||||
with open("results.csv", "a", encoding='utf-8') as f:
|
|
||||||
writer = csv.writer(f)
|
|
||||||
writer.writerow(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime()))
|
|
||||||
writer.writerow(["书名", "作者", "上市时间", "价格", "评分", "书籍分类", "内容简介"])
|
|
||||||
douban = DoubanBook()
|
|
||||||
douban.main()
|
|
||||||
|
@ -15,11 +15,13 @@ class Douban(Base):
|
|||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return "<Douban(id='%d', tag='%s',url='%s')>" % (self.id, self.tag, self.url)
|
return "<Douban(id='%d', tag='%s',url='%s')>" % (self.id, self.tag, self.url)
|
||||||
|
|
||||||
|
|
||||||
class Recording(Base):
|
class Recording(Base):
|
||||||
__tablename__ = 'Recording'
|
__tablename__ = 'Recording'
|
||||||
id = Column(Integer, primary_key=True)
|
id = Column(Integer, primary_key=True)
|
||||||
data = Column(Integer, unique=True, nullable=False)
|
data = Column(Integer, unique=True, nullable=False)
|
||||||
|
|
||||||
|
|
||||||
if os.path.isfile('douban.db') is False:
|
if os.path.isfile('douban.db') is False:
|
||||||
print('正在创建数据库...')
|
print('正在创建数据库...')
|
||||||
Base.metadata.create_all()
|
Base.metadata.create_all()
|
||||||
|
22
main.py
22
main.py
@ -2,9 +2,31 @@ import csv
|
|||||||
import logging
|
import logging
|
||||||
import random
|
import random
|
||||||
import time
|
import time
|
||||||
|
from cmd import Cmd
|
||||||
|
|
||||||
|
from DoubanSpider.db import Douban
|
||||||
from DoubanSpider.Spider import DoubanBook
|
from DoubanSpider.Spider import DoubanBook
|
||||||
|
|
||||||
|
|
||||||
|
class SpiderMain(Cmd):
|
||||||
|
def __init__(self):
|
||||||
|
super().__init__()
|
||||||
|
pass
|
||||||
|
|
||||||
|
def do_help(self, arg):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def do_start(self, arg):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def do_tag(self,arg):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def url_pool():
|
||||||
|
for row in douban.session.query(Douban.url, Douban.tag).all():
|
||||||
|
yield row
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
logger = logging.getLogger("PAPA")
|
logger = logging.getLogger("PAPA")
|
||||||
sleeptime = random.randint(0, 3)
|
sleeptime = random.randint(0, 3)
|
||||||
|
Loading…
Reference in New Issue
Block a user