Release 2.0 #1
1
.gitignore
vendored
1
.gitignore
vendored
@ -137,3 +137,4 @@ dmypy.json
|
|||||||
/results.csv
|
/results.csv
|
||||||
.idea/dataSources.local.xml
|
.idea/dataSources.local.xml
|
||||||
.gitignore
|
.gitignore
|
||||||
|
.vscode/launch.json
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
from DoubanSpider import *
|
from DoubanSpider import *
|
||||||
from DoubanSpider.db import Douban, engine
|
from DoubanSpider.db import Douban, engine, Recording
|
||||||
from sqlalchemy.orm import sessionmaker
|
from sqlalchemy.orm import sessionmaker
|
||||||
|
|
||||||
|
|
||||||
@ -51,7 +51,6 @@ class DoubanBook(object):
|
|||||||
elif do_not_get_all == '2':
|
elif do_not_get_all == '2':
|
||||||
user_tag = input('请输入标签:')
|
user_tag = input('请输入标签:')
|
||||||
self.get_url(user_tag)
|
self.get_url(user_tag)
|
||||||
self.main()
|
|
||||||
else:
|
else:
|
||||||
print("[Spider]输入有误,请重新输入!")
|
print("[Spider]输入有误,请重新输入!")
|
||||||
self.get_tags()
|
self.get_tags()
|
||||||
@ -64,7 +63,7 @@ class DoubanBook(object):
|
|||||||
# self.get_data(books_url, tag_name)
|
# self.get_data(books_url, tag_name)
|
||||||
|
|
||||||
def get_data(self):
|
def get_data(self):
|
||||||
for row in self.session.query(Douban.url, Douban.tag).all():
|
for row in self.session.query(Douban.url, Douban.tag, Douban.id).all():
|
||||||
time.sleep(sleeptime)
|
time.sleep(sleeptime)
|
||||||
print(f"正在解析:{row[0]}")
|
print(f"正在解析:{row[0]}")
|
||||||
response = requests.get(row[0], headers=self.headers)
|
response = requests.get(row[0], headers=self.headers)
|
||||||
@ -115,6 +114,10 @@ class DoubanBook(object):
|
|||||||
writer.writerow(data)
|
writer.writerow(data)
|
||||||
|
|
||||||
def main(self):
|
def main(self):
|
||||||
|
rec = self.session.query(Recording.id).all()
|
||||||
|
if not rec:
|
||||||
|
self.session.add(Recording(id=1, data=1))
|
||||||
|
self.session.commit()
|
||||||
n = self.session.query(Douban.url, Douban.tag).all()
|
n = self.session.query(Douban.url, Douban.tag).all()
|
||||||
if not n:
|
if not n:
|
||||||
self.get_tags()
|
self.get_tags()
|
||||||
@ -123,9 +126,14 @@ class DoubanBook(object):
|
|||||||
self.get_data()
|
self.get_data()
|
||||||
|
|
||||||
|
|
||||||
|
def url_pool():
    """Yield each (url, tag) row stored in the ``Douban`` table.

    The query goes through the session of the module-level ``douban``
    object, which must exist before the generator is first advanced.
    All rows are loaded with ``.all()`` up front and then handed out
    one at a time.
    """
    stored_rows = douban.session.query(Douban.url, Douban.tag).all()
    for url_and_tag in stored_rows:
        yield url_and_tag
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
logger = logging.getLogger("PAPA")
|
logger = logging.getLogger("PAPA")
|
||||||
sleeptime = random.randint(0,3)
|
sleeptime = random.randint(0, 3)
|
||||||
with open("results.csv", "a", encoding='utf-8') as f:
|
with open("results.csv", "a", encoding='utf-8') as f:
|
||||||
writer = csv.writer(f)
|
writer = csv.writer(f)
|
||||||
writer.writerow(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime()))
|
writer.writerow(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime()))
|
@ -15,6 +15,10 @@ class Douban(Base):
|
|||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return "<Douban(id='%d', tag='%s',url='%s')>" % (self.id, self.tag, self.url)
|
return "<Douban(id='%d', tag='%s',url='%s')>" % (self.id, self.tag, self.url)
|
||||||
|
|
||||||
|
class Recording(Base):
    """ORM model mapped to the ``Recording`` table.

    Columns:
        id: integer primary key.
        data: integer value; must be unique and may not be NULL.

    NOTE(review): ``DoubanBook.main`` seeds one row (id=1, data=1) when
    the table is empty; what ``data`` is meant to track is not visible
    here -- confirm with the spider code.
    """

    __tablename__ = "Recording"

    id = Column(Integer, primary_key=True)
    data = Column(Integer, nullable=False, unique=True)
|
||||||
|
|
||||||
if os.path.isfile('douban.db') is False:
|
if os.path.isfile('douban.db') is False:
|
||||||
print('正在创建数据库...')
|
print('正在创建数据库...')
|
||||||
|
16
main.py
Normal file
16
main.py
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
import csv
|
||||||
|
import logging
|
||||||
|
import random
|
||||||
|
import time
|
||||||
|
|
||||||
|
from DoubanSpider.Spider import DoubanBook
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Script entry point: append a timestamp row and a header row to
    # results.csv, then run the spider.
    logger = logging.getLogger("PAPA")
    # One random integer in [0, 3], chosen once per run.
    sleeptime = random.randint(0, 3)
    # newline='' is what the csv module expects of its file objects;
    # without it, platforms that translate "\n" emit blank lines between rows.
    with open("results.csv", "a", encoding='utf-8', newline='') as f:
        writer = csv.writer(f)
        # writerow() iterates over its argument, so a bare string would be
        # written as one field per character; wrap the timestamp in a list
        # to get a single-field row.
        writer.writerow([time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())])
        writer.writerow(["书名", "作者", "上市时间", "价格", "评分", "书籍分类", "内容简介"])
        # NOTE(review): the spider run is kept inside the `with` block on the
        # assumption that the CSV must stay open while it runs -- confirm.
        douban = DoubanBook()
        douban.main()
|
Loading…
Reference in New Issue
Block a user