在 db 文件夹中新建 dbmysql.py(内容如下),并在同一文件夹中新增 __init__.py,内容为空。
import pymysql
class DBMysql:
    """Small helper around ``pymysql`` that opens one connection per statement.

    All methods are classmethods; connection settings are read from the
    class attributes below, so they can be overridden on the class (or a
    subclass) before any query is issued.
    """

    # Settings passed to ``pymysql.connect``.
    # NOTE(review): credentials are hard-coded here; consider loading them
    # from configuration or the environment instead.
    host = 'localhost'
    username = 'root'
    password = 'root'
    db_name = 'test'

    @classmethod
    def connect_mysql(cls):
        """Open and return a new connection built from the class settings."""
        return pymysql.connect(host=cls.host, user=cls.username, password=cls.password, database=cls.db_name)

    @classmethod
    def insert_table(cls, sql, params=None):
        """Execute an INSERT statement and return the generated row id.

        Args:
            sql: The statement to run. May contain ``%s`` placeholders when
                ``params`` is given.
            params: Optional sequence/dict of values bound by the driver.
                Prefer this over building ``sql`` by string concatenation.

        Returns:
            The id reported by the connection after the insert, or ``0`` if
            the statement failed and was rolled back.
        """
        db = cls.connect_mysql()
        insert_id = 0
        try:
            cur = db.cursor()
            cur.execute(sql, params)
            insert_id = int(db.insert_id())
            db.commit()
        except pymysql.Error as e:
            # Nothing was persisted, so do not hand back an id captured
            # before the failure (e.g. when commit itself raised).
            insert_id = 0
            print("创建数据库连接失败|Mysql Error %d: %s" % (e.args[0], e.args[1]))
            print(sql)
            db.rollback()
        finally:
            # Always release the connection, whatever happened above.
            db.close()
        return insert_id

    @classmethod
    def find_sql(cls, sql, params=None):
        """Execute a query and return its first row.

        Args:
            sql: The statement to run. May contain ``%s`` placeholders when
                ``params`` is given.
            params: Optional sequence/dict of values bound by the driver.

        Returns:
            Whatever ``cursor.fetchone()`` yields: the first row, or ``None``
            when the query produced no rows.

        Raises:
            Any exception raised while executing or fetching is propagated
            after the connection has been closed.
        """
        db = cls.connect_mysql()
        try:
            cur = db.cursor()
            cur.execute(sql, params)
            return cur.fetchone()
        finally:
            # Close even when execute/fetch raises so connections don't leak.
            db.close()
在 sele 文件夹中新建 index.py,内容如下:
import sys
sys.path.append('..')
import time
import random
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from db.dbmysql import DBMysql
# Check whether an element exists.
def isElementExist(obj, element):
    """Return True if ``obj`` contains an element matching a CSS selector.

    Args:
        obj: A driver or element exposing ``find_element``.
        element: The CSS selector to look for.

    Returns:
        ``True`` when the lookup succeeds, ``False`` when it raises.
    """
    try:
        # Locator-based lookup, matching the ``By`` style used elsewhere in
        # this file; the ``find_element_by_*`` helpers are gone from newer
        # Selenium releases.
        obj.find_element(By.CSS_SELECTOR, element)
    except Exception:
        # Any lookup failure counts as "not present". ``Exception`` (rather
        # than a bare except) lets KeyboardInterrupt/SystemExit through.
        return False
    return True
def search_dangdang():
    """Open dangdang.com in Chrome, search for "python" and scrape the results.

    Configures Chrome, loads the home page, submits the search and then hands
    the driver to ``get_ddbook``.
    """
    option = webdriver.ChromeOptions()
    # "detach" is what keeps the browser window open after the script ends.
    option.add_experimental_option("detach", True)
    # Put a random build number (500.0-600.0) into the user-agent string.
    num = str(float(random.randint(500, 600)))
    option.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/{}"
                        " (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/{}".format(num, num))
    # Disable image and CSS loading.
    # NOTE(review): 'permissions.default.stylesheet' looks like a Firefox
    # preference name — confirm it has any effect in Chrome.
    prefs = {
        "profile.default_content_setting_values": {
            "images": 2,
        },
        'permissions.default.stylesheet': 2
    }
    option.add_experimental_option("prefs", prefs)
    url = "http://www.dangdang.com/"
    # Raw string: the Windows path contains backslashes that are not valid
    # escape sequences, which newer Python versions warn about.
    driver = webdriver.Chrome(r'D:\Program Files (x86)\Python\Python310\chromedriver.exe', options=option)
    driver.get(url)
    # Type the search term into the search box.
    driver.find_element(By.ID, value="key_S").send_keys("python")
    # Click the search button through page script.
    # NOTE(review): relies on the page defining ``$`` (presumably jQuery).
    button = driver.find_element(By.CLASS_NAME, value="button")
    driver.execute_script("$(arguments[0]).click()", button)
    get_ddbook(driver)
# Column order used by the INSERT statement built in ``_store_page``.
_BOOK_COLUMNS = ("sku", "url", "title", "price", "ori_price",
                 "author", "pcb_time", "pcbs", "pl_count", "add_time")


def _sql_quote(value):
    """Render ``value`` as a single-quoted SQL string literal."""
    # Scraped text is untrusted: escape backslashes and single quotes so a
    # value cannot terminate the literal early.
    # NOTE(review): assumes the server treats backslash as an escape
    # character (MySQL default) — confirm NO_BACKSLASH_ESCAPES is off.
    # Driver-side parameter binding would be preferable if the DB helper
    # accepts bound parameters.
    text = str(value).replace("\\", "\\\\").replace("'", "''")
    return "'" + text + "'"


def _first_match(node, selector):
    """Return the first element under ``node`` matching ``selector``, or None."""
    matches = node.find_elements(By.CSS_SELECTOR, selector)
    return matches[0] if matches else None


def _clean_title(element):
    """Return the element's ``title`` attribute without quotes or padding."""
    return (element.get_attribute("title") or "").replace("'", "").strip()


def _scrape_book(book):
    """Extract one result ``<li>`` into a dict keyed by column name."""
    fields = {
        "sku": str(book.get_attribute("sku") or ""),
        "url": "",
        "title": "",
        "price": 0,
        "ori_price": 0,
        "author": "",
        "pcb_time": "",
        "pcbs": "",
        "pl_count": 0,
    }

    # Product page URL.
    link = _first_match(book, "a[name=itemlist-picture]")
    if link is not None:
        fields["url"] = link.get_attribute("href") or ""

    # Book title.
    title_el = _first_match(book, "p.name> a")
    if title_el is not None:
        fields["title"] = _clean_title(title_el)

    # Current price.
    price_el = _first_match(book, "p.price> span.search_now_price")
    if price_el is not None:
        fields["price"] = price_el.text.replace('¥', '').strip()

    # Original (pre-discount) price.
    ori_price_el = _first_match(book, "p.price> span.search_pre_price")
    if ori_price_el is not None:
        fields["ori_price"] = ori_price_el.text.replace('¥', '').strip()

    # Author.
    author_el = _first_match(book, "p.search_book_author> span> a[name=itemlist-author]")
    if author_el is not None:
        fields["author"] = _clean_title(author_el)

    # Publication date (second span) and publisher (inside the third span).
    # Guard on the span count so a short or missing author line cannot raise
    # or reuse spans left over from a previous book.
    spans = book.find_elements(By.CSS_SELECTOR, "p.search_book_author> span")
    if len(spans) > 1:
        fields["pcb_time"] = (spans[1].text.replace('/', '').replace("'", "")
                              .replace(' ', '').strip())
    if len(spans) > 2:
        publisher_el = _first_match(spans[2], "a[name=P_cbs]")
        if publisher_el is not None:
            fields["pcbs"] = _clean_title(publisher_el)

    # Review count.
    review_el = _first_match(book, "p.search_star_line>a[name=itemlist-review]")
    if review_el is not None:
        fields["pl_count"] = review_el.text.strip().replace('条评论', '')

    return fields


def _store_page(driver):
    """Scrape the result page currently shown and insert books not yet stored."""
    book_ul = driver.find_element(By.CLASS_NAME, value="bigimg")
    book_li_list = book_ul.find_elements(By.TAG_NAME, value="li")
    add_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
    id_list = []
    for book in book_li_list:
        fields = _scrape_book(book)
        fields["add_time"] = add_time
        existing = DBMysql.find_sql(
            "select title from dd_book where sku=" + _sql_quote(fields["sku"]))
        if existing:
            # Already stored: just report the title.
            print(fields["title"])
            continue
        sql = ("insert into dd_book(" + ",".join(_BOOK_COLUMNS) + ") values("
               + ",".join(_sql_quote(fields[col]) for col in _BOOK_COLUMNS) + ")")
        id_list.append(DBMysql.insert_table(sql))
    print(id_list)


def get_ddbook(driver):
    """Scrape every search-result page reachable from ``driver`` into ``dd_book``.

    The current page is processed first; errors there propagate to the
    caller. The "next" link is then followed page by page. Any exception
    after the first page — including the missing "next" link on the last
    page — is printed and the process exits via ``sys.exit()``.

    Args:
        driver: A Selenium driver already showing a dangdang result page.
    """
    _store_page(driver)
    try:
        # Iterate instead of recursing once per page.
        while True:
            a_next = driver.find_element(by=By.CLASS_NAME, value="next")
            a_next.find_element(by=By.TAG_NAME, value="a").click()
            # Give the next page a moment to load.
            time.sleep(1)
            _store_page(driver)
    except Exception as e:
        print(e)
        sys.exit()
# Start the scraper as soon as this module is executed; being a module-level
# call, it also runs if the file is imported.
search_dangdang()