需要安装:
pip install pymysql scrapy selenium beautifulsoup4
代码:
import scrapy
import pymysql # 导入 pymysql
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
import time
import os
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
from selenium.common.exceptions import NoSuchElementException
from bs4 import BeautifulSoup
class SavearrSpider(scrapy.Spider):
    """Load the start page in Chrome via Selenium and store company titles in MySQL.

    The spider renders each response URL in a Selenium-driven Chrome
    instance, reads the text of the first ``<p>`` of every ``<li>`` inside
    the first ``<ul>`` of the ``div.gdposter_box`` element, and inserts each
    title into the ``companies`` table.

    The browser and the database connection are opened in ``__init__`` and
    released in ``close``.

    NOTE(review): Scrapy's documented shutdown hook is ``closed(reason)``;
    this class overrides ``close`` instead. The name is kept for interface
    compatibility -- confirm it is invoked by the Scrapy version in use.
    """

    name = "savearr"
    allowed_domains = ["***.com"]
    start_urls = ["https://www.***.com"]

    # Absolute path of the chromedriver binary. It is passed explicitly to
    # Selenium, so the binary does not need to be on PATH. Kept as a class
    # attribute so a subclass can point at a different installation.
    driver_path = r'D:\software\chromedriver\chromedriver127\chromedriver-win64\chromedriver.exe'

    # MySQL connection parameters.
    # NOTE(review): credentials are hard-coded; consider loading them from
    # Scrapy settings or environment variables instead.
    db_config = {
        'host': 'localhost',
        'user': 'root',  # replace with your MySQL user name
        'password': 'root',  # replace with your MySQL password
        'database': 'test',  # database name
        'charset': 'utf8mb4',
    }

    def __init__(self, *args, **kwargs):
        """Start the Chrome driver and open the MySQL connection.

        If the database connection cannot be opened, the already started
        browser is shut down before the exception propagates.
        """
        super().__init__(*args, **kwargs)

        chrome_options = Options()
        chrome_options.add_argument("--start-maximized")  # start maximized
        chrome_options.add_argument("--disable-extensions")  # disable extensions
        chrome_options.add_argument("--disable-gpu")  # disable GPU acceleration

        service = Service(executable_path=self.driver_path)
        self.driver = webdriver.Chrome(service=service, options=chrome_options)

        # Initialise the database connection; do not leave a stray browser
        # process behind when this fails.
        try:
            self.db_connection = pymysql.connect(
                cursorclass=pymysql.cursors.DictCursor,
                **self.db_config,
            )
        except Exception:
            self._quit_driver()
            raise

    def parse(self, response):
        """Render ``response.url`` in the browser and persist the titles found.

        The browser is intentionally not closed here: ``close`` owns the
        shutdown, so the driver is quit exactly once.
        """
        # Use Selenium to visit the URL so the parsed HTML is the page
        # source as reported by the browser.
        self.driver.get(response.url)
        company_data = self._extract_company_data(self.driver.page_source)
        # Save the data to MySQL.
        self.save_to_mysql(company_data)

    def _extract_company_data(self, page_source):
        """Return a list of ``{'title': str}`` dicts parsed from *page_source*.

        Returns an empty list (and logs a warning) when the expected
        container or list element is missing. ``<li>`` items without a
        ``<p>`` child are skipped instead of raising ``AttributeError``.
        """
        soup = BeautifulSoup(page_source, 'html.parser')

        gdposter_box = soup.find('div', class_='gdposter_box')
        if gdposter_box is None:
            self.logger.warning("未找到 元素。")
            return []

        table = gdposter_box.find('ul')
        if table is None:
            self.logger.warning("未找到指定的 元素。")
            return []

        company_data = []
        for company in table.find_all('li'):
            title_tag = company.find('p')
            if title_tag is None:
                continue
            company_data.append({'title': title_tag.get_text()})
        return company_data

    def save_to_mysql(self, company_data):
        """Insert every ``company['title']`` into the ``companies`` table.

        All rows are written in one transaction: the batch is committed on
        success and rolled back if any insert fails, after which the
        original ``pymysql.MySQLError`` is re-raised. An empty
        *company_data* is a no-op.
        """
        if not company_data:
            return

        sql = "INSERT INTO companies (title) VALUES (%s)"
        rows = [(company['title'],) for company in company_data]
        try:
            with self.db_connection.cursor() as cursor:
                cursor.executemany(sql, rows)
            self.db_connection.commit()
        except pymysql.MySQLError:
            self.db_connection.rollback()
            raise

    def _quit_driver(self):
        """Quit the Selenium driver if it is still open; safe to call repeatedly."""
        driver = getattr(self, 'driver', None)
        if driver is not None:
            # Clear the attribute first so a failing quit() is not retried.
            self.driver = None
            driver.quit()

    def close(self, reason):
        """Release the browser and the database connection.

        The database connection is closed even if quitting the browser
        raises. Calling this method more than once is harmless.
        """
        try:
            self._quit_driver()
        finally:
            connection = getattr(self, 'db_connection', None)
            if connection is not None:
                self.db_connection = None
                connection.close()