我试图使用 selenium 从一个旅游网站抓取数据。我可以把数据导出到 CSV 文件,但无法将数据插入 MySQL 数据库。代码如下:
import requests
from bs4 import BeautifulSoup
import csv
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
import unittest
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import time
import unittest
import re
import sys
import urllib
import json
import sys, mysql.connector
import csv
import mysql
# Browser identification header. NOTE(review): nothing in the visible code
# passes this dict to a request -- confirm whether it is still needed.
user_agent = {'User-agent': 'Chrome/43.0.2357.124'}
# CSV sink shared by the scraper below. newline='' is what the csv module
# requires for files it writes to; the explicit UTF-8 encoding keeps
# non-ASCII scraped text from raising UnicodeEncodeError on platforms whose
# default encoding is not UTF-8.
output_file = open("Excel.csv", "w", newline='', encoding="utf-8")
class Crawling(unittest.TestCase):
    """Scrape search results with Selenium and append them to the CSV file.

    The scraper is packaged as a unittest case: ``setUp`` starts Firefox,
    ``test_sel`` loads the search page and writes one CSV row per result,
    and ``tearDown`` flushes the CSV file and shuts the browser down.
    """

    # Absolute XPath of the <ul> holding the search results; every locator
    # used in test_sel is built relative to it.
    _RESULT_LIST_XPATH = (
        "/html/body/div[4]/div[1]/div[2]/div/div[5]/div/div[1]/div[1]/ul"
    )

    def setUp(self):
        """Start a Firefox session and record the site's base URL."""
        self.driver = webdriver.Firefox()
        self.driver.set_window_size(1024, 768)
        self.base_url = "https://www.ctrip.com/"
        self.accept_next_alert = True

    def tearDown(self):
        """Flush buffered CSV rows and close the browser started in setUp."""
        try:
            output_file.flush()
        finally:
            # Always release the browser, even if flushing failed.
            self.driver.quit()

    def test_sel(self):
        """Load the search page and write result rows 1..19 to the CSV file.

        For each row the price, headline and first deep link are read via
        XPath, echoed to stdout and written to ``output_file``.

        Raises:
            selenium.common.exceptions.NoSuchElementException: if one of
                the per-row locators does not match (for example when the
                page holds fewer than 19 results).
        """
        driver = self.driver
        driver.get(self.base_url + "Search/new york")

        # Scroll to the bottom once and pause for two seconds before
        # reading the result list.
        for _ in range(1, 2):
            driver.execute_script(
                "window.scrollTo(0,document.body.scrollHeight);")
            time.sleep(2)

        # find_element(s)(By.XPATH, ...) replaces the find_element(s)_by_xpath
        # helpers, which no longer exist in Selenium 4.
        elements = driver.find_elements(
            By.XPATH, self._RESULT_LIST_XPATH + "/li[1]/div/div[1]")

        # One writer for the whole run instead of a new one per row.
        writer = csv.writer(output_file)

        for j in range(1, 20):
            card = self._RESULT_LIST_XPATH + "/li[" + str(j) + "]/div/div[1]"
            price = driver.find_element(
                By.XPATH, card + "/div[2]/span[1]").text
            headline = driver.find_element(
                By.XPATH, card + "/div[2]/strong").text
            deeplink = driver.find_element(
                By.XPATH, card + "/div[3]/div/ul/li[1]/a").get_attribute("href")

            if not all([headline, price]):
                print("Header not available " " | " + "Price not available "
                      + " | " + "Deeplink: " + str(deeplink))
                headline = "Not available as well as price"
            else:
                # price[4:] drops the first four characters of the price text.
                print("Header: " + headline + " | " + "Price: " + price[4:]
                      + " | " + "Deeplink: " + str(deeplink))

            if elements:
                # NOTE(review): partner_ID and location_ID are not defined in
                # the visible source; unless they exist at module level this
                # line raises NameError. Confirm where they come from.
                writer.writerow(
                    [headline, price[4:], deeplink, partner_ID, location_ID])
# Run the scraper through the unittest runner when executed as a script.
if __name__ == "__main__":
    unittest.main()
下面是我另外编写的、本应把数据写入数据库的代码:
^{pr2}$
但问题是数据并没有被写入数据库。你们能帮我一下,或者给我一点提示吗?欢迎任何反馈。