方法一:
import time
import pymysql
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
# Module-level MySQL connection; all_city passes it on to insert_data.
# NOTE(review): the '1' values look like placeholders - replace them with the
# real host / user / password / database before running.
db = pymysql.connect(host='1', user='1', password='1', database='1')
# Cursor on that connection, used to execute the INSERT statements.
cursor = db.cursor()
def insert_data(province, city, url, logger, db, cursor):
    """Insert one (province, city, url) row into ``58city_info`` and commit it.

    The insert is best-effort: any failure is printed, the transaction is
    rolled back, and the function reports the outcome instead of raising.

    Args:
        province: Value for the ``province`` column.
        city: Value for the ``city`` column.
        url: Value for the ``url`` column.
        logger: Unused; kept so existing callers that pass it keep working.
        db: Connection object providing ``commit()`` and ``rollback()``.
        cursor: Cursor object providing ``execute(sql, params)``.

    Returns:
        True if the row was committed, False if the insert failed and the
        transaction was rolled back.
    """
    sql = 'insert into 58city_info(`province`,`city`,`url`)values (%s,%s,%s)'
    try:
        # Parameterized query: the driver escapes the values.
        cursor.execute(sql, (province, city, url))
        db.commit()
    except Exception as e:
        # Deliberately broad: one bad row must not abort the whole scrape.
        print(e)
        print(f'insert_data: insert failed for {province!r}/{city!r}, rolling back')
        db.rollback()
        return False
    return True
def br():
    """Open the 58.com change-city page in Chrome and scrape it with ``all_city``.

    The page-load and script timeouts are configured before navigating so
    that they apply to the initial page load as well, and the browser is
    always shut down, even when loading or scraping raises.
    """
    chrome_options = Options()
    browser = webdriver.Chrome(options=chrome_options)
    try:
        # Timeouts must be set before get(); otherwise the first load ignores them.
        browser.set_page_load_timeout(15)
        browser.set_script_timeout(15)
        browser.get('https://www.58.com/changecity.html?fullpath=0&PGTID=0d100000-0005-d7dc-68c3-6a71c706285a&ClickID=2')
        all_city(browser)
    finally:
        # Always release the Chrome/driver process.
        browser.quit()
def all_city(browser):
    """Walk the province blocks on the loaded page and store every city link.

    For each element with class ``content-province`` the province title is
    read, then every ``a.content-city`` anchor inside its ``content-cities``
    containers is printed and inserted as one (province, city, url) row.

    Args:
        browser: Selenium WebDriver with the city-selection page already loaded.
    """
    # Local import keeps this function self-contained; ``By`` ships with selenium
    # and the find_element(s)(By.XPATH, ...) form works on Selenium 3 and 4.
    from selenium.webdriver.common.by import By

    provinces = browser.find_elements(By.XPATH, '//*[@class="content-province"]')
    for province_el in provinces:
        province_1 = province_el.find_element(
            By.XPATH, './/*[@class="content-province-title"]').text
        city_groups = province_el.find_elements(
            By.XPATH, './/div[@class="content-cities"]')
        for group in city_groups:
            # Query the anchors once so each name is paired with its own href.
            anchors = group.find_elements(By.XPATH, './/a[@class="content-city"]')
            for anchor in anchors:
                city = anchor.text
                link_ = anchor.get_attribute('href')
                print(city, link_)
                # Pass the single city/link, not the whole lists.
                insert_data(province=province_1, city=city, url=link_,
                            logger='', db=db, cursor=cursor)
# Script entry point: start the scrape only when this file is run directly.
if __name__ == '__main__':
    br()
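关于 `for k in city_2:` 后面写 `city_3 = city_2[k]` 会出错的原因:
`for k in city_2` 遍历得到的 k 是列表中的元素(城市名字符串),而不是下标;用字符串去索引列表会抛出 TypeError(list indices must be integers or slices, not str)。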
# Lockstep fix: walk both lists together so every city is paired with the link
# at the same position. Unlike a list.index() lookup, this stays correct when
# the same city name appears more than once and avoids a rescan per item.
for city, link_ in zip(city_2, link):
    print(city, link_)
# Index-based fix: iterate by position and use the same index for both lists.
for k, city in enumerate(city_2):
    link_ = link[k]
    print(city, link_)
以上两种写法都可以解决这个问题。
方法二:
import time
import pymysql
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
# Module-level MySQL connection; all_city passes it to insert_data and br closes it.
# NOTE(review): the '1' values look like placeholders - replace them with the
# real host / user / password / database before running.
db = pymysql.connect(host='1', user='1', password='1', database='1')
# Cursor on that connection, used to execute the INSERT statements.
cursor = db.cursor()
def insert_data(province, city, url, db, cursor):
    """Insert one (province, city, url) row into ``58city_info`` and commit it.

    The insert is best-effort: any failure is printed, the transaction is
    rolled back, and the function reports the outcome instead of raising.

    Args:
        province: Value for the ``province`` column.
        city: Value for the ``city`` column.
        url: Value for the ``url`` column.
        db: Connection object providing ``commit()`` and ``rollback()``.
        cursor: Cursor object providing ``execute(sql, params)``.

    Returns:
        True if the row was committed, False if the insert failed and the
        transaction was rolled back.
    """
    sql = 'insert into 58city_info(`province`,`city`,`url`)values (%s,%s,%s)'
    try:
        # Parameterized query: the driver escapes the values.
        cursor.execute(sql, (province, city, url))
        db.commit()
    except Exception as e:
        # Deliberately broad: one bad row must not abort the whole scrape.
        print(e)
        print(f'insert_data: insert failed for {province!r}/{city!r}, rolling back')
        db.rollback()
        return False
    return True
def br():
    """Open the 58.com change-city page in Chrome, scrape it, and clean up.

    The page-load and script timeouts are configured before navigating so
    that they apply to the initial page load as well. The browser, the
    cursor and the database connection are released even when loading or
    scraping raises.
    """
    chrome_options = Options()
    try:
        browser = webdriver.Chrome(options=chrome_options)
        try:
            # Timeouts must be set before get(); otherwise the first load ignores them.
            browser.set_page_load_timeout(15)
            browser.set_script_timeout(15)
            browser.get('https://www.58.com/changecity.html?fullpath=0&PGTID=0d100000-0005-d7dc-68c3-6a71c706285a&ClickID=2')
            all_city(browser)
        finally:
            # Always release the Chrome/driver process.
            browser.quit()
    finally:
        # Close the module-level cursor and connection whatever happened above.
        cursor.close()
        db.close()
def all_city(browser):
    """Walk the province blocks on the loaded page and store every city link.

    For each element with class ``content-province`` the province title is
    read, then every anchor inside its first ``content-cities`` container is
    inserted as one (province, city, url) row.

    Args:
        browser: Selenium WebDriver with the city-selection page already loaded.
    """
    # Local import keeps this function self-contained; ``By`` ships with selenium
    # and the find_element(s)(By.XPATH, ...) form works on Selenium 3 and 4.
    from selenium.webdriver.common.by import By

    provinces = browser.find_elements(By.XPATH, '//*[@class="content-province"]')
    for province_el in provinces:
        province_1 = province_el.find_element(
            By.XPATH, './/*[@class="content-province-title"]').text
        city_1 = province_el.find_element(
            By.XPATH, './/div[@class="content-cities"]')
        for anchor in city_1.find_elements(By.XPATH, ".//a"):
            city = anchor.text
            link = anchor.get_attribute("href")
            insert_data(province=province_1, city=city, url=link, db=db, cursor=cursor)
# Script entry point: start the scrape only when this file is run directly.
if __name__ == '__main__':
    br()
关于数据库建表
1.给字段加索引
给 city 字段加唯一索引(UNIQUE),避免同一城市被重复插入。
其他设置保持默认即可。
2.建立数据库表
爬取成功