豆瓣:
from bs4 import BeautifulSoup
import requests

from day05.mysqlhelper import MysqlHelper

# Scrape the Douban Beijing "week-party" event listing and insert one row per
# event (title, time, address, fee, owner) into the `douban` table.

url = 'https://beijing.douban.com/events/week-party'
# The insert statement never changes, so build it once outside the loop.
sql = 'insert into douban(title, `time`, address, fee, owner) values(%s, %s, %s, %s, %s)'


def _field_text(tag, selector):
    """Return the text of the first *selector* match under *tag*.

    Newlines and spaces are removed from the text. Raises IndexError when
    nothing under *tag* matches *selector*.
    """
    return tag.select(selector)[0].text.replace('\n', '').replace(' ', '')


# A timeout keeps the script from hanging forever on a stalled connection, and
# raise_for_status() stops here instead of parsing an HTTP error page.
response = requests.get(url, timeout=10)
response.raise_for_status()

# NOTE(review): MysqlHelper is never explicitly closed in this script; confirm
# whether it needs a close/commit call once all rows are written.
helper = MysqlHelper()
soup = BeautifulSoup(response.text, 'lxml')

ul_tag = soup.find('ul', class_='events-list events-list-pic100 events-list-psmall')
if ul_tag is None:
    # find() returns None when no element matches; fail with a clear message
    # rather than an AttributeError on the next line.
    raise RuntimeError('event list <ul> not found in the response from %s' % url)

li_tags = ul_tag.find_all('li', class_='list-entry')
for li_tag in li_tags:
    # The title is stored as-is; the other fields have newlines/spaces removed.
    title = li_tag.select('div.title > a > span')[0].text
    event_time = _field_text(li_tag, 'li.event-time')
    # Address and owner carry no class of their own in the selectors used
    # here, so they are picked by position inside ul.event-meta.
    address = _field_text(li_tag, 'ul.event-meta > li:nth-of-type(2)')
    fee = _field_text(li_tag, 'li.fee')
    owner = _field_text(li_tag, 'ul.event-meta > li:nth-of-type(4)')

    data = (title, event_time, address, fee, owner)
    helper.execute_modify_sql(sql, data)
Chrome Options:
import time

from selenium import webdriver

# Load http://www.baidu.com in headless Chrome and save the page source to
# baidu.html as UTF-8 bytes.

options_chrome = webdriver.ChromeOptions()
options_chrome.add_argument('--headless')

# `options=` is the supported keyword; the older `chrome_options=` spelling is
# deprecated and rejected by newer Selenium releases.
driver = webdriver.Chrome(options=options_chrome)
try:
    # NOTE(review): this pause runs before the page is requested; confirm
    # whether it was meant to follow driver.get() to let the page settle.
    time.sleep(1)
    url = 'http://www.baidu.com'
    driver.get(url)
    with open('baidu.html', 'wb') as f:
        f.write(driver.page_source.encode('utf-8'))
finally:
    # Always shut the browser down, even if loading or writing fails, so the
    # headless Chrome process is not left running.
    driver.quit()