from bs4 import BeautifulSoup
import requests
import mysql_test
def space_strip(tag, css):
    """Return the cleaned text of the first element under *tag* matching *css*.

    Newline characters are removed from the element's text and surrounding
    whitespace is stripped.

    Args:
        tag: Any object exposing ``select(css)`` that returns a sequence of
            elements with a ``text`` attribute (e.g. a BeautifulSoup tag).
        css: CSS selector evaluated relative to ``tag``.

    Returns:
        The cleaned text of the first match, or ``''`` when nothing matches.
    """
    matches = tag.select(css)
    if not matches:
        # A missing optional field should not abort the whole scrape with an
        # IndexError; report it as an empty value instead.
        return ''
    return matches[0].text.replace('\n', '').strip()
# Douban events page that this script scrapes.
url = 'https://beijing.douban.com/events/week-party'
# Without a timeout, requests can block forever on an unresponsive server.
response = requests.get(url, timeout=10)
# Fail fast on HTTP errors instead of parsing an error page as if it were
# the event listing.
response.raise_for_status()
# Debugging aid: uncomment to save a copy of the fetched page.
# with open('douban_events.html', 'wb') as f:
#     f.write(response.content)
soup = BeautifulSoup(response.text, 'lxml')
ul_tag = soup.find('ul', class_="events-list events-list-pic100 events-list-psmall")
if ul_tag is None:
    # find() returns None when nothing matches; surface that clearly rather
    # than failing later with an AttributeError on None.
    raise RuntimeError('event list <ul> not found in response from ' + url)
li_list = ul_tag.find_all('li', class_='list-entry')

# Parameterized insert statement; it is constant, so build it once rather
# than on every iteration.
sql = ('insert into douban_act (title, date, address, fee, owner) '
       'values (%s, %s ,%s ,%s, %s)')

# Extract one row per event entry and store it in the douban_act table.
for li_tag in li_list:
    title = space_strip(li_tag, 'div.title > a > span')
    date = space_strip(li_tag, 'div.info > ul > li')
    address = space_strip(li_tag, 'div.info > ul > li:nth-of-type(2)')
    fee = space_strip(li_tag, 'div.info > ul > li:nth-of-type(3) > strong')
    owner = space_strip(li_tag, 'div.info > ul > li:nth-of-type(4) > a')
    data = (title, date, address, fee, owner)
    # NOTE(review): a new mysql_connect() is created for every row. A single
    # shared connection could presumably be reused, but that depends on how
    # mysql_test manages/closes connections -- confirm before hoisting.
    mc = mysql_test.mysql_connect()
    mc.mysql_insert_modify(sql, data)