豆瓣同城"本周聚会"数据爬取代码(解析列表页并写入 MySQL)

豆瓣爬虫示例:

 

from bs4 import BeautifulSoup
import requests
from day05.mysqlhelper import MysqlHelper
# Scrape this week's party events from Douban Beijing and store each row
# (title, time, address, fee, owner) into the `douban` MySQL table.
url = 'https://beijing.douban.com/events/week-party'

# Douban rejects the default python-requests User-Agent (HTTP 418),
# so send a browser-like one and fail fast on any non-2xx response.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                  'AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/120.0 Safari/537.36',
}
response = requests.get(url, headers=headers)
response.raise_for_status()

helper = MysqlHelper()
soup = BeautifulSoup(response.text, 'lxml')


def _clean(text):
    """Strip newlines and all spaces from a scraped text fragment."""
    return text.replace('\n', '').replace(' ', '')


ul_tag = soup.find('ul', class_='events-list events-list-pic100 events-list-psmall')
if ul_tag is None:
    # Page layout changed or the request was blocked; avoid AttributeError.
    raise RuntimeError('event list <ul> not found - page layout may have changed')

li_tags = ul_tag.find_all('li', class_='list-entry')

for li_tag in li_tags:
    # Event title text: div.title > a > span
    title = li_tag.select('div.title > a > span')[0].text
    # Event time: the li with class "event-time" inside the meta list.
    time = _clean(li_tag.select('li.event-time')[0].text)
    # Address / organizer are positional entries in ul.event-meta
    # (2nd and 4th li respectively) — fragile if the markup changes.
    address = _clean(li_tag.select('ul.event-meta > li:nth-of-type(2)')[0].text)
    fee = _clean(li_tag.select('li.fee')[0].text)
    owner = _clean(li_tag.select('ul.event-meta > li:nth-of-type(4)')[0].text)

    data = (title, time, address, fee, owner)
    # Parameterized insert — `time` is backquoted because it is a MySQL keyword.
    sql = 'insert into douban(title, `time`, address, fee, owner) values(%s, %s, %s, %s, %s)'
    helper.execute_modify_sql(sql, data)

 

Chrome Options:

from selenium import webdriver
import time

# Run Chrome without a visible window.
options_chrome = webdriver.ChromeOptions()
options_chrome.add_argument('--headless')

# NOTE: the old `chrome_options=` keyword is deprecated and was removed in
# Selenium 4 — `options=` is the supported parameter.
driver = webdriver.Chrome(options=options_chrome)
try:
    time.sleep(1)
    url = 'http://www.baidu.com'
    driver.get(url)

    # Save the rendered page source for offline inspection.
    with open('baidu.html', 'wb') as f:
        f.write(driver.page_source.encode('utf-8'))
finally:
    # Always shut the browser down, even on error, to avoid leaking
    # chromedriver/Chrome processes.
    driver.quit()
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值