import requests
from bs4 import BeautifulSoup
import pandas as pd
# Scrape the company list from the Sequoia China portfolio page and save each
# company's name and description to a CSV file.
url = 'https://www.sequoiacap.com/china/companies/'
# Send a browser-like User-Agent header with the request.
headefs = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36'
}

# requests applies no timeout by default, so bound the wait explicitly, and
# fail fast on HTTP errors instead of parsing an error page.
res = requests.get(url, headers=headefs, timeout=10)
res.raise_for_status()
soup = BeautifulSoup(res.text, 'html.parser')

# find() returns None when the element is absent; report that clearly rather
# than failing later with an AttributeError on None.
items = soup.find('ul', class_='companies _column')
if items is None:
    raise RuntimeError(
        "Company list <ul class='companies _column'> not found at " + url
    )
names = items.find_all('li', class_='js-company-li')

titles = []
descris = []
for name in names:
    title_div = name.find('div', class_='_name')
    descri_div = name.find('div', class_='_description')
    if title_div is None:
        # An entry without a name cannot be identified; skip it.
        continue
    titles.append(title_div.text)
    # Keep the two lists the same length even when a description is missing.
    descris.append(descri_div.text if descri_div is not None else '')

# Build the frame in one step; the column order matches the original output.
df = pd.DataFrame({'名称': titles, '描述': descris})
# index=False omits the row-index column from the CSV.
df.to_csv('0011.csv', encoding='gb18030', index=False)
爬虫基础训练(二)
最新推荐文章于 2024-08-21 21:05:18 发布