from bs4 import BeautifulSoup,Comment
import requests
def get_name(url):
    """Scrape one enterprise-listing page and collect per-company fields.

    The page at ``url`` is downloaded and parsed with ``html.parser``. For
    every ``<div class="inf">`` found inside a ``<ul>`` whose class attribute
    is ``hot-search clear``, the text of its ``<h2>`` is recorded as the
    company name. The second HTML comment inside that ``<div>`` is then parsed
    as markup, and the ``<span>`` following each recognised ``<p>`` label is
    recorded under the matching key.

    Args:
        url: Address of the listing page to fetch.

    Returns:
        A dict with the keys ``company_name``, ``code``, ``city``,
        ``industry``, ``register``, ``income`` and ``profit``, each mapping to
        a list of strings. ``city`` and ``industry`` are created but never
        filled here. The lists are appended to independently, so they may
        differ in length when a label is absent from an entry.

    Raises:
        IndexError: If a ``<div class="inf">`` holds fewer than two comments.
        AttributeError: If an expected ``<h2>`` or ``<span>`` is missing.
    """
    # Text of a <p> label inside the commented-out markup -> result key.
    label_to_key = {
        '注册资金': 'register',  # registered capital
        '营业收入': 'income',  # operating revenue
        '净利润': 'profit',  # net profit
    }
    test = {
        'company_name': [],
        'code': [],
        'city': [],
        'industry': [],
        'register': [],
        'income': [],
        'profit': [],
    }

    # NOTE(review): `headers` is not defined in this function; presumably a
    # module-level dict of request headers -- confirm it exists.
    # NOTE(security): verify=False turns off TLS certificate verification.
    req = requests.get(url, headers=headers, verify=False)
    soup = BeautifulSoup(req.text, 'html.parser')

    # An empty find_all() result simply iterates zero times, so no separate
    # "is there anything?" search is needed before each loop.
    for listing in soup.find_all('ul', {'class': 'hot-search clear'}):
        for card in listing.find_all('div', {'class': 'inf'}):
            test['company_name'].append(card.find('h2').text)  # company name
            # NOTE(review): `code` is never assigned in this function; it must
            # come from module scope or a line lost from this snippet -- verify.
            test['code'].append(code)

            # The second comment node in the card is re-parsed as HTML so the
            # <p>/<span> pairs inside it can be searched like normal markup.
            comments = card.find_all(
                string=lambda node: isinstance(node, Comment)
            )
            hidden = BeautifulSoup(comments[1], 'html.parser')
            for label in hidden.find_all('p'):
                key = label_to_key.get(label.text)
                if key is not None:
                    test[key].append(label.find_next('span').text)
    return test
# Run the scraper against one listing page. This call executes at import time
# and binds the returned dict to the module-level name `info`.
info = get_name('https://www.ccotc.cn/Enterprise/index/cid/69/lid/84/industryid/C/cityid/10/businessIncomeId/A/order/4/p/2.html')
# Python 解析 HTML,提取注释部分
# 最新推荐文章于 2024-08-27 10:00:00 发布