import html

import requests
from bs4 import BeautifulSoup
headers = {'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36'}
def downpagedata(page, timeout=10):
    """Scrape one listing page and return (company name, company site) pairs.

    Fetches listing page number *page*, follows every brand link found on
    it, and reads the first ``.c666 li a`` anchor of each brand's detail
    page.

    Args:
        page: Listing page number; it is appended to the query string.
        timeout: Seconds to wait for each HTTP request. ``requests`` has no
            default timeout, so without one a stalled server would block
            the scrape forever.

    Returns:
        A list of ``(company_name, company_url)`` tuples, where
        ``company_name`` is the text of the brand link and ``company_url``
        is the text of the first matching anchor on the detail page.
        Brands whose link has no ``href``, or whose detail page has no
        matching anchor, are skipped.

    Raises:
        requests.exceptions.RequestException: If a request fails or exceeds
            *timeout*.
    """
    url = (
        'https://www.maigoo.com/brand/list_1111.html'
        '?maxpage=&tabnum=&sort=&defaultids=&start=&thirdaction='
        '&subaction=resultlist&action=searchlist'
        '&action=searchlist&subaction=resultlist&page='
    ) + str(page)
    listing_html = requests.get(url, headers=headers, timeout=timeout).text
    soup = BeautifulSoup(listing_html, 'lxml')

    page_data = []
    for a in soup.find_all(class_='name font18 color1 bg2 b'):
        data_url = a.get('href')
        if not data_url:
            # An entry without a link cannot be followed; skip it rather
            # than raise KeyError and lose the rest of the page.
            continue

        detail_html = requests.get(
            data_url, headers=headers, timeout=timeout
        ).text
        detail_soup = BeautifulSoup(detail_html, 'lxml')

        # select_one returns the first match or None, so the detail page is
        # queried once instead of twice.
        site_link = detail_soup.select_one('.c666 li a')
        if site_link is not None:
            page_data.append((a.text, site_link.text))
    return page_data
# Build a single HTML page listing every scraped company as a link and write
# it to dianti.html in the current working directory.
# "zh-Hans" (Simplified Chinese) replaces "zh-has", which is not a valid
# language tag.
html_str = '''<!DOCTYPE html>
<html lang="zh-Hans">
<head>
<meta charset="UTF-8">
<title>Title</title>
</head>
<body>
<ul>'''

# Listing pages 1 through 13; each call returns (company_name, company_url)
# tuples for one page.
all_data = []
for page in range(1, 14):
    all_data.extend(downpagedata(page))

# The names and URLs come from a third-party site, so escape them before
# embedding them in markup; quote=True also escapes quotes, which keeps the
# href attribute well-formed.
html_str += ''.join(
    '<li><a href="' + html.escape(company_url, quote=True) + '">'
    + html.escape(company_name) + '</a></li>\n'
    for company_name, company_url in all_data
)
html_str += '</ul> </body>\n</html>'

# The with-statement closes the file; no explicit close() is needed.
with open('dianti.html', 'w', encoding='utf-8') as fp:
    fp.write(html_str)
print('ok')
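# 11-14  (stray trailing text, possibly a date stamp; kept as a comment so it is not evaluated as an expression)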