# Scrape selected information from the campus website.
import requests
from bs4 import BeautifulSoup
# HTTP request headers passed to requests.get by get_link; the only entry is
# a desktop-browser (Edge/Chrome on Windows) user-agent string.
header = {
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/108.0.0.0 Safari/537.36 Edg/108.0.1462.54"
    ),
}
def get_link(url, timeout=10):
    """Download *url* and return its body parsed as a BeautifulSoup tree.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, requests can block indefinitely on an unresponsive host.

    Returns:
        A BeautifulSoup object built from the raw response bytes using the
        built-in 'html.parser'.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            4xx/5xx HTTP status.
    """
    response = requests.get(url, headers=header, timeout=timeout)
    # Fail loudly on an HTTP error instead of silently parsing an error page.
    response.raise_for_status()
    # Hand over the raw bytes (not .text) so the parser decides the encoding.
    return BeautifulSoup(response.content, 'html.parser')
url1 = 'https://www.dlut.edu.cn/xxgk/xxjj.htm'  # URL of the page to scrape
print(f"该网站的URL为:{url1}")

# Accumulates the extracted fields as labelled, newline-terminated lines.
result = ''

# Download and parse the page once; both extractions below read this tree.
a = get_link(url1)
b = a  # same parsed page as `a`; name kept in case later code refers to it

# Text of the first <span> carrying this exact inline style, characters 5..99.
# NOTE(review): presumably the first five characters are a prefix to skip --
# confirm against the live page. Raises IndexError if nothing matches.
n = a.select(
    'span[style="color: rgb(255, 255, 255); text-align: center;"]'
)[0].get_text()[5:100]
print(f"详情:{n}")
result += f'详情:{n}\n'

# Text of the first <p> carrying this exact inline style, first 100
# characters. Raises IndexError if nothing matches.
s = b.select(
    'p[style="text-align: justify; line-height: 2em; text-indent: 2em;"]'
)[0].get_text()[0:100]
print(f"介绍:{s}")
# Label matches the printed one; the original appended this under '详情:'.
result += f'介绍:{s}\n'