# -*- coding:UTF-8 -*-
import requests
from bs4 import BeautifulSoup
import json
import urllib
import request
if __name__ == "__main__":
    # Site root. Not used in the visible part of this script
    # (presumably for resolving relative links later -- TODO confirm).
    server = 'http://www.jit.edu.cn/'
    url = 'http://www.jit.edu.cn/myNews_list_out.aspx?infotype=2'

    # Fetch the news list page once; fail fast on HTTP errors instead of
    # parsing an error page.
    req = requests.get(url, timeout=10)
    req.raise_for_status()
    # HTML body of the response.
    html = req.text

    # The page carries a hidden <input id="__VIEWSTATE">; its value is sent
    # back to the same URL as form data in the follow-up POST.
    data = {'__VIEWSTATE': ''}
    subLink = BeautifulSoup(html, "html.parser")
    viewstate_input = subLink.find('input', {'id': "__VIEWSTATE"})
    if viewstate_input is None or not viewstate_input.has_attr('value'):
        # find() returns None when the element is absent; report that
        # clearly rather than failing on the subscript below.
        raise RuntimeError(
            "no __VIEWSTATE input with a value found at " + url
        )
    data['__VIEWSTATE'] = viewstate_input['value']

    r = requests.post(url, data=data, timeout=10)
    r.raise_for_status()

    # Parse the POST response with BeautifulSoup; for usage see
    # http://beautifulsoup.readthedocs.io/zh_CN/latest/
    div_bf = BeautifulSoup(r.text, "html.parser")
    # Collect every <div> whose class is 'typelist'.
    # NOTE(review): this line was truncated at an HTML entity ("&#") right
    # after `class_ =`; the value is restored from the accompanying comment
    # (class='typelist') -- confirm against the original script.
    div = div_bf.find_all('div', class_='typelist')
爬取学校新闻网站信息
最新推荐文章于 2024-12-08 00:10:16 发布
![](https://img-home.csdnimg.cn/images/20240711042549.png)