import re
import bs4
import requests
from bs4 import BeautifulSoup
def getHTMLText(url, timeout=30):
    """Download *url* and return the response body as text.

    The body is decoded with the encoding that ``requests`` detects from the
    content (``apparent_encoding``) instead of the one declared in the HTTP
    headers.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block forever on an unresponsive host.

    Returns:
        The decoded page text, or the literal string ``"error !"`` when the
        request fails (connection problem, timeout, invalid URL, or an HTTP
        error status reported by ``raise_for_status``).
    """
    try:
        r = requests.get(url, timeout=timeout)
        r.raise_for_status()
    except requests.RequestException:
        # Only request-related failures are translated into the sentinel;
        # anything else (e.g. KeyboardInterrupt, programming errors) is
        # allowed to propagate instead of being silently swallowed.
        return "error !"
    r.encoding = r.apparent_encoding
    return r.text
def _cell_text(td):
    """Return the text of the first ``<p>`` inside *td*, or None if absent."""
    p = td.p
    if p is None:
        return None
    text = p.string
    if text is None:
        # ``.string`` is None when the <p> has zero or several children;
        # fall back to the concatenated text so the caller always gets a str.
        text = p.get_text()
    # Convert to a plain str so the stored value does not keep a reference
    # to the whole parse tree.
    return str(text)


def fill_list(ulist, html):
    """Append ``[first_cell, second_cell]`` for each table row in *html*.

    The first ``<tbody>`` in the document is located and each of its direct
    child tags is treated as a row. For every row, the text of the ``<p>``
    inside the first and second ``<td>`` is printed (first cell only) and
    appended to *ulist* as a two-element list.

    Rows with fewer than two ``<td>`` cells, or whose cells contain no
    ``<p>``, are skipped. If the document has no ``<tbody>`` at all (for
    example when *html* is an error placeholder rather than a real page),
    *ulist* is left unchanged.

    Args:
        ulist: List that is extended in place with the extracted rows.
        html: HTML source text to parse.
    """
    soup = BeautifulSoup(html, "html.parser")
    tbody = soup.find('tbody')
    if tbody is None:
        return
    for tr in tbody.children:
        # ``children`` also yields whitespace/text nodes between rows.
        if not isinstance(tr, bs4.element.Tag):
            continue
        tds = tr.find_all('td')
        if len(tds) < 2:
            continue
        first = _cell_text(tds[0])
        second = _cell_text(tds[1])
        if first is None or second is None:
            continue
        print(first)
        ulist.append([first, second])
def print_list(ulist, filename="信息工程学院学生信息.txt"):
    """Write *ulist* to a tab-separated text file, one row per line.

    Despite its name, this function does not print to the console; it
    (over)writes *filename*.

    Args:
        ulist: Iterable of rows, each row an iterable of cell values.
            ``None`` cells are written as empty fields and other non-string
            values are converted with ``str``.
        filename: Path of the output file. Defaults to the name the script
            has always used.

    The file is written as UTF-8 so that the output does not depend on the
    platform's default locale encoding, and it is closed even if a write
    fails part-way through.
    """
    with open(filename, 'w', encoding='utf-8') as f:
        for u in ulist:
            cells = ("" if cell is None else str(cell) for cell in u)
            f.write('\t'.join(cells))
            f.write('\n')
def main():
    """Fetch the hard-coded page, extract its table rows and save them.

    Chains the three steps of the script: download the HTML, collect the
    rows into a fresh list, then write that list to the output file.
    """
    rows = []
    page = getHTMLText("http://it.chd.edu.cn/info/1064/7271.htm")
    fill_list(rows, page)
    print_list(rows)
# Script entry point: run the download/parse/save pipeline.
# NOTE(review): there is no `if __name__ == "__main__":` guard, so importing
# this module also triggers the network request and the file write.
main()