>>> import re
>>> from bs4 import BeautifulSoup
>>> import urllib.request
>>> import lxml
>>> import requests
>>> url = 'http://www1.szu.edu.cn/board/'
>>> page=requests.get(url).text
>>> pagesoup=BeautifulSoup(page,'lxml')
>>> for link in pagesoup.find_all(name='a',attrs={"href":re.compile(r'^http:')}):
... print(link.get('href'))
...
http://www.szu.edu.cn
http://news.szu.edu.cn
http://210.39.3.155:9090/goLogin.do
http://www.szu.edu.cn/yxjg/xyxb.htm
http://www.szu.edu.cn/yxjg/znbm.htm
http://www.miibeian.gov.cn
>>> from bs4 import BeautifulSoup
>>> import urllib.request
>>> import lxml
>>> import requests
>>> url = 'http://www1.szu.edu.cn/board/'
>>> page=requests.get(url).text
>>> pagesoup=BeautifulSoup(page,'lxml')
>>> for link in pagesoup.find_all(name='a',attrs={"href":re.compile(r'^http:')}):
... print(link.get('href'))
...
http://www.szu.edu.cn
http://news.szu.edu.cn
http://210.39.3.155:9090/goLogin.do
http://www.szu.edu.cn/yxjg/xyxb.htm
http://www.szu.edu.cn/yxjg/znbm.htm
http://www.miibeian.gov.cn
>>>
--------------------------------
>>> html = requests.get(url)
>>> soup = BeautifulSoup(html.text,"lxml")
>>> print(soup.get_text())
---------------------
>>> # -*- coding: utf-8 -*-
...
>>> import re
>>> from bs4 import BeautifulSoup
>>> import urllib.request
>>> import lxml
>>> import requests
>>> url = 'http://www1.szu.edu.cn/board/'
>>> html = requests.get(url)
>>> soup = BeautifulSoup(html.text,"lxml")
>>> print(soup.get_text())
-----------------------------------
>>> webdata = requests.get(url)
>>> webdata.encoding = 'GBK'
>>> print(webdata.text)
-----