import urllib
from io import BytesIO
import urllib.request
from urllib.request import urlopen
import zlib
import gzip
def loadData(url: str, headers: dict) -> bytes:
    """Fetch *url* and return the response body with transfer compression undone.

    The request advertises ``gzip`` and ``deflate`` support; if the server
    answers with one of those in ``Content-Encoding`` the body is
    decompressed before being returned.  Any other (or missing) encoding
    leaves the body untouched.

    Args:
        url: Address to request.
        headers: Mapping of request header names to values.

    Returns:
        The (decompressed) response body as raw bytes.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` on request failure.
        zlib.error, OSError, EOFError: Propagated from the decompression
            helpers when the body does not match the declared encoding.
    """
    request = urllib.request.Request(url, headers=headers)
    request.add_header('Accept-encoding', 'gzip,deflate')

    # The context manager closes the underlying connection even if read()
    # raises, instead of leaving it for the garbage collector.
    with urlopen(request) as response:
        content = response.read()
        encoding = response.info().get('Content-Encoding')

    # Content-coding tokens are case-insensitive and may carry stray
    # whitespace; "x-gzip" is the legacy alias of "gzip".
    encoding = (encoding or '').strip().lower()
    if encoding in ('gzip', 'x-gzip'):
        return gZip(content)
    if encoding == 'deflate':
        return deflate(content)
    return content
def gZip(data: bytes) -> bytes:
    """Decompress a gzip-encoded byte string and return the original bytes.

    Args:
        data: Complete gzip stream (header, deflate payload, trailer).

    Returns:
        The decompressed payload.
    """
    # Use the reader as a context manager so it is closed deterministically
    # rather than whenever the object happens to be collected.
    with gzip.GzipFile(fileobj=BytesIO(data)) as stream:
        return stream.read()
def deflate(data: bytes) -> bytes:
    """Decompress a ``deflate``-encoded body.

    Servers disagree on what "deflate" means: some send a bare DEFLATE
    stream, others wrap it in the zlib container.  The bare form is tried
    first and the zlib-wrapped form is used as the fallback.

    Args:
        data: Compressed bytes in either of the two forms.

    Returns:
        The decompressed payload.

    Raises:
        zlib.error: If *data* is valid in neither form.
    """
    try:
        # Negative wbits tells zlib to expect no header or checksum.
        return zlib.decompress(data, -zlib.MAX_WBITS)
    except zlib.error:
        # Not a bare stream; retry expecting the zlib header and trailer.
        return zlib.decompress(data)
def main():
    """Download one fixed page with browser-like headers and print its HTML.

    The response bytes are decoded as GB18030 before printing; a body that
    is not valid GB18030 raises ``UnicodeDecodeError``.
    """
    url = "http://www.puyang.gov.cn/shownews.asp?id=109165"
    # Header set copied from a desktop Chrome request.
    headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Pragma": "no-cache",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36"
    }
    content = loadData(url, headers)
    html = content.decode("gb18030")
    print(html)
# Run the download only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
# NOTE: this doesn't seem to work quite right yet @_@