# 本关任务:使用 Python 程序,通过网页的 URL 获取服务器返回的超文本(HTML)文件,并将其保存为文本文件。
# -*- coding: utf-8 -*-
import requests
# Home page of Hunan University (the page to download)
url = 'http://www.hnu.edu.cn/'
# Name of the text file the downloaded page is written to
filename = "hnu.txt"
def gethtml(url, timeout=10):
    """Download the page at ``url`` and return its body as UTF-8 text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.

    Returns:
        The response body as a ``str``. The body is always decoded as
        UTF-8, whatever charset the server declares. The HTTP status code
        is not checked, so the body of an error page is returned as-is.

    Raises:
        requests.exceptions.RequestException: propagated from
            ``requests.get`` (e.g. connection failure or timeout).
    """
    # Add code following the comment hints below to complete the task
    ###### Begin ######
    # 1. Fetch the web page and return it
    headers = {
        # NOTE(review): this cookie's signin_redirect value points at
        # www.jianshu.com, so it looks copied from a browser session on a
        # different site than the one fetched here; it is presumably not
        # needed -- confirm and remove.
        'cookie': '__yadk_uid=1krj2OUwUMIYBXF6de9vdt00osSXCZL4; sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%22168408938e572a-0765f4fa1b3e94-5c6e3d76-1310720-168408938eb4d8%22%2C%22%24device_id%22%3A%22168408938e572a-0765f4fa1b3e94-5c6e3d76-1310720-168408938eb4d8%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E7%9B%B4%E6%8E%A5%E6%B5%81%E9%87%8F%22%2C%22%24latest_referrer%22%3A%22%22%2C%22%24latest_referrer_host%22%3A%22%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC_%E7%9B%B4%E6%8E%A5%E6%89%93%E5%BC%80%22%7D%7D; read_mode=day; default_font=font2; locale=zh-CN; _m7e_session_core=403efa576d24ed205d7b68139d3d789c; Hm_lvt_0c0e9d9b1e7d617b3e6842e85b9fb068=1565858739,1565858820,1565859264,1565916094; signin_redirect=https%3A%2F%2Fwww.jianshu.com%2F; Hm_lpvt_0c0e9d9b1e7d617b3e6842e85b9fb068=1565916316',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0',
    }
    # Without an explicit timeout requests waits indefinitely on a server
    # that never answers, which would hang the whole script.
    response = requests.get(url, headers=headers, timeout=timeout)
    # Force UTF-8 decoding instead of the charset guessed from the headers.
    response.encoding = 'utf-8'
    html = response.text
    return html
    ####### End #######
def saveinfo(html, filename):
    """Write ``html`` to the file ``filename`` as UTF-8 text.

    Args:
        html: Text to save.
        filename: Path of the output file; it is created if missing and
            overwritten if it already exists.
    """
    # Add code following the comment hints below to complete the task
    ###### Begin ######
    # 2. Save html to a text file
    # Write to the caller-supplied path; a fixed 'hnu.txt' here would make
    # the ``filename`` argument silently ignored.
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(html)
    ####### End #######
#
#
# Script entry: download the page, then persist it to disk.
html = gethtml(url=url)
saveinfo(html=html, filename=filename)