# Source code
import urllib.request
import urllib.parse
# Fetch one page of Douban's movie chart list with a GET request, save the
# response text to eg/dy.txt and echo it to stdout.

# Base URL of the GET endpoint. It already ends with '&', so the paging
# parameters (start / limit) can be appended to it directly.
get_url = 'https://movie.douban.com/j/chart/top_list?type=5&interval_id=100%3A90&action=&'

# Ask which page to fetch (1-based). int() raises ValueError on non-numeric
# input.
page = int(input('请输入你想要第几页的数据:'))
if page < 1:
    # Page 0 or a negative page would produce a negative 'start' offset.
    raise ValueError('页码必须大于等于1')  # "page number must be >= 1"
number = 20  # entries requested per page

# Build the GET parameters: 'start' is the offset of the first entry of the
# requested page, 'limit' is the page size.
data = {
    'start': (page - 1) * number,
    'limit': number,
}
# Convert the dict into a URL-encoded query string and append it to the URL.
query_string = urllib.parse.urlencode(data)
get_url += query_string

# Send the request with a browser-like User-Agent header.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36',
}
request = urllib.request.Request(url=get_url, headers=headers)
# Use the response as a context manager so the connection is always closed,
# even if reading or decoding fails.
with urllib.request.urlopen(request) as response:
    # Honour the charset declared in Content-Type; fall back to UTF-8 (the
    # previous hard-coded behaviour) when the server does not declare one.
    charset = response.headers.get_content_charset() or 'utf-8'
    all_data = response.read().decode(charset)

# NOTE: open(..., 'w') does not create missing parent directories, so the
# 'eg' directory must already exist.
with open('eg/dy.txt', 'w', encoding='utf-8') as fp:
    fp.write(all_data)
print(type(all_data))
print(all_data)