# Python爬取豆瓣电影内容 (Scraping Douban movie chart data with Python)
import urllib.parse
import urllib.request
"""
下载前N页豆瓣数据
"""
def create_request(page, limit=20):
    """Build the request object for one page of the Douban top-list chart.

    Args:
        page: 1-based page number; page 1 maps to offset 0.
        limit: Number of entries requested per page. Defaults to 20, the
            page size that used to be hard-coded.

    Returns:
        A ``urllib.request.Request`` for the chart endpoint carrying the
        ``start``/``limit`` query parameters and a browser User-Agent.
    """
    # The base URL already ends with '&', so the encoded query is appended as-is.
    base_url = 'https://movie.douban.com/j/chart/top_list?type=5&interval_id=100%3A90&action=&'
    query = urllib.parse.urlencode({
        'start': (page - 1) * limit,  # offset of the first entry on this page
        'limit': limit,
    })
    headers = {
        # NOTE(review): presumably sent so the request looks like a regular
        # browser rather than urllib's default client -- confirm it is needed.
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36'
    }
    return urllib.request.Request(url=base_url + query, headers=headers)
def get_content(request):
    """Send *request* and return the response body decoded as UTF-8 text.

    Args:
        request: A URL string or ``urllib.request.Request`` accepted by
            ``urllib.request.urlopen``.

    Returns:
        The full response body as a ``str``.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    # Use the response as a context manager so the underlying connection is
    # closed even when read() or decode() raises.
    with urllib.request.urlopen(request) as response:
        return response.read().decode('utf-8')
def down_load(page, content):
    """Save *content* to ``douban_<page>.json`` in the current directory.

    Args:
        page: Page number used to build the output file name.
        content: Text to write; the file is encoded as UTF-8 and any
            existing file with the same name is overwritten.
    """
    file_name = f'douban_{page}.json'
    with open(file_name, 'w', encoding='utf-8') as out_file:
        out_file.write(content)
if __name__ == '__main__':
    # Ask for the first and last page to download (prompts are in Chinese:
    # "enter the start page number" / "enter the end page number").
    start_page = int(input("请输入起始页码"))
    end_page = int(input("请输入结束页码"))
    # range() excludes its stop value, so add 1 to make sure the end page the
    # user typed is downloaded as well.
    for page in range(start_page, end_page + 1):
        request = create_request(page)
        content = get_content(request)
        down_load(page, content)