#豆瓣电影
#https://movie.douban.com/j/chart/top_list?type=3&interval_id=100%3A90&action=&
# start=0&limit=20
#https://movie.douban.com/j/chart/top_list?type=3&interval_id=100:90&action=&
# start=20&limit=20
#https://movie.douban.com/j/chart/top_list?type=3&interval_id=100:90&action=&
# start=40&limit=20
#https://movie.douban.com/j/chart/top_list?type=3&interval_id=100%3A90&action=&
# start=60&limit=20
#page 1 2 3 4 5 6 100
#start 0 20 40 60 80 100 1980
#start=(page-1)*20
import urllib.request
import urllib.parse
#创建获取信息
def create_request(page):
    """Build the GET request for one page of the Douban chart listing.

    The query string carries ``start=(page - 1) * 20`` and ``limit=20``.
    The assembled URL is printed before the request object is returned.

    :param page: page number; page 1 maps to ``start=0``.
    :return: a ``urllib.request.Request`` carrying a browser User-Agent header.
    """
    query = urllib.parse.urlencode({'start': (page - 1) * 20, 'limit': 20})
    url = (
        'https://movie.douban.com/j/chart/top_list?type=3&interval_id=100:90&action=&'
        + query
    )
    print(url)

    # Browser-like User-Agent sent with the request.
    user_agent = (
        'Mozilla/5.0 (Linux; U; Android 7.0; zh-cn; PRA-AL00X Build/HONORPRA-AL00X) '
        'AppleWebKit/537.36 (KHTML, like Gecko)Version/4.0 Chrome/37.0.0.0 '
        'MQQBrowser/8.0 Mobile Safari/537.36'
    )

    # Customised request object: URL plus headers.
    return urllib.request.Request(url=url, headers={'User-Agent': user_agent})
#获取内容
def get_content(request):
    """Send *request* and return the response body decoded as UTF-8 text.

    The response object is used as a context manager so the underlying
    connection is closed as soon as the body has been read, even if reading
    or decoding raises.

    :param request: a ``urllib.request.Request`` (or URL string) accepted by
        ``urllib.request.urlopen``.
    :return: the response body as ``str``.
    :raises UnicodeDecodeError: if the body is not valid UTF-8. Errors raised
        by ``urlopen`` itself propagate unchanged.
    """
    with urllib.request.urlopen(request) as response:
        return response.read().decode('utf-8')
#下载到本地
def down_load(page, content):
    """Save *content* to ``豆瓣_<page>.json`` (relative path, UTF-8 encoded).

    An existing file with the same name is overwritten.

    :param page: page number used in the file name.
    :param content: text to write.
    """
    target_name = f'豆瓣_{page}.json'
    with open(target_name, mode='w', encoding='utf-8') as out_file:
        out_file.write(content)
#程序入口
if __name__ == '__main__':
    # Read the inclusive page range from the user.
    start_page = int(input('请输入起始页码:'))
    end_page = int(input('请输入结束页码:'))

    # For each page: build the request, fetch the body, save it to disk.
    page = start_page
    while page <= end_page:
        request = create_request(page)
        content = get_content(request)
        down_load(page, content)
        page += 1
# ------------
#肯德基官网
#第一页 post请求
#http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?op=cname
# cname: 张家界
# pid:
# pageIndex: 1
# pageSize: 10
#第二页 post请求
# http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?op=cname
# cname: 张家界
# pid:
# pageIndex: 2
# pageSize: 10
# http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?op=cname
# cname: 张家界
# pid:
# pageIndex: 3
# pageSize: 10
import urllib.request
import urllib.parse
def ccreate_request(page):
    """Build the POST request for one page of the KFC store list.

    The form body contains ``cname``, ``pid``, ``pageIndex`` (set to *page*)
    and ``pageSize``, URL-encoded and then encoded to UTF-8 bytes.

    :param page: value sent as the ``pageIndex`` form field.
    :return: a ``urllib.request.Request`` carrying the form data and a
        browser User-Agent header.
    """
    # Ordered form fields; the order matches the encoded body.
    form_fields = [
        ('cname', '张家界'),
        ('pid', ''),
        ('pageIndex', page),
        ('pageSize', '10'),
    ]
    body = urllib.parse.urlencode(form_fields).encode('utf-8')

    # Browser-like User-Agent sent with the request.
    user_agent = (
        'Mozilla/5.0 (Linux; U; Android 7.0; zh-cn; PRA-AL00X Build/HONORPRA-AL00X) '
        'AppleWebKit/537.36 (KHTML, like Gecko)Version/4.0 Chrome/37.0.0.0 '
        'MQQBrowser/8.0 Mobile Safari/537.36'
    )

    # Customised request object: supplying ``data`` makes this a POST.
    return urllib.request.Request(
        url='http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?op=cname',
        data=body,
        headers={'User-Agent': user_agent},
    )
def get_content(request):
    """Send *request* and return the response body decoded as UTF-8 text.

    The response object is used as a context manager so the underlying
    connection is closed as soon as the body has been read, even if reading
    or decoding raises.

    :param request: a ``urllib.request.Request`` (or URL string) accepted by
        ``urllib.request.urlopen``.
    :return: the response body as ``str``.
    :raises UnicodeDecodeError: if the body is not valid UTF-8. Errors raised
        by ``urlopen`` itself propagate unchanged.
    """
    with urllib.request.urlopen(request) as response:
        return response.read().decode('utf-8')
def down_load(content, page):
    """Save *content* to ``肯德基_<page>.json`` (relative path, UTF-8 encoded).

    An existing file with the same name is overwritten.

    :param content: text to write.
    :param page: page number used in the file name.
    """
    target_name = f'肯德基_{page}.json'
    with open(target_name, mode='w', encoding='utf-8') as out_file:
        out_file.write(content)
#程序入口
if __name__ == '__main__':
    # Read the inclusive page range from the user.
    # First page to fetch.
    start_page = int(input('请输入起始页:'))
    # Last page to fetch; the prompt must name the END page so the two
    # inputs are distinguishable to the user.
    end_page = int(input('请输入结束页:'))
    for page in range(start_page, end_page + 1):
        # Build the request object for this page.
        request = ccreate_request(page)
        # Fetch the response body.
        content = get_content(request)
        # Save it to a local file.
        down_load(content, page)