# Snippet 1: fetch an Amazon product page with a custom User-Agent.
import requests


def cr_amaze(url):
    """Fetch *url* with a custom User-Agent and print part of the page.

    On success, prints characters 200 through 3999 of the decoded response
    body. If the request fails (connection error, timeout, or an HTTP error
    status), prints "失败" instead. Always returns None.
    """
    # Override the default request headers (presumably because the site
    # rejects the stock python-requests User-Agent -- confirm if it matters).
    kv = {"user-agent": "Mozilla/5.0"}
    try:
        # requests applies no timeout by default; without one a stalled
        # server would hang the script indefinitely.
        r = requests.get(url, headers=kv, timeout=10)
        r.raise_for_status()
    except requests.RequestException:
        # Only network/HTTP failures are reported as "失败"; programming
        # errors and Ctrl-C are no longer silently swallowed.
        print("失败")
        return
    # Decode using the encoding detected from the body rather than the one
    # declared in the response headers.
    r.encoding = r.apparent_encoding
    print(r.text[200:4000])


url = "https://www.amazon.cn/ASICS-亚瑟士-男-跑步鞋GEL-GALAXY-9-T6G0N-4901-蓝色-白色-黄色-41-5/dp/B06VVY1T73/ref=lp_1897086071_1_1?s=shoes&ie=UTF8&qid=1498642523&sr=1-1&nodeID=1897086071&psd=1"
cr_amaze(url)


# Snippet 2: Baidu search. Self-contained, hence the repeated import.
import requests

# The search term is sent as the "wd" query parameter.
kv = {"wd": input("请输入搜索内容")}
try:
    r = requests.get("http://www.baidu.com/s", params=kv, timeout=10)
    # Show the final URL (with the encoded query) before checking the status.
    print(r.url)
    r.raise_for_status()
    print(len(r.text))
except requests.RequestException:
    print("失败")
# Snippet 3: download a single image to disk.
import os

import requests

url = "https://timgsa.baidu.com/timg?image&quality=80&size=b9999_10000&sec=1498662852777&di=7e160ccc431f257866cc5bf355b4116e&imgtype=0&src=http%3A%2F%2Fn.sinaimg.cn%2Feladies%2Ftransform%2F20160823%2ForrG-fxvcsrm2264890.jpg"
root = "D://path//"
path = root + "4.jpg"

# 1. Create the download directory if it does not exist yet.
#    exist_ok avoids the check-then-create race of exists() + mkdir().
os.makedirs(root, exist_ok=True)

# 2. Download only when the target file is not already present.
if os.path.exists(path):
    print("文件已经存在")
else:
    try:
        # requests applies no timeout by default; set one so a stalled
        # server cannot hang the script.
        r = requests.get(url, timeout=10)
        # Without this check an HTTP error page would be saved as the image.
        r.raise_for_status()
    except requests.RequestException:
        print("爬取失败")
    else:
        # The file is opened only after a successful response, so a failed
        # download never leaves an empty or bogus file behind. The with
        # statement closes the file; no explicit close() is needed.
        with open(path, "wb") as f:
            f.write(r.content)
python_爬虫限制
最新推荐文章于 2021-04-20 22:42:05 发布