# Import packages
import requests
import os
# Fetch the JD home page and print the first 100 characters of the response
# Request the JD home page, then print the detected encoding and the first
# 100 characters of the decoded body.
url = "https://www.jd.com"
try:
    # A timeout keeps the script from blocking forever on a stalled connection.
    r = requests.get(url, timeout=10)
    # Turn 4xx/5xx status codes into an exception instead of printing an error page.
    r.raise_for_status()
    # Decode r.text with the encoding detected from the body itself.
    r.encoding = r.apparent_encoding
    print(r.apparent_encoding)
    print(r.text[:100])
except requests.RequestException:
    # Only network/HTTP failures are reported here; Ctrl-C and programming
    # errors are allowed to propagate instead of being silently swallowed.
    print("爬取失败")
# Search Baidu for the keyword "Python" and print the length of the result page
# Query Baidu search for `keyword`, then print the final request URL and the
# number of characters in the response text.
keyword = "Python"
try:
    # 'wd' is sent as the query-string parameter carrying the search term.
    kv = {'wd': keyword}
    # A timeout keeps the script from blocking forever on a stalled connection.
    r = requests.get('http://www.baidu.com/s', params=kv, timeout=10)
    # Show the URL actually requested (with the encoded query string) before
    # checking the status, so it is visible even when the request is rejected.
    print(r.request.url)
    r.raise_for_status()
    print(len(r.text))
except requests.RequestException:
    # Only network/HTTP failures are reported here; Ctrl-C and programming
    # errors are allowed to propagate instead of being silently swallowed.
    print("爬取失败")
# Download one image and save it to the specified path
# Download a single image and write its raw bytes to `path`.
url = "https://ss1.bdstatic.com/70cFvXSh_Q1YnxGkpoWK1HF6hhy/it/u=2039492333,3625458240&fm=26&gp=0.jpg"
# NOTE(review): the URL ends in .jpg but the target file uses a .psd
# extension - confirm the intended file name.
path = "E://abc.psd"
try:
    # A timeout keeps the script from blocking forever on a stalled connection.
    r = requests.get(url, timeout=10)
    # Without this check an HTTP error body (e.g. a 404 page) would be written
    # to disk and reported as a successful save.
    r.raise_for_status()
    # Binary mode: r.content is the undecoded response body. The `with` block
    # closes the file on exit, so no explicit close() is needed.
    with open(path, 'wb') as f:
        f.write(r.content)
    print("文件已保存")
except (requests.RequestException, OSError):
    # Covers network/HTTP failures and file-system errors (bad drive, missing
    # permissions); anything else propagates instead of being swallowed.
    print("爬取失败")
# Fetch a page while sending a custom request header (User-Agent)
# Request the JD home page with a browser-style User-Agent header and print
# characters 1000-1999 of the decoded body.
url = "https://www.jd.com"
try:
    # Send a desktop Chrome User-Agent string instead of the library default.
    kv = {'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) '
                        'Chrome/78.0.3904.108 Safari/537.36'}
    # A timeout keeps the script from blocking forever on a stalled connection.
    r = requests.get(url, headers=kv, timeout=10)
    # Turn 4xx/5xx status codes into an exception instead of printing an error page.
    r.raise_for_status()
    # Decode r.text with the encoding detected from the body itself.
    r.encoding = r.apparent_encoding
    print(r.text[1000:2000])
except requests.RequestException:
    # Only network/HTTP failures are reported here; Ctrl-C and programming
    # errors are allowed to propagate instead of being silently swallowed.
    print("爬取失败")