python爬虫学习16
-
requests 库的高级用法
-
文件上传
# Upload the previously downloaded favicon.ico to httpbin as a multipart form.
import requests

url = 'https://www.httpbin.org/post'

# The relative path is resolved against the current working directory, so
# favicon.ico must be present there when the script runs.
# The `with` block guarantees the file handle is closed as soon as the
# request has been sent, instead of staying open until the interpreter exits.
with open('favicon.ico', 'rb') as icon:
    files = {'file': icon}
    r = requests.post(url, files=files)

print(r.text)
运行结果:返回的响应中,上传的文件内容出现在 files 字段里,而 form 字段为空,说明 requests 会把上传的文件单独放在 files 字段中提交。
-
Cookie 设置
# Fetch a page and show the cookies the server set on the response.
import requests

response = requests.get('https://www.baidu.com')
jar = response.cookies

# First the cookie jar object itself, then every cookie as "name = value".
print(jar)
for name, content in jar.items():
    print(f'{name} = {content}')
运行结果:
# 直接使用Cookie维持登录状态(以GitHub为例) # 登录 GitHub 获取 cookie # 下图是我登录GitHub后找到的cookie
# Reuse a logged-in GitHub session by sending the browser's cookie string in
# the Cookie request header, then save the returned page to a file.
import requests

url = 'https://github.com/'

# NOTE(review): the cookie below is a hard-coded session value kept for
# demonstration; substitute your own and avoid committing live credentials
# to version control.
headers = {
    'cookie': '_octo=GH1.1.1662807162.1635003280; _device_id=172b02e053edbd3c0916c3d9e18629c6; tz=Asia%2FShanghai; has_recent_activity=1; tz=Asia%2FShanghai; color_mode=%7B%22color_mode%22%3A%22auto%22%2C%22light_theme%22%3A%7B%22name%22%3A%22light%22%2C%22color_mode%22%3A%22light%22%7D%2C%22dark_theme%22%3A%7B%22name%22%3A%22dark%22%2C%22color_mode%22%3A%22dark%22%7D%7D; logged_in=yes; dotcom_user=suchangGuo; user_session=CdRxR9aFlRTeOzI2tgIIF0EkkT_zZorFKwLaGWE8zFIyRox3; _gh_sess=oXO4mMftS%2Bn1S5bGvnpAv9Ih0W65ms5g3ooJv77X7g5U6HANAlnymeuYDDQnCEYExAsnpj5r37WtRZFcwTJBso3VxC4RpNs9E%2BKIaI8s9RgQuxak8m8VQbs8kmPXLZM%2BN99cd%2BZzu%2BxFS%2BL5LdqkImEB9YXAW6FYqbeUdEm8KkdsZ2jXpypBNT1ZM%2ByCkQZ1y8iPLq30AYKLAVlHh7ppwk%2FXdWKUWQRuh2Tb3G1ruVTPBwp%2Fj6zzfbPLvMYNckCArSciZXSOQWNNAJZsGpuGHP%2BSeI0x7yMznVYRqayES1QhwjgCy%2FRtUxry370pEuOgXbvzNm%2BMnvBDXLVtcI1%2BibDiaOhkQVs4LCYe1K7chHcwOzZqbb44uU0dwE9cOfR0l5Fe%2FwucPkeIpMk8O6m0y37VInQ2aCymnwzYotoaGU5Oz1B%2Fn4qy736aP66T0MgxgEFeO2SJu3XaRlUkwKeD5MdlUOMfrQ5IGJtjprkLhQeFwnOM85m650d9DzJQV4LI2oN8bnAKxmh%2BAsvTQyNxbw%2B4FSGRtc2v5OPZ1GFXIL%2BqJYSNYavRwVKIf%2B4wKQfx4QMtW4jvsvtsDiUaUQNhzitTHdoDSXS6iyga2cGgesUlqxNoElx19R5wI7OuLfly14Hxf%2BoxkN04TYxECe7i34fh6o5heVBuOXlM4MGwD9n8xb3hd91%2F0EpfTL1EQ6AlfOKO17sJkxh9ZblbRkv4Qxrsnj57vl4BftbasZ%2Fa0FzkAqj%2F8ci8e9Kajy7uZzpjoNFHkuFCKIQhIKZoLbuEXwcMmF5rjwUDUeH6yly0hAS4YE1R70Qf2tbUcV%2FJsDCH49wtnu03dhPniytIsayrk6EtM2jmWeWDrnGNao1Uu3T3cK4ASys4VSH8hsvfmzkiZHELAnJ%2BzXKwpMSdZDb1PDuu0Xou%2FOiJoyQtAw%3D%3D--Y11NmLqVVLv9rvgo--DvkpSyI94WND4WSVneLRuw%3D%3D',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36 SLBrowser/8.0.0.2242 SLBChan/11',
}

r = requests.get(url, headers=headers)

# Overwrite rather than append, so that re-running the script leaves exactly
# one copy of the page in receive.txt instead of accumulating duplicates.
with open('receive.txt', 'w', encoding='utf-8') as f:
    f.write(r.text)
运行结果:当前文件夹下多了一个名为 receive.txt 的文件,里面保存了返回的网页源码。
# Pass cookies through the `cookies` parameter by building a
# RequestsCookieJar from a raw Cookie header string.
import requests as req

url = 'https://github.com/'

# NOTE(review): the cookie below is a hard-coded session value kept for
# demonstration; substitute your own and avoid committing live credentials
# to version control.
cookies = '_octo=GH1.1.1662807162.1635003280; _device_id=172b02e053edbd3c0916c3d9e18629c6; tz=Asia%2FShanghai; has_recent_activity=1; tz=Asia%2FShanghai; color_mode=%7B%22color_mode%22%3A%22auto%22%2C%22light_theme%22%3A%7B%22name%22%3A%22light%22%2C%22color_mode%22%3A%22light%22%7D%2C%22dark_theme%22%3A%7B%22name%22%3A%22dark%22%2C%22color_mode%22%3A%22dark%22%7D%7D; logged_in=yes; dotcom_user=suchangGuo; user_session=CdRxR9aFlRTeOzI2tgIIF0EkkT_zZorFKwLaGWE8zFIyRox3; _gh_sess=oXO4mMftS%2Bn1S5bGvnpAv9Ih0W65ms5g3ooJv77X7g5U6HANAlnymeuYDDQnCEYExAsnpj5r37WtRZFcwTJBso3VxC4RpNs9E%2BKIaI8s9RgQuxak8m8VQbs8kmPXLZM%2BN99cd%2BZzu%2BxFS%2BL5LdqkImEB9YXAW6FYqbeUdEm8KkdsZ2jXpypBNT1ZM%2ByCkQZ1y8iPLq30AYKLAVlHh7ppwk%2FXdWKUWQRuh2Tb3G1ruVTPBwp%2Fj6zzfbPLvMYNckCArSciZXSOQWNNAJZsGpuGHP%2BSeI0x7yMznVYRqayES1QhwjgCy%2FRtUxry370pEuOgXbvzNm%2BMnvBDXLVtcI1%2BibDiaOhkQVs4LCYe1K7chHcwOzZqbb44uU0dwE9cOfR0l5Fe%2FwucPkeIpMk8O6m0y37VInQ2aCymnwzYotoaGU5Oz1B%2Fn4qy736aP66T0MgxgEFeO2SJu3XaRlUkwKeD5MdlUOMfrQ5IGJtjprkLhQeFwnOM85m650d9DzJQV4LI2oN8bnAKxmh%2BAsvTQyNxbw%2B4FSGRtc2v5OPZ1GFXIL%2BqJYSNYavRwVKIf%2B4wKQfx4QMtW4jvsvtsDiUaUQNhzitTHdoDSXS6iyga2cGgesUlqxNoElx19R5wI7OuLfly14Hxf%2BoxkN04TYxECe7i34fh6o5heVBuOXlM4MGwD9n8xb3hd91%2F0EpfTL1EQ6AlfOKO17sJkxh9ZblbRkv4Qxrsnj57vl4BftbasZ%2Fa0FzkAqj%2F8ci8e9Kajy7uZzpjoNFHkuFCKIQhIKZoLbuEXwcMmF5rjwUDUeH6yly0hAS4YE1R70Qf2tbUcV%2FJsDCH49wtnu03dhPniytIsayrk6EtM2jmWeWDrnGNao1Uu3T3cK4ASys4VSH8hsvfmzkiZHELAnJ%2BzXKwpMSdZDb1PDuu0Xou%2FOiJoyQtAw%3D%3D--Y11NmLqVVLv9rvgo--DvkpSyI94WND4WSVneLRuw%3D%3D'
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36 SLBrowser/8.0.0.2242 SLBChan/11',
}

jar = req.cookies.RequestsCookieJar()
for cookie in cookies.split(';'):
    # Pairs are separated by "; ", so every fragment after the first starts
    # with a space; strip it so the cookie name is stored without it.
    cookie = cookie.strip()
    if not cookie:
        # A trailing ';' yields an empty fragment; there is nothing to set.
        continue
    # Split on the first '=' only, because cookie values may contain '='.
    key, value = cookie.split('=', 1)
    jar.set(key, value)

r = req.get(url, cookies=jar, headers=headers)
print(r.text)
运行结果:
一大串网页代码.jpg
-
今日结束,明日可能继续……