import requests
# ---------------------- 1. Add a User-Agent ----------------------
# headers = {
#     'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36'
# }
#
# response = requests.get('https://www.51job.com/', headers=headers)
#
# response.encoding = 'gbk'
#
# print(response.text)

# ---------------------- 2. Add a cookie ----------------------
# Request the Zhihu home page while sending a browser User-Agent and a
# previously captured cookie header, then print the returned HTML.
#
# NOTE(review): the cookie below is a hard-coded browser session (it includes
# auth-looking entries such as z_c0 and SESSIONID). Credentials should not be
# committed to source; consider loading the value from an environment variable
# or a local, untracked file instead.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/92.0.4515.131 Safari/537.36'
    ),
    # One cookie per literal; adjacent literals are concatenated into the
    # single "name=value; name=value; ..." header string.
    'cookie': (
        '_zap=4d58cb38-ec48-47b2-9e47-8ff8ef963486; '
        '_xsrf=veOhJnW2hAC2BDcgK8KTU4NqUrLUYuTe; '
        'd_c0="AHAQrl0PjROPTn2Bv2wpyQXt8QUwjW6yjTU=|1628663892"; '
        'Hm_lvt_98beee57fd2ef70ccdd5ca52b9740c49=1628663893; '
        '__snaker__id=EYMWPXdZknPXfAye; '
        'gdxidpyhxdE=QQQ9DNLdBqx13etuowzGeLbMfcPXBfHckpwZ%2BxZp06A8zi9JHMPDxcbRi4o%5Ca053y5oVnjBBBb99XeqPZicZtcN2%5CR7snyRY8LQP%2Ff1Lu%5CEaPuZo9DldazSjxxzCmy0GXU7zlEHvH5jbqRxsq3d4HX5PN3j%5Cw7yrH2Ls29BYDaDCm0%2Fb%3A1628664795621; '
        '_9755xjdesxxd_=32; '
        'YD00517437729195%3AWM_NI=xDnvQnHhpYF6yUCebu826Rf%2FtJfpY7qOemzjWKJqvTeiC%2FN7ac2Cye8KddfyGIjjNxMaj1gnnUNWT6pGUEzV16y8CNLWmizD0SakKVmh9ELwcWrCleatFrWHNaWfd%2F1ZdWM%3D; '
        'YD00517437729195%3AWM_NIKE=9ca17ae2e6ffcda170e2e6eed4b479ad9e898bbc2591868bb3d85e968a9aaab566acb08196e862bbb4b7ade52af0fea7c3b92abc9ca297d4668cb8c0bac53ebb8b8d86e17ffceefab3e525b896aa91dc3db391ab8ef96295958692f560a7b78dd0cd3da3bdfea4fc6ffc95ac85e5738597a68bcd748fbfa6d2e666ae8b82b8d73eb4999ba6f95ef3eab7d9c2469089a38af950f48daf8cca5eafb8f7a6cf7da189bea4ef6fa3ac8a93d6448ebf9987e725f386acb8d037e2a3; '
        'YD00517437729195%3AWM_TID=Lftr4M6kyApFUEUBFFcv0DqgQ5uBSC%2FF; '
        'captcha_session_v2="2|1:0|10:1628663907|18:captcha_session_v2|88:OCtMcVVod1VSRDZ4Q2tTbGNyNUVIUXdJREc5Y0lSbjJyMklwSWh5MTA0NVhpL3JLak1CZXBPMEQ1ZlcycGludQ==|7f6c9d93866de2c49808fd0c3fa7ec6f7ef407e0fa6678072b00b577b351fb5f"; '
        'captcha_ticket_v2="2|1:0|10:1628663918|17:captcha_ticket_v2|704:eyJ2YWxpZGF0ZSI6IkNOMzFfWVZ5NHQwWm1wZlJWN1pSQzd6czQ0dUF6cG51Q2xFbHk2d0h0RjdYSWt4RDQ3ODJuOXMta2ROclliYkt6SFNGWUNvc2NCLklvdll0ejVZSmM5T0lOR1lwa2gxTTQwRWlVOWtmdmZqN3U3Q2g2Y2ZQU1c2VjJ3UDJvV0ZWa2hpLTJWUlF6ZDdmTWItMnRDV1dfOHM2ZkNpcFRsYlhOdUZaOXpVVDlCMXhGRy0xTkdoUnJrWlpkUERmelNiVzZMMk83WVVkSkVUSjJzZ1F1WEtnODBIaGV0NlNjcVpUdUt4ZUhSUFNyS1lOUGRfeTl5dEI5TUduS2xFUVpRYzB6REs3d0dzTWpKbW1FUzBiSlBDdUo1WURxd1F0cVdFLTFOX01TQUJOSjdraEYxbDZzSUxRcVVaZmE1NDR5OXRKVXBwa014TkQ2N3lDR0xxNG4yWENUaGhlLUlsMEEyTHFuV3RPa1ppSy1STENCWVVRdkZKaDVYMWR4YVhaeWl5QnpRZ2FrUE5UelNRVmg3RzJVeUJmU1VGVGRyMHpFODktWTcuRENMNzA5cVEuRnZTN0NfWk9XN0swOW9vaUs1anJMcC1SbHotWGRPdE9wTnZpbGJXY3U5dU0uSjFhNTFrODYxREJpZjhJQXJ4X21XMnotTmZMd0RkTzZHSEFpdkJhMyJ9|66c432f2881af153cce75b5940defd6b832a569f89e6b9eaebfd514b1d4ea329"; '
        'z_c0="2|1:0|10:1628663938|4:z_c0|92:Mi4xaW5CWUdRQUFBQUFBY0JDdVhRLU5FeVlBQUFCZ0FsVk5ncjRBWWdBZkNmZWVoMkphV0tZWDdSOUl6MVo1VFdPOXJB|58971e1efcbfec5e768e019f0c12ec85652bb22b2917e2cff02d68947b812353"; '
        'unlock_ticket="ADAc3rNA2xAmAAAAYAJVTYp3E2GJixRcAVFMYYkPJW256QCAFDClgw=="; '
        'tst=r; '
        'Hm_lpvt_98beee57fd2ef70ccdd5ca52b9740c49=1628664181; '
        'SESSIONID=ALeO6SNDSqNPVjPc3Ao25v7TXs18vru2Tvmqpwqdoal; '
        'KLBRSID=dc02df4a8178e8c4dfd0a3c8cbd8c726|1628664185|1628663890; '
        'JOID=UVARB04wYKVlY693JDctP1npsqUxeiDAK1P3NWBlGuc1NMMsS04dtwNuqHslc7UnVsGLPcQ2PuJWOI7F7kuQiRE=; '
        'osd=W1gVC086aKFpYqV_IDssNVHtvqQ7ciTMKln_MWxkEO8xOMImQ0oRtglmrHckeb0jWsCBNcA6P-hePILE5EOUhRA='
    ),
}
# A timeout keeps the script from blocking forever if the server never answers.
response = requests.get('https://www.zhihu.com/', headers=headers, timeout=10)
print(response.text)
# Find the JSON data API endpoint, then send the request to it
import requests
# Call the Toutiao hot-board JSON endpoint directly (instead of scraping the
# rendered page) and print the decoded JSON payload.
url = (
    "https://www.toutiao.com/hot-event/hot-board/"
    "?origin=toutiao_pc"
    "&_signature=_02B4Z6wo00d01X.g2AgAAIDDl0iJmFbkIVl.xNyAAD7ve5rc90eYpUagYiMEKQrfIz8iJPKuacCxb32tQcqbwZpt0i3u2X-hae-fgV3NqtDiEbEJK7EPc235gzTPL4EhVZ7cxFeHkLUI27pv29"
)
# A timeout keeps the script from blocking forever if the server never answers.
res = requests.get(url, timeout=10)
print(res.json())
# Image download
def download_image(img_url, save_dir='files'):
    """Download the image at ``img_url`` and save it inside ``save_dir``.

    The local file name is the last ``/``-separated segment of the URL.

    Args:
        img_url: URL of the image to fetch.
        save_dir: Directory the file is written to; it is created when it
            does not exist yet. Defaults to ``'files'``.

    Raises:
        requests.RequestException: If the request fails or times out.
        OSError: If the directory or file cannot be written.
    """
    import os  # local import so this block does not depend on the file header

    # Fetch the raw image bytes; the timeout avoids hanging on a dead server.
    res = requests.get(img_url, timeout=10)
    data = res.content

    # Make sure the target directory exists before opening the file in it.
    os.makedirs(save_dir, exist_ok=True)
    file_name = img_url.split("/")[-1]
    # The context manager closes the file even if the write raises.
    with open(f'{save_dir}/{file_name}', "wb") as f:
        f.write(data)


if __name__ == '__main__':
    download_image('https://p5.toutiaoimg.com/img/pgc-image/9f5d102756354b6db8fa9408c57d01c8~cs_noop.png')
# Image site
import requests
from re import findall
def download_image(img_url, save_dir='files/千图网'):
    """Download the image at ``img_url`` and save it inside ``save_dir``.

    The local file name is the last ``/``-separated segment of the URL with
    everything from the first ``!`` onward removed.

    Args:
        img_url: URL of the image to fetch.
        save_dir: Directory the file is written to; it is created when it
            does not exist yet. Defaults to ``'files/千图网'``.

    Raises:
        requests.RequestException: If the request fails or times out.
        OSError: If the directory or file cannot be written.
    """
    import os  # local import so this block does not depend on the file header

    # Fetch the raw image bytes; the timeout avoids hanging on a dead server.
    response = requests.get(img_url, timeout=10)
    data = response.content

    # Make sure the target directory exists before opening the file in it.
    os.makedirs(save_dir, exist_ok=True)
    file_name = img_url.split("/")[-1].split("!")[0]
    with open(f'{save_dir}/{file_name}', 'wb') as fp:
        fp.write(data)


if __name__ == '__main__':
    # Fetch the listing page and pull every <img src="..."> value out of it.
    response = requests.get('https://www.58pic.com/tupian/qixi-0-0.html', timeout=10)
    data = response.text
    result = findall(r'(?s)<img src="(\S+?)">', data)
    print(result)
    for x in result:
        # The matched value is prefixed with "https:" to form the full URL.
        download_image(f'https:{x}')
        print('下载成功')