python中requests模块使用_get_post_文件上传_图片爬取

煜磊

已于 2023-10-25 22:06:55 修改

阅读量165

点赞数

分类专栏： python 文章标签： python 开发语言

于 2023-09-25 08:40:02 首次发布

本文链接：https://blog.csdn.net/weixin_42786460/article/details/133265879

版权

python 专栏收录该内容

10 篇文章 0 订阅

订阅专栏

python中requests模块使用_get_post_文件上传_图片爬取

文章目录

- python中requests模块使用_get_post_文件上传_图片爬取

1 模拟浏览器指纹（自定义 User-Agent 请求头）

import requests
# Target page, plus a custom User-Agent header that replaces the library's
# default one with a Baiduspider crawler string.
url = "http://1.1.1.154/pythonSpider/index.html"
headers = {
    "User-Agent": "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)"
}

# The context manager closes the session's pooled connections on exit, and
# the timeout keeps an unresponsive host from blocking the script forever.
with requests.Session() as req:
    res = req.get(url=url, headers=headers, timeout=10)
    # Print the headers that were actually sent, to confirm the override.
    print(res.request.headers)

2 发送 GET 请求

import requests
# Send a GET request with query-string parameters and print the response body.
url = "http://1.1.1.154/test/1.php"
headers = {
    "User-Agent": "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)"
}
# Passed via `params`, so requests encodes them into the URL's query string.
params = {
    'id': 'csm'
}

# The context manager releases the session's connections; the timeout stops
# the call from waiting indefinitely on a server that never answers.
with requests.Session() as req:
    res = req.get(url=url, headers=headers, params=params, timeout=10)
    print(res.text)

3 发送 POST 请求

import requests
# Send a form-encoded POST request and print the response body.
url = "http://1.1.1.154/test/1.php"
headers = {
    "User-Agent": "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)"
}
# Form fields for the request body (named `data` to match the keyword
# argument it is passed to; the earlier name `date` was a typo).
data = {
    'id': 'csm'
}

# The context manager releases the session's connections; the timeout stops
# the call from waiting indefinitely on a server that never answers.
with requests.Session() as req:
    res = req.post(url=url, headers=headers, data=data, timeout=10)
    print(res.text)

4 文件上传

import requests
import bs4
# Upload a file through a multipart form and extract the stored path that the
# server echoes back inside a <pre> element of the response page.
url = "http://1.1.1.154/DVWA-2.0.1/vulnerabilities/upload/"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/117.0",
    # NOTE(review): fixed PHPSESSID; presumably only valid while that
    # server-side session exists, so replace it with a current one.
    "Cookie": "security=low; PHPSESSID=cbg23venv9cp8hl2et169ct1tu",
    "Referer": "http://1.1.1.154/DVWA-2.0.1/vulnerabilities/upload/"
}
# Ordinary form fields submitted alongside the file part.
data = {
    'MAX_FILE_SIZE': 100000,
    'Upload': 'Upload'
}
# Multipart part named "uploaded": (filename, content bytes, content type).
files = {
    'uploaded': ('2.php', b'<?php phpinfo(); ?>', 'image/jpeg')
}

# The context manager releases the session's connections; the timeout stops
# the call from waiting indefinitely on a server that never answers.
with requests.Session() as req:
    res = req.post(url=url, headers=headers, data=data, files=files, timeout=10)

html = bs4.BeautifulSoup(res.text, 'lxml')
pre = html.find_all('pre')  # list of every <pre> tag in the response
print(pre)
if not pre:
    # Without this guard, pre[0] would raise IndexError on a page that has
    # no <pre> element (e.g. a login redirect or an error page).
    print("upload response contained no <pre> element")
else:
    pre = pre[0].text
    print(pre)
    # Keep everything before the first space. Unlike slicing with
    # str.find(), this does not drop the last character when the text
    # contains no space at all.
    path = pre.split(' ', 1)[0]
    print(f"path:{url}{path}")

5 处理服务器超时

import requests
# POST to the test endpoint with a 5-second timeout and report timeouts
# separately from other request failures.
url = "http://1.1.1.154/test/1.php"
headers = {
    "User-Agent": "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)"
}
# NOTE(review): 'sleep' presumably asks the endpoint to delay its reply by
# that many seconds; confirm against the server-side 1.php script.
data = {
    'id': 'csm',
    'sleep': '5'
}

with requests.Session() as req:
    try:
        res = req.post(url=url, headers=headers, data=data, timeout=5)
    except requests.exceptions.Timeout:
        # Only genuine timeouts get the timeout message; a bare `except:`
        # would also mislabel Ctrl-C and connection errors.
        print('服务器超时！')
    except requests.exceptions.RequestException as exc:
        # Other request failures (refused connection, DNS failure, ...) are
        # reported as such instead of being mislabelled as a timeout.
        print(f'请求失败：{exc}')
    else:
        print(res.text)

6 爬取图片文件

import requests
import re
import time
# Start page that is scanned for image links.
url = 'http://192.168.225.204:9000/pythonSpider/index.html'
def get_html(url, timeout=10):
    """Fetch *url* with a GET request and return the raw response body.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before requests gives up.
            Without a timeout the call could block forever on a server
            that never responds.

    Returns:
        The response body as bytes (not decoded).
    """
    res = requests.get(url=url, timeout=timeout)
    return res.content

def download(path, save_dir="D:/testimage", timeout=10):
    """Download the resource at *path* and save it as a timestamp-named .jpg.

    Args:
        path: URL of the image to fetch.
        save_dir: Directory the file is written into; created when missing
            so the write does not fail with FileNotFoundError.
        timeout: Seconds to wait for the server before requests gives up.
    """
    import os

    resp = requests.get(url=path, timeout=timeout).content
    os.makedirs(save_dir, exist_ok=True)
    # The current timestamp is used as the file name.
    img_path = f"{save_dir}/{time.time()}.jpg"
    with open(img_path, "wb") as f:  # binary mode: the body is raw bytes
        f.write(resp)
def get_img_path(url):
    """Find every ``style/<name>.jpg`` link on the page at *url* and download it.

    Args:
        url: Address of the page to scan. Relative image links are resolved
            against it.
    """
    from urllib.parse import urljoin

    html = get_html(url)
    # Relative image paths of the form style/<word characters>.jpg.
    img_urls = re.findall(r"style/\w*\.jpg", html.decode())
    for img_url in img_urls:
        print(img_url)
        # urljoin resolves the relative link against the page URL. Slicing
        # at the last '/' breaks for URLs without a path component or with
        # a '/' inside the query string.
        download(urljoin(url, img_url))
# Run the crawl against the configured start page.
get_img_path(url)