import socket
import re
# Image URLs to download. They must use http:// rather than https:// because
# the request is sent over a plain (non-TLS) socket on port 80.
urls = [
    'http://pic.netbian.com/uploads/allimg/220211/004115-1644511275bc26.jpg',
    'http://pic.netbian.com/uploads/allimg/220215/233510-16449393101c46.jpg',
    'http://pic.netbian.com/uploads/allimg/211120/005250-1637340770807b.jpg',
]

# Server that hosts every entry in ``urls``.
HOST = 'pic.netbian.com'
PORT = 80
# Number of bytes requested from the socket per recv() call.
RECV_SIZE = 4096
USER_AGENT = (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36'
)
# Blank line that separates the HTTP response headers from the body.
HEADER_END = b'\r\n\r\n'


def build_request(url):
    """Return the encoded HTTP/1.0 GET request for *url*.

    Args:
        url: Absolute http:// URL of the resource to request.

    Returns:
        The request line, the Host and User-Agent headers and the
        terminating blank line, encoded as bytes.
    """
    lines = [
        'GET ' + url + ' HTTP/1.0',
        'Host:' + HOST,
        'User-Agent: ' + USER_AGENT,
        # Two empty items make the join end with the "\r\n\r\n" terminator.
        '',
        '',
    ]
    return '\r\n'.join(lines).encode()


def extract_body(response):
    """Strip the HTTP headers from *response* and return the body bytes.

    Args:
        response: Raw bytes of a complete HTTP response.

    Returns:
        Everything after the first blank line (``\\r\\n\\r\\n``).

    Raises:
        ValueError: If *response* contains no header terminator.
    """
    _headers, separator, body = response.partition(HEADER_END)
    if not separator:
        raise ValueError('HTTP response has no header/body separator')
    return body


def fetch(url):
    """Request *url* from HOST and return the raw response (headers + body).

    Args:
        url: Absolute http:// URL of the resource to request.

    Returns:
        All bytes received until the server closed the connection.
    """
    chunks = []
    # The context manager closes the socket even if connect/send/recv raises.
    with socket.socket() as client:
        client.connect((HOST, PORT))
        # sendall() keeps sending until the whole request is transmitted;
        # plain send() may stop after a partial write.
        client.sendall(build_request(url))
        while True:
            data = client.recv(RECV_SIZE)
            # recv() returns b'' once the peer has closed the connection.
            if not data:
                break
            chunks.append(data)
    return b''.join(chunks)


def main():
    """Download every URL in ``urls`` into the current working directory.

    Each file is named after the last path component of its URL.
    """
    for url in urls:
        result = fetch(url)
        # Debug aid kept from the original script: dumps the raw response.
        print(result)
        # Extract the body before opening the file so that a malformed
        # response does not leave an empty file behind.
        image = extract_body(result)
        with open(url.split('/')[-1], 'wb') as f:
            f.write(image)


if __name__ == '__main__':
    main()
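# Python web scraping: downloading images with raw HTTP requests via the socket module
# (Blog page residue: "latest recommended article published 2023-09-15 02:04:43".)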