import grequests,json,time,requests,os
import uuid,re
from bs4 import BeautifulSoup
def imgload(url, save_dir=r'G:\手机壁纸'):
    """Download every image URL concurrently and save each one as a ``.jpg``.

    All URLs are requested in one ``grequests.map`` batch whose pool size
    equals the number of URLs. Each usable response body is written to
    ``save_dir`` under a random UUID4 file name.

    Requests that failed (``grequests.map`` reports those as ``None``) or
    that came back with an HTTP error status are skipped, so one bad URL
    does not abort the whole batch or leave an error page saved as an image.

    :param url: sized collection (e.g. list) of image URLs to fetch.
    :param save_dir: directory the images are written to; it is created
        when it does not exist yet. Defaults to the original hard-coded path.
    :return: None
    """
    print(len(url))
    if not url:
        # Nothing to fetch: do not create the directory or send requests.
        return

    response_list = grequests.map(
        (grequests.get(img_url) for img_url in url),
        size=len(url),
    )
    print("开始下载")
    # Report the batch size once instead of once per saved image.
    print(len(response_list))

    os.makedirs(save_dir, exist_ok=True)
    for resp in response_list:
        if resp is None or not resp.ok:
            # Failed request or HTTP error: there is no image payload to save.
            continue
        # The extension is always ".jpg", regardless of the real content type.
        filename = os.path.join(save_dir, str(uuid.uuid4()) + '.jpg')
        with open(filename, 'wb') as f:
            f.write(resp.content)
def is_chinese(string):
    """Report whether *string* contains at least one Chinese character.

    A character counts as Chinese here when its code point lies in the
    inclusive range U+4E00..U+9FFF.

    :param string: the string to inspect
    :return: bool -- True if any character falls in that range, else False
    """
    return any('\u4e00' <= char <= '\u9fff' for char in string)
# One asynchronous GET per listing page of the tag; pages 0 through 4.
# NOTE(review): page/0/ may serve the same content as page/1/ on this site,
# which would download those images twice -- confirm against the live site.
request_list = [
    grequests.get(
        'http://m.bcoderss.com/tag/%E7%BE%8E%E5%A5%B3/page/{}/'.format(page)
    )
    for page in range(5)
]
img_url = []
# ##### Send the requests and collect the list of responses #####
start_time = time.time()
response_list = grequests.map(request_list, size=5)
print(response_list)
for i in response_list:
    if i is None:
        # grequests.map reports a failed request as None; skip that page.
        continue
    suop = BeautifulSoup(i.text, features='html.parser')
    ul = suop.find('ul', id='main')
    if ul is None:
        # Page without the expected <ul id="main"> listing; nothing to collect.
        continue
    li_list = ul.find_all('li')
    for j in li_list:
        img_tag = j.find('img')
        img = img_tag.attrs.get('src') if img_tag is not None else None
        if not img:
            # <li> without an <img>, or an <img> without a usable src.
            continue
        if not is_chinese(img):
            # '-260x534' is presumably a thumbnail-size suffix; removing it is
            # assumed to yield the full-size image URL -- TODO confirm.
            uul = img.replace('-260x534', '')
            print(img, uul)
            img_url.append(uul)
print(img_url)
imgload(img_url)
consum = time.time() - start_time
# Elapsed wall-clock time rounded to two decimals, followed by 's'.
print(str(float('%.2f' % consum)) + 's')
# 1. Mobile wallpaper scraper
# (Blog page residue: "latest recommended article published 2023-04-28 17:10:39")