python爬虫百度图片_【Python】爬取百度图片

最新推荐文章于 2022-12-22 18:05:00 发布

weixin_39944515

最新推荐文章于 2022-12-22 18:05:00 发布

阅读量99

点赞数

文章标签： python爬虫百度图片

# -*- coding: utf-8 -*-

import os
import re
from urllib import error

import requests
from bs4 import BeautifulSoup

# Shared module-level state used by the functions below.
num = 0          # running count of images saved so far
numPicture = 0   # how many images the user asked for (set in the main block)
file = ''        # name of the output folder (set in the main block)
List = []        # one list of image URLs per result page, filled by Find()

def Find(url):
    """Count the images available for a search and remember their URLs.

    The page offset (0, 60, 120, ... below 1000) is appended to ``url`` and
    each resulting page is fetched. Every ``"objURL"`` value found in the
    response text is counted, and the per-page list of URLs is appended to
    the module-level ``List``.

    Args:
        url: Search URL prefix to which the numeric page offset is appended.

    Returns:
        The total number of ``objURL`` matches found across all pages that
        could be fetched.
    """
    print('正在检测图片总数，请稍等.....')
    total = 0
    # The results are paginated in steps of 60 (per the original author).
    for offset in range(0, 1000, 60):
        page_url = url + str(offset)
        try:
            # Give up on a page after 7 seconds rather than hanging.
            response = requests.get(page_url, timeout=7)
        except requests.RequestException:
            # Only network-level failures are skipped; KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            continue
        pic_url = re.findall('"objURL":"(.*?)",', response.text, re.S)
        if not pic_url:
            # An empty page means there are no further results.
            break
        total += len(pic_url)
        # List is only mutated, never rebound, so no ``global`` is needed.
        List.append(pic_url)
    return total

def recommend(url):
    """Collect the link texts from the ``topRS`` block of a results page.

    According to the original author's note, this block holds the related
    searches suggested for the entered keyword.

    Args:
        url: Address of the results page to fetch.

    Returns:
        A list with the text of every ``<a>`` inside ``<div id="topRS">``
        (empty when that div is absent), or ``None`` if the page could not
        be fetched.
    """
    try:
        # Same 7-second timeout as the other requests in this file, so a
        # stalled connection cannot hang the program.
        html = requests.get(url, timeout=7)
    except requests.RequestException:
        # requests never raises urllib's HTTPError, so catch its own
        # exception hierarchy instead.
        return None

    html.encoding = 'utf-8'  # decode the body as UTF-8
    bsObj = BeautifulSoup(html.text, 'html.parser')
    div = bsObj.find('div', id='topRS')
    if div is None:
        return []
    return [a.get_text() for a in div.find_all('a')]

def dowmloadPicture(html, keyword):
    """Download the images referenced in one results page.

    Every ``"objURL"`` value in ``html`` is fetched and written to
    ``<file>/<keyword>_<num>.jpg``, where ``file`` and ``num`` are the
    module-level output folder and running counter. The function returns
    as soon as ``num`` reaches the module-level ``numPicture``.

    Args:
        html: Text of a results page.
        keyword: Search keyword, used in progress messages and file names.
    """
    global num
    pic_url = re.findall('"objURL":"(.*?)",', html, re.S)
    print('找到关键词:' + keyword + '的图片，即将开始下载图片...')
    for each in pic_url:
        print('正在下载第' + str(num + 1) + '张图片，图片地址:' + str(each))
        try:
            pic = requests.get(each, timeout=7)
        except requests.RequestException:
            # Skip images that cannot be fetched; KeyboardInterrupt and
            # SystemExit still propagate.
            print('错误，当前图片无法下载')
            continue

        # os.path.join uses the platform's separator instead of a
        # hard-coded pair of backslashes.
        target = os.path.join(file, keyword + '_' + str(num) + '.jpg')
        # ``content`` is the raw bytes of the response, which is what an
        # image file needs; ``with`` closes the file even if the write fails.
        with open(target, 'wb') as fp:
            fp.write(pic.content)
        num += 1

        if num >= numPicture:
            return

84 if __name__ == '__main__': #主函数入口

85 word = input("请输入搜索关键词(可以是人名，地名等):")86 #add = 'http://image.baidu.com/search/flip?tn=baiduimage&ie=utf-8&word=%E5%BC%A0%E5%A4%A9%E7%88%B1&pn=120'

87 url = 'http://image.baidu.com/search/flip?tn=baiduimage&ie=utf-8&word=' + word + '&pn='

88 tot =Find(url)89 Recommend = recommend(url) #记录相关推荐

90 print('经过检测%s类图片共有%d张' %(word, tot))91 numPicture = int(input('请输入想要下载的图片数量'))92 file = input('请建立一个存储图片的文件夹，输入文件夹名称即可')93 y =os.path.exists(file)94 if y == 1:95 print('该文件已存在，请重新输入')96 file = input('请建立一个存储图片的文件夹，)输入文件夹名称即可')97 os.mkdir(file)98 else:99 os.mkdir(file)100 t =0101 tmp =url102 while t

weixin_39944515

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
python爬虫百度图片_【Python】爬取百度图片

1 #-*- coding:utf-82 importre3 importrequests4 from urllib importerror5 from bs4 importBeautifulSoup6 importos78 num =09 numPicture =010 file = ''11 List =[]1213 #检测图片数量函数14 defFind(url):15 global...
复制链接

扫一扫