1.图片爬取
复制来的。。
import requests
from bs4 import BeautifulSoup
import time
import json
import os
import socket
# Default timeout, in seconds, applied to socket objects created after this call.
socket.setdefaulttimeout(8)
def sougou_pic_url(num, keyword):
    """Collect image URLs from Sogou picture search for ``keyword``.

    Requests ``(num // 48) + 1`` result pages from the ``pic.sogou.com``
    ajax endpoint and gathers the ``pic_url`` field of every item returned.
    Whole pages are collected, so the result is not trimmed to ``num``.

    Args:
        num: Number of images wanted; only used to decide how many pages
            are requested. A negative value requests no pages at all.
        keyword: Search term. It is sent as a query parameter, so characters
            such as spaces or ``&`` are percent-encoded instead of
            corrupting the URL.

    Returns:
        A list with the ``pic_url`` value of every item, in the order the
        pages were received.

    Raises:
        requests.RequestException: On a network failure or timeout.
        json.JSONDecodeError: If a response body is not valid JSON.
        KeyError: If the decoded payload has no ``'items'`` entry or an
            item has no ``'pic_url'`` entry.
    """
    pic_url = []
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36'}
    for i in range((num // 48) + 1):
        params = {
            'query': keyword,
            'mode': 1,
            # NOTE(review): the page count assumes 48 results per page but the
            # offset advances by 47; kept as in the original -- confirm
            # against the API whether this overlap is intentional.
            'start': i * 47,
            'reqType': 'ajax',
            'reqFrom': 'result',
            'tn': 0,
        }
        imgs = requests.get(
            'https://pic.sogou.com/pics',
            params=params,
            # Send the browser User-Agent that was previously built but unused.
            headers=headers,
            # requests never times out unless told to; reuse the process-wide
            # socket default (None still means "wait indefinitely").
            timeout=socket.getdefaulttimeout(),
        )
        items = json.loads(imgs.text)['items']
        pic_url.extend(item['pic_url'] for item in items)
    return pic_url
def down_img(num, keyword):
pic_url = sougou_pic_url(num, keyword)
if os.path.exists('D:/p_images/' + keyword):
pass