"""Download thumbnail images from Baidu image search into ``./tupian``.

The script queries the ``acjson`` search endpoint for three result pages
(offsets 30, 60 and 90) and saves every thumbnail it finds. Each file is
named after the first five characters of the entry's ``fromPageTitle``.
"""
import json
import os
import re

import requests

SAVE_DIR = 'tupian'
PAGE_SIZE = 30         # matches the 'rn' query parameter below
REQUEST_TIMEOUT = 10   # seconds; without it a stalled connection blocks forever

# Markup tags and characters that are unsafe in file names on common platforms.
_TAG_RE = re.compile(r'<[^>]+>')
_UNSAFE_RE = re.compile(r'[\\/:*?"<>|\x00-\x1f]')

# NOTE(review): this is a browser session cookie pasted into source. It will
# expire, and it should not be committed; consider loading it from the
# environment instead.
header = {
    'Cookie': (
        'BDqhfp=%E5%B0%8F%E5%A7%90%E5%A7%90%26%260-10-1undefined%26%26816%26%262; '
        'BAIDUID=FEBB36715A23D717304603BE47498B4A:FG=1; '
        'BAIDUID_BFESS=FEBB36715A23D717304603BE47498B4A:FG=1; '
        'BIDUPSID=FEBB36715A23D717304603BE47498B4A; '
        'PSTM=1664804657; '
        'ZFY=:A9MPhCebbFX:A7KBHCfjYItzPiy0gq55zQkxRB5e06sQ:C; '
        'BDORZ=FFFB88E999055A3F8A630C64834BD6D0; '
        'BDRCVFR[dG2JNJb_ajR]=mk3SLVN4HKm; '
        'BDRCVFR[-pGxjrCMryR]=mk3SLVN4HKm; '
        'BDRCVFR[Txj84yDU4nc]=mk3SLVN4HKm; '
        'BDRCVFR[tox4WRQ4-Km]=mk3SLVN4HKm; '
        'H_PS_PSSID=26350; '
        'BA_HECTOR=8h8g2hah20a0810kaha1c9rc1hjok1i1a; '
        'BDRCVFR[X_XKQks0S63]=mk3SLVN4HKm; '
        'userFrom=www.baidu.com; '
        'firstShowTip=1; '
        'indexPageSugList=%5B%22%E5%B0%8F%E5%A7%90%E5%A7%90%22%2C%22%E6%83%85%E7%BB%AA%E8%A1%A8%E6%83%85%E5%9B%BE%E7%89%87%22%2C%22%E6%83%85%E7%BB%AA%E5%9B%BE%E7%89%87%E5%8D%A1%E9%80%9A%22%5D; '
        'cleanHistoryStatus=0; '
        'ab_sr=1.0.1_M2FiZjQ1MGVjMmYzNWQ1MmVlYzQ0MDNlNDI3ZDEyMDYwODBhYTYwMDkyODJjYzZkNDhhNjJkYzYxMzQwNDgxNDlhMTNlNDk2MjM4ZTRkMDVkNTI1ODUwODAzYmI0NTIxNjY0M2Q3ZGZmMGY5NDhhOTQxMWYxM2Y4ZmE5YjJiNjA2MzU4MDhkMDY5ZTc0YWU1YWMxNjg2YTc3MThiNTMwOA=='
    ),
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36',
}

url = 'https://image.baidu.com/search/acjson?'

# Base query string. 'pn' (the result offset) is a placeholder here and is
# replaced with the real offset for every page request.
param = {
    'tn': 'resultjson_com',
    'logid': '12449154299384042955',
    'ipn': 'rj',
    'ct': '201326592',
    'is': '',
    'fp': 'result',
    'fr': '',
    'word': '小姐姐',
    'queryWord': '小姐姐',
    'cl': '',
    'lm': '-1',
    'ie': 'utf-8',
    'oe': 'utf-8',
    'adpicid': '',
    'st': '-1',
    'z': '',
    'ic': '0',
    'hd': '',
    'latest': '',
    'copyright': '',
    's': '',
    'se': '',
    'tab': '',
    'width': '',
    'height': '',
    'face': ' 0',
    'istype': '2',
    'qc': '',
    'nc': '1',
    'expermode': '',
    'nojc': '',
    'isAsync': '',
    'pn': 'pn',
    'rn': '30',
    'gsm': '',
}


def _fetch_items(offset):
    """Return the list of result entries for the page starting at *offset*.

    Raises ``requests.RequestException`` on network/HTTP failure and
    ``ValueError`` when the response body is not valid JSON.
    """
    # Copy instead of mutating the shared module-level template.
    query = dict(param, pn=offset)
    print(query)  # printed so that paging can be verified by eye
    response = requests.get(
        url, headers=header, params=query, timeout=REQUEST_TIMEOUT
    )
    response.raise_for_status()
    payload = json.loads(response.text)
    return payload.get('data') or []


def _image_stem(item):
    """Build a file-name stem from the first five characters of the title.

    Markup tags and characters that are invalid in file names are removed
    first (presumably titles can carry highlight markup or separators —
    verify against real responses). Falls back to ``'image'`` when nothing
    usable is left.
    """
    title = item.get('fromPageTitle') or ''
    title = _TAG_RE.sub('', title)
    title = _UNSAFE_RE.sub('', title)
    return title[:5] or 'image'


def _unique_stem(stem, used):
    """Return *stem*, suffixed with a counter if it was already used this run."""
    candidate = stem
    counter = 1
    while candidate in used:
        candidate = f'{stem}_{counter}'
        counter += 1
    used.add(candidate)
    return candidate


def _save_thumbnail(item, used):
    """Download one entry's thumbnail into ``SAVE_DIR``; skip unusable entries."""
    src = item.get('thumbURL')
    if not src:
        # Entries without a thumbnail URL (e.g. an empty trailing placeholder)
        # carry nothing to download.
        return

    try:
        image = requests.get(src, headers=header, timeout=REQUEST_TIMEOUT)
        image.raise_for_status()
    except requests.RequestException as exc:
        print(f'skipped {src}: {exc}')
        return

    name = _unique_stem(_image_stem(item), used) + '.jpg'
    with open(os.path.join(SAVE_DIR, name), 'wb') as f:
        f.write(image.content)


def main():
    """Fetch result pages 1-3 and store every thumbnail they list."""
    os.makedirs(SAVE_DIR, exist_ok=True)

    used = set()  # stems written during this run, to avoid overwriting
    for page in range(1, 4):
        print(page)
        try:
            items = _fetch_items(page * PAGE_SIZE)
        except (requests.RequestException, ValueError) as exc:
            # One bad page should not abort the remaining pages.
            print(f'page {page} skipped: {exc}')
            continue

        for item in items:
            _save_thumbnail(item, used)


if __name__ == '__main__':
    main()
# 爬虫成长之路1
# 最新推荐文章于 2024-09-17 19:26:23 发布