#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2020/12/17 13:32
# @Author : huni
# @File : 18asmr爬取.py
# @Software: PyCharm
import re
import os
import requests
from lxml import etree
import json
# Headers sent with every request: the audio listing page as referer plus a
# desktop-browser User-Agent.
HEADERS = {
    'referer': 'https://18asmr.net/online/yinpin',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.183 Safari/537.36',
}
LISTING_URL = 'https://18asmr.net/online/yinpin'
LOAD_MORE_URL = 'https://18asmr.net/wp-admin/admin-ajax.php'
DOWNLOAD_DIR = './18ASMR'
AUDIO_CATEGORY = '在线音频'
# Seconds to wait on any single HTTP call; without a timeout a stalled
# connection would block the script indefinitely.
REQUEST_TIMEOUT = 30

POST_LINK_RE = re.compile(r'href="(.*?)" class="link-block"')
AUDIO_URL_RE = re.compile(r"url: '(.*?)'")


def extract_audio_url(page_html: str):
    """Return the first ``url: '...'`` value in *page_html*.

    Returns ``None`` when the page has no such entry or the value is empty.
    """
    match = AUDIO_URL_RE.search(page_html)
    if match and match.group(1):
        return match.group(1)
    return None


def audio_filename(audio_url: str) -> str:
    """Return the last path segment of *audio_url*.

    Any ``?query`` or ``#fragment`` suffix is dropped so it does not end up
    in the file name. The result is ``''`` when the URL ends with a slash.
    """
    path = audio_url.split('#', 1)[0].split('?', 1)[0]
    return path.rsplit('/', 1)[-1]


def fetch_text(url: str) -> str:
    """GET *url* and return the response body as text.

    Raises:
        requests.RequestException: on connection errors, timeouts, or a
            4xx/5xx response.
    """
    resp = requests.get(url=url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    resp.raise_for_status()
    return resp.text


def page_category(page_html: str):
    """Return the last comma-separated item of the 9th ``<head>`` meta tag.

    Returns ``None`` when that meta tag is missing or its ``content`` is
    10 characters or shorter (the same length threshold the script has
    always applied before looking at the category).
    """
    tree = etree.HTML(page_html)
    if tree is None:
        return None
    contents = tree.xpath('/html/head/meta[9]/@content')
    if not contents or len(contents[0]) <= 10:
        return None
    return contents[0].split(',')[-1]


def download_audio(page_html: str):
    """Save the audio file referenced by *page_html* into ``DOWNLOAD_DIR``.

    Returns:
        The saved file name, or ``None`` when the page carries no usable
        audio URL.

    Raises:
        requests.RequestException: if the audio request fails.
        OSError: if the directory or file cannot be written.
    """
    audio_url = extract_audio_url(page_html)
    if audio_url is None:
        return None
    name = audio_filename(audio_url)
    if not name:
        return None
    resp = requests.get(url=audio_url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    # Fail here rather than writing an HTML error page to disk as audio.
    resp.raise_for_status()
    os.makedirs(DOWNLOAD_DIR, exist_ok=True)
    with open(os.path.join(DOWNLOAD_DIR, name), 'wb') as fp:
        fp.write(resp.content)
    print(name, '下载完成!')
    return name


def process_post(post_url: str, category=None) -> None:
    """Fetch one post page and download its audio.

    Args:
        post_url: URL of the post page.
        category: when given, the post is skipped unless ``page_category``
            of the page equals this value.

    Network and file errors are reported and swallowed so that one bad post
    does not abort the remaining downloads.
    """
    try:
        page_html = fetch_text(post_url)
        if category is not None and page_category(page_html) != category:
            return
        if download_audio(page_html) is None:
            print(post_url, '未找到音频链接')
    except (requests.RequestException, OSError) as exc:
        print(post_url, '下载失败:', exc)


def load_more_links(page: int) -> list:
    """Return the post links of listing page *page* from the ajax endpoint.

    Raises:
        requests.RequestException: if the request fails.
        ValueError: if the response body is not valid JSON.
    """
    # NOTE(review): 'catL17' is kept verbatim from the original script;
    # confirm it matches the ``type`` value the site's own ajax call sends.
    form = {'type': 'catL17', 'paged': str(page)}
    resp = requests.post(
        url=LOAD_MORE_URL,
        headers=HEADERS,
        params={'action': 'zrz_load_more_posts'},
        data=form,
        timeout=REQUEST_TIMEOUT,
    )
    resp.raise_for_status()
    payload = json.loads(resp.text)
    return POST_LINK_RE.findall(payload.get('msg') or '')


def first_page_links() -> list:
    """Return the post links found on the first listing page.

    Raises:
        requests.RequestException: if the listing page cannot be fetched.
    """
    tree = etree.HTML(fetch_text(LISTING_URL))
    if tree is None:
        return []
    links = []
    for div in tree.xpath('//*[@id="main"]/div[1]/div'):
        # xpath() returns a list; take the first href and skip entries
        # that have no link at all.
        hrefs = div.xpath('./div/div[1]/a/@href')
        if hrefs:
            links.append(hrefs[0])
    return links


def main(first_page: int = 2, last_page: int = 4) -> None:
    """Download audio from the ajax-loaded pages, then from the first page.

    Args:
        first_page: first ajax-loaded listing page to crawl (inclusive).
        last_page: last ajax-loaded listing page to crawl (inclusive).
    """
    # Pages 2 to 4 by default; adjust the arguments to crawl another range.
    for page in range(first_page, last_page + 1):
        try:
            links = load_more_links(page)
        except (requests.RequestException, ValueError) as exc:
            print(f'第{page}页', '获取失败:', exc)
            continue
        for href in links:
            # Only posts whose category is the online-audio one are fetched.
            process_post(href, category=AUDIO_CATEGORY)

    # First page: served as plain HTML, and downloaded without a category
    # filter, as before.
    try:
        links = first_page_links()
    except requests.RequestException as exc:
        print(LISTING_URL, '获取失败:', exc)
        return
    for href in links:
        process_post(href)


if __name__ == '__main__':
    main()
# If you found this script useful, feel free to tip the author. Thank you!