import gevent.monkey
gevent.monkey.patch_all()
# The monkey patch above replaces the low-level socket module with non-blocking versions.
import ast
import csv
import html
import re
import time
from queue import Queue

import requests
from gevent.pool import Pool
# NOTE(review): this name shadows the built-in `list` for the whole module.
# It is not referenced by any code visible in this file.
list = []
# Not referenced by any code visible in this file.
list_url = []
class Yin_yue():
    """Crawler that exports the songs of one QQ Music playlist to a CSV file.

    ``run`` first fetches the playlist (``get_song_link``), which queues one
    dict per song, and then lets a gevent pool drain that queue with
    ``get_song_url_data``. Each worker fetches the song page, the lyric and
    the media URL for one song and, when the media URL looks valid, appends a
    row to the CSV file.

    Counters:
        request_number:  songs queued by ``get_song_link``.
        response_number: rows actually written by ``save``.
        finished_number: songs a worker has finished with, whether they were
            saved, skipped or failed. ``run`` waits on this counter so that a
            single bad song cannot keep the crawl from terminating.
    """

    # Seconds to wait for a single HTTP response. Without a timeout one
    # stalled connection would block its worker (and therefore ``run``)
    # forever.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        """Create queues, counters and request settings, and open the CSV.

        Side effect: creates/truncates the output CSV file and writes its
        header row. The file stays open until ``run`` finishes.
        """
        self.base_url = 'https://c.y.qq.com/qzone/fcg-bin/fcg_ucc_getcdinfo_byids_cp.fcg?'
        self.pool = Pool()
        self.album_url_queue = Queue()
        self.song_into_No1_url_queue = Queue()
        self.song_into_No2_url_queue = Queue()
        self.request_number = 0
        self.response_number = 0
        self.finished_number = 0
        self.is_runing = True
        self.csv_headers = [
            'showname', 'name', 'performers', 'mid', 'song_data_link',
            'song_id', 'img_url', 'lyric', 'mp3url', 'publish_time',
        ]
        # Query string of the playlist request; 'disstid' selects the playlist.
        self.params = {
            'type': '1',
            'json': '1',
            'utf8': '1',
            'onlysong': '0',
            'disstid': '3194950414',
            'g_tk': '5381',
            'loginUin': '0',
            'hostUin': '0',
            'format': 'json',
            'inCharset': 'utf8',
            'outCharset': 'utf-8',
            'notice': '0',
            'platform': 'yqq.json',
            'needNewCode': '0',
        }
        # Headers for the per-song requests issued through ``request_url``.
        self.headers = {
            "Referer": "https://y.qq.com/portal/player.html",
            "Cookie": "skey=@LVJPZmJUX; p"
        }
        # Headers for the playlist request issued by ``get_song_link``.
        self.album_headers = {
            'accept': 'application/json, text/javascript, */*; q=0.01',
            'accept-encoding': 'gzip, deflate, br',
            'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
            'cookie': 'pgv_pvid=7046669248; yqq_stat=0; pgv_pvi=6825585664; pgv_si=s6398292992; pgv_info=ssid=s9704515262; ts_uid=4695799595; yq_index=0; player_exist=1; yq_playschange=0; yq_playdata=; qqmusic_fromtag=66; yplayer_open=0; ts_last=y.qq.com/n/yqq/playlist/3194950414.html',
            'origin': 'https://y.qq.com',
            'referer': 'https://y.qq.com/n/yqq/playlist/3194950414.html',
            'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
        }
        # newline='' is what the csv module requires of its file object, and
        # an explicit encoding keeps non-ASCII lyrics from failing on
        # platforms whose default encoding is not UTF-8.
        self.csv_file = open('美国专业幼教推荐英语儿歌.csv', 'w', newline='', encoding='utf-8')
        self.f_csv = csv.DictWriter(self.csv_file, self.csv_headers)
        self.f_csv.writeheader()

    @staticmethod
    def _parse_literal(text):
        """Parse *text* as a literal (dict/list/str/number) and return it.

        SECURITY: the payloads parsed here come straight from HTTP responses.
        The previous implementation passed them to ``eval``, which executes
        arbitrary code. ``ast.literal_eval`` accepts the same literal syntax
        but raises ``ValueError``/``SyntaxError`` for anything else.
        """
        return ast.literal_eval(text.strip())

    @staticmethod
    def _strip_jsonp(text, callback='MusicJsonCallback'):
        """Return the payload of a ``callback(...)`` JSONP wrapper.

        Only the wrapper itself is removed; parentheses inside the payload
        are kept. Text that is not wrapped is returned stripped but otherwise
        unchanged.
        """
        body = text.strip()
        prefix = callback + '('
        if body.startswith(prefix):
            body = body[len(prefix):].rstrip().rstrip(';').rstrip()
            if body.endswith(')'):
                body = body[:-1]
        return body

    @staticmethod
    def _clean_lyric(raw):
        """Decode HTML character references in a lyric string.

        Tabs become spaces and carriage returns are dropped.

        NOTE(review): the hand-written ``replace`` chain this helper replaces
        is garbled in the reviewed copy of the file (most pairs map a
        character to itself and one quote is unterminated), which suggests it
        originally listed HTML character references one by one.
        ``html.unescape`` decodes all of them in a single pass. Confirm
        against the upstream file.
        """
        return html.unescape(raw).replace('\t', ' ').replace('\r', '')

    def request_url(self, url):
        """GET *url* with the per-song headers and return the response."""
        return requests.get(url, headers=self.headers, timeout=self.REQUEST_TIMEOUT)

    def get_song_link(self):
        """Fetch the playlist and queue one dict of basic fields per song.

        Each queued dict carries 'showname', 'name', 'performers', 'mid' and
        'song_data_link'. ``request_number`` is incremented once per queued
        song.
        """
        response = requests.get(
            self.base_url,
            params=self.params,
            headers=self.album_headers,
            timeout=self.REQUEST_TIMEOUT,
        )
        response.encoding = 'utf8'
        playlist = self._parse_literal(response.text)
        for song in playlist['cdlist'][0]['songlist']:
            mid = song['songmid']
            song_dict = {
                'showname': song['albumname'],
                'name': song['songname'],
                # Only the first listed singer is recorded.
                'performers': song['singer'][0]['name'],
                'mid': mid,
                'song_data_link': 'https://y.qq.com/n/yqq/song/{}.html'.format(mid),
            }
            # Count only after the dict was built, so the counter always
            # matches what is actually in the queue.
            self.request_number += 1
            self.song_into_No1_url_queue.put(song_dict)

    def get_song_url_data(self):
        """Pool worker: take one song from the queue and process it.

        Blocks until a song is available. Any error while processing is
        reported and swallowed here, at the worker boundary, so that the
        worker still completes normally; the song is counted in
        ``finished_number`` in every case.
        """
        song_dict = self.song_into_No1_url_queue.get()
        try:
            self._process_song(song_dict)
        except Exception as error:
            print('error', song_dict.get('song_data_link'), repr(error))
        finally:
            self.finished_number += 1

    def _process_song(self, song_dict):
        """Fill in the remaining fields of *song_dict* and save or log it."""
        url = song_dict['song_data_link']
        response = self.request_url(url)
        page = response.text
        if 'g_SongData' not in page:
            # Page without embedded song data: nothing to extract.
            return

        song_data = self._parse_literal(re.findall('var g_SongData = (.*?);', page)[0])
        print('url', url)
        print('song_data', song_data)
        song_dict['song_id'] = song_data['songid']

        img = re.findall('<img src="(.*?)" onerror', page)[0]
        # The src captured from the page has no scheme.
        song_dict['img_url'] = 'https:' + img

        lyc_url = 'https://c.y.qq.com/lyric/fcgi-bin/fcg_query_lyric.fcg?nobase64=1&musicid={}'.format(song_dict['song_id'])
        lyc_response = self.request_url(lyc_url)
        dict_response = self._parse_literal(self._strip_jsonp(lyc_response.text))
        if 'lyric' in dict_response:
            song_dict['lyric'] = self._clean_lyric(dict_response['lyric'])
        else:
            song_dict['lyric'] = None

        mp3_url = 'https://u.y.qq.com/cgi-bin/musicu.fcg?data={"req_0":{"module":"vkey.GetVkeyServer","method":"CgiGetVkey","param":{"guid":"4678313832","songmid":["%s"],"uin":"0"}}}' % (song_dict['mid'])
        response_mp3_url = self._parse_literal(self.request_url(mp3_url).text)
        half_url = response_mp3_url['req_0']['data']['midurlinfo'][0]['purl']
        song_dict['mp3url'] = 'http://111.202.98.150/amobile.music.tc.qq.com/' + half_url

        company_data = self._parse_literal(re.findall('info : (.*)', page)[0])
        if 'pub_time' in company_data:
            song_dict['publish_time'] = company_data['pub_time']['content'][0]['value']
        else:
            song_dict['publish_time'] = None
        print(song_dict)

        # Only media URLs containing 'C400' are saved; the rest are logged.
        if 'C400' in song_dict['mp3url']:
            self.save(song_dict)
        else:
            with open('error_url.txt', 'a', encoding='utf-8') as error_file:
                error_file.write(url + '\n' + song_dict['mp3url'] + '\n')

    def save(self, data):
        """Write *data* as one CSV row; empty/None *data* is ignored."""
        if data:
            self.response_number += 1
            self.f_csv.writerow(data)

    def callback_url_data(self, item):
        """Pool callback: schedule the next worker while the crawl is running.

        *item* is the finished worker's return value and is not used.
        """
        if self.is_runing:
            self.pool.apply_async(self.get_song_url_data, callback=self.callback_url_data)

    def run(self):
        """Crawl the whole playlist, then close the CSV and print the run time."""
        start_time = time.time()
        try:
            self.get_song_link()
            for _ in range(40):
                self.pool.apply_async(self.get_song_url_data, callback=self.callback_url_data)
            # Wait on finished_number rather than response_number: skipped
            # and failed songs never reach save(), so waiting for saved rows
            # could loop forever.
            while self.finished_number < self.request_number:
                time.sleep(2)
            self.is_runing = False
            print(self.is_runing)
        finally:
            # Closing flushes buffered rows to disk.
            self.csv_file.close()
        end_time = time.time()
        print('total_time:', end_time - start_time)
# Script entry point: the crawl only starts when this file is executed
# directly, not when it is imported.
if __name__ == '__main__':
    QQ_Yin_yue = Yin_yue()
    QQ_Yin_yue.run()