前提说明:
运行代码后,程序会提示你输入歌手编号(即网易云音乐的歌手 ID)。
以张国荣为例:输入歌手编号后程序会自动抓取,并在本地生成 results 文件夹,其中保存的就是该歌手歌曲的歌词文件。
源码奉上
import requests
from bs4 import BeautifulSoup
import re
import json
import time
import random
import os
# Request headers that make the scraper look like a desktop Chrome browser
# arriving from music.163.com itself.
headers = {
    'User-Agent': ('Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
                   '(KHTML, like Gecko) Chrome/66.0.3355.4 Safari/537.36'),
    'Referer': 'http://music.163.com',
    'Host': 'music.163.com',
}
# Download a page's HTML source.
def GetHtml(url):
    """Fetch *url* with the scraper headers and return the body text.

    Returns None on any request failure: connection error, timeout,
    or an HTTP error status (a 4xx/5xx page is useless to the parsers
    downstream, so it is treated as a failure too).
    """
    try:
        res = requests.get(url=url, headers=headers, timeout=10)
        res.raise_for_status()
    except requests.RequestException:
        # Narrow, requests-specific handler instead of the original
        # bare `except:` which also swallowed KeyboardInterrupt etc.
        return None
    return res.text
# Extract the song id/name pairs from an artist page.
def GetSongsInfo(url):
    """Parse an artist page and return {'ID': [...], 'NAME': [...]}.

    Returns a dict with empty lists when the page cannot be fetched or
    the hidden song list (<ul class="f-hide">) is not on the page —
    the original crashed in both cases (BeautifulSoup(None) / .find_all
    on None) despite printing a warning.
    """
    print('[INFO]:Getting Songs Info...')
    Info = {'ID': [], 'NAME': []}
    html = GetHtml(url)
    if html is None:
        print('[Warning]:_GetSongsInfo page download failed...')
        return Info
    soup = BeautifulSoup(html, 'lxml')
    container = soup.find('ul', class_='f-hide')
    if container is None:
        print('[Warning]:_GetSongsInfo <links> not find...')
        return Info
    links = container.find_all('a')
    if not links:
        print('[Warning]:_GetSongsInfo <links> not find...')
    for link in links:
        # href looks like '/song?id=12345'; the id is the part after '='.
        Info['ID'].append(link.get('href').split('=')[-1])
        Info['NAME'].append(link.get_text())
    return Info
# Strips LRC time tags such as "[00:12.345]" from lyric text.
_LRC_TAG_RULE = re.compile(r'\[.*?\]', re.S | re.M)

def GetLyrics(SongID):
    """Fetch the lyric for *SongID* from the NetEase lyric API.

    Returns the plain lyric text with LRC timestamps stripped, or an
    empty string when the request fails or the song has no lyric
    (e.g. instrumental tracks, which omit the 'lrc' entry — the
    original raised KeyError there).

    Note: the original pattern r'(.*?)' only ever matched the empty
    string, so the substitution was a no-op and timestamps leaked into
    the saved files; the corrected pattern removes the [mm:ss.xxx] tags.
    """
    print('[INFO]:Getting %s lyric...' % SongID)
    ApiUrl = 'http://music.163.com/api/song/lyric?id={}&lv=1&kv=1&tv=-1'.format(SongID)
    html = GetHtml(ApiUrl)
    if html is None:
        return ''
    html_json = json.loads(html)
    # 'lrc' may be absent and 'lyric' may be null — both mean "no lyric".
    temp = (html_json.get('lrc') or {}).get('lyric') or ''
    return _LRC_TAG_RULE.sub('', temp).strip()
# Characters that are illegal or path-hostile in file names.
_BAD_NAME_CHARS = re.compile(r'[\\/:*?"<>|]')

def SaveLyrics(SongName, lyric):
    """Write *lyric* to ./results/<SongName>.txt as UTF-8.

    Creates the results directory on first use (exist_ok avoids the
    isdir/makedirs race of the original) and replaces path-hostile
    characters in the song name, so a title like "A/B" can neither
    escape the results folder nor fail on Windows.
    """
    print('[INFO]: Start to Save {}...'.format(SongName))
    os.makedirs('./results', exist_ok=True)
    safe_name = _BAD_NAME_CHARS.sub('_', SongName)
    with open('./results/{}.txt'.format(safe_name), 'w', encoding='utf-8') as f:
        f.write(lyric)
def main():
    """Prompt for a NetEase artist id and save every listed song's lyric."""
    SingerId = input('Enter the Singer ID:')
    url = 'http://music.163.com/artist?id={}'.format(SingerId)
    Info = GetSongsInfo(url)
    # Walk ids and names in lockstep instead of the original's manually
    # maintained counter variable.
    for SongID, SongName in zip(Info['ID'], Info['NAME']):
        print(SongID)
        lyric = GetLyrics(SongID)
        SaveLyrics(SongName, lyric)
        # Random 0-3 s pause between requests to avoid hammering the server.
        time.sleep(random.random() * 3)

# Guarded entry point so importing this module does not start the crawl.
if __name__ == '__main__':
    main()
下面是爬取网易云音乐歌曲评论的代码,以张国荣的几首歌为例:
# coding:utf-8
import json
import time
import requests
from fake_useragent import UserAgent
import random
import multiprocessing
import sys
#reload(sys)
#sys.setdefaultencoding('utf-8')
# Random desktop User-Agent source; verify_ssl=False skips certificate
# checks when fake_useragent downloads its browser list.
ua = UserAgent(verify_ssl=False)

# Songs to crawl: one {song_id: song_title} mapping per entry.
song_list = [
    {'186453': '春夏秋冬'},
    {'188204': '沉默是金'},
    {'188175': '倩女幽魂'},
    {'188489': '风继续吹'},
    {'187374': '我'},
    {'186760': '风雨起时'},
]

# Headers for the comment API; the User-Agent is randomized once per run.
headers = {
    'Origin': 'https://music.163.com',
    'Referer': 'https://music.163.com/song?id=26620756',
    'Host': 'music.163.com',
    'User-Agent': ua.random,
}
def get_comments(page, ite):
    """Fetch one page of comments for each {song_id: song_name} in *ite*.

    *page* is passed straight through as the API ``offset`` parameter.
    NOTE(review): offset usually means "skip N comments", so paginating
    likely needs page * limit here — confirm against the API.

    Prints user ids and comment bodies as it goes, and returns the
    parsed comments as a list of dicts (the original computed every
    field and then discarded it; returning them is backward compatible
    since the original returned None and no caller used the result).
    """
    results = []
    for song_id, song_name in ite.items():
        url = ('http://music.163.com/api/v1/resource/comments/R_SO_4_'
               + song_id + '?limit=20&offset=' + str(page))
        # To route through an IP proxy pool instead:
        # proxies = get_random_ip(ip_list)
        # response = requests.get(url=url, headers=headers, proxies=proxies)
        try:
            response = requests.get(url=url, headers=headers)
        except Exception as e:
            # The original printed and then fell through to use the
            # unbound `response`, crashing with NameError. Skip this
            # song instead.
            print(page)
            print(ite)
            print(e)
            continue
        result = json.loads(response.text)
        for item in result.get('comments', []):
            # Commas in free text are swapped for fullwidth commas so the
            # values stay CSV-safe.
            user_name = item['user']['nickname'].replace(',', ',')
            user_id = str(item['user']['userId'])
            print(user_id)
            comment = item['content'].strip().replace('\n', '').replace(',', ',')
            print(comment)
            comment_id = str(item['commentId'])
            praise = str(item['likedCount'])
            # API timestamps are in milliseconds; keep the first 10 digits
            # (seconds) and render as local time.
            date = time.localtime(int(str(item['time'])[:10]))
            date = time.strftime('%Y-%m-%d %H:%M:%S', date)
            results.append({
                'song_id': song_id,
                'song_name': song_name,
                'user_id': user_id,
                'user_name': user_name,
                'comment_id': comment_id,
                'comment': comment,
                'praise': praise,
                'date': date,
            })
    return results

# Guarded entry point so importing this module does not fire a request.
if __name__ == '__main__':
    get_comments(1, song_list[0])