使用 Python 抓取 5sing 上的歌曲

最新推荐文章于 2024-04-10 11:10:47 发布

emako_

最新推荐文章于 2024-04-10 11:10:47 发布

阅读量1.2k

点赞数 1

分类专栏： python

本文链接：https://blog.csdn.net/mccoy39082/article/details/84260972

版权

python 专栏收录该内容

4 篇文章 0 订阅

订阅专栏

下面以 http://5sing.kugou.com/inory/fc/1.html 为例：第一个脚本遍历翻唱列表页、收集每首歌曲的页面链接，并为每首歌生成一条 ffmpeg 下载命令；第二个脚本逐行读取这些命令并执行。

#coding:utf-8
from bs4 import BeautifulSoup
import requests
import os
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
import time
import subprocess
import sys
import os
import io

url_cover = 'http://5sing.kugou.com/inory/fc/{count}.html'
url_covers = []
ffsave = 'music_save'
isSave = False

def save(filename, contents):
	"""Write ``contents`` to ``filename`` as UTF-8, replacing any existing content.

	Args:
		filename: Path of the file to create or truncate.
		contents: Text to write.
	"""
	# The context manager closes the handle even when write() raises.
	with open(filename, 'w+', encoding='utf-8') as fh:
		fh.write(contents)

def save_append(filename, contents):
	"""Append ``contents`` to ``filename`` as UTF-8, creating the file if needed.

	Args:
		filename: Path of the file to append to.
		contents: Text to add at the end of the file.
	"""
	# The context manager closes the handle even when write() raises.
	with open(filename, 'a+', encoding='utf-8') as fh:
		fh.write(contents)

# Start a Chrome session with custom profile preferences (pop-up setting and
# download directory) and a 5-second page-load timeout.
chromeOptions = webdriver.ChromeOptions()
chromeOptions.add_experimental_option("prefs", {'profile.default_content_settings.popups' : 0, 'download.default_directory' : '.'})
# Pass the options through ``options=``: the older ``chrome_options=`` keyword
# is deprecated and is rejected by current Selenium 4 releases.
browser = webdriver.Chrome(options = chromeOptions)
browser.set_page_load_timeout(5)

# Walk listing pages 1..11 and collect every link that points at a song page
# (an href under http://5sing.kugou.com/fc/ ending in "html").
for i in range(1, 12):
	url_current = url_cover.format(count=i)
	browser.get(url_current)
	# NOTE(review): a WebDriverWait that is never given an .until() condition
	# does not block; kept as in the original — confirm whether a real wait
	# condition was intended here.
	WebDriverWait(browser, 2)
	html = browser.page_source
	soup = BeautifulSoup(html, "lxml")
	links = soup.select('a')
	for link in links:
		# <a> tags without an href attribute return None from .get(); fall
		# back to '' so they are simply skipped instead of raising
		# AttributeError on .startswith().
		href = link.get('href') or ''
		if href.startswith('http://5sing.kugou.com/fc/') and href.endswith('html'):
			title = link.get('title')
			url_covers.append(href)
			print(title, href)
	print('')

# Make sure the output directory exists before the log files are appended
# inside it. exist_ok=True avoids the check-then-create race of
# isdir() followed by mkdir().
os.makedirs(ffsave, exist_ok=True)

# Visit every collected song page, read the <audio> source URL and record an
# ffmpeg command for it (optionally launching ffmpeg right away when isSave
# is true). Failures are reported and logged per URL; the loop carries on.

# Set once, before the first request, so the longer timeout also applies to
# the first song page.
browser.set_page_load_timeout(10)
try:
	for url in url_covers:
		try:
			browser.get(url)
			# NOTE(review): without .until() this WebDriverWait does not block;
			# kept as in the original.
			WebDriverWait(browser, 5)
			html = browser.page_source
			soup = BeautifulSoup(html, "lxml")
			title = soup.select('title')[0].get_text()
			link = soup.select('audio')[0].get('src')
			if not link:
				# An <audio> tag without a src would otherwise be logged as a
				# useless 'ffmpeg -i "None"' command.
				raise ValueError('no audio source found')
			cmd = 'ffmpeg -i "{url}" -c copy "{filename}.mp3" -y'.format(url=link, filename=title)
			if isSave:
				# Pass an argument list rather than the command string: a plain
				# string without a shell is treated as the program name on
				# POSIX, and a list needs no quoting of the scraped title/URL.
				subprocess.Popen(['ffmpeg', '-i', link, '-c', 'copy', '{filename}.mp3'.format(filename=title), '-y'])
			save_append('{ffsave}/music_url.txt'.format(ffsave=ffsave), cmd + '\r\n')
			print(title, link)
			print(cmd)
			print('')
		except Exception as error:
			# Exception (not a bare except) so Ctrl-C and SystemExit still stop
			# the run.
			print('download {url} failed!'.format(url=url), repr(error))
			# Terminate each entry with a newline so failures do not run
			# together on a single line of the error log.
			save_append('{ffsave}/music_error.txt'.format(ffsave=ffsave), 'download {url} failed!\n'.format(url=url))
finally:
	# Always shut the browser down, even when the run is interrupted.
	browser.quit()

#coding:utf-8
import subprocess
import sys
import time

def read(filename):
	"""Return the lines of the UTF-8 text file ``filename``.

	Args:
		filename: Path of the file to read.

	Returns:
		A list with one string per line, line endings included.
	"""
	# The context manager closes the handle even when readlines() raises.
	with open(filename, 'r', encoding='utf-8') as fh:
		return fh.readlines()

import shlex

# Replay previously recorded commands: read music_url.bat line by line and
# launch each line as a separate process.
# NOTE(review): the scraper script earlier in this file logs its commands to
# music_save/music_url.txt; presumably that file is copied or renamed to
# music_url.bat before this script is run — confirm.
lines = read('music_url.bat')

for line in lines:
	cmd = line.replace('\r', '').replace('\n', '')
	if not cmd.strip():
		# Blank lines carry no command; Popen would raise on an empty one.
		continue
	print(cmd)
	# Split the line into an argument list (honouring the double quotes around
	# the URL and file name): a plain command string without a shell is
	# treated as the program name on POSIX and fails to start.
	subprocess.Popen(shlex.split(cmd))
	time.sleep(0)