根据爬虫和tornado搭建百度音乐网站。/20171225
测试:
from tornado import web, httpserver, ioloop
# Request-handling logic
class MainPageHandler(web.RequestHandler):
    """Serve the site's landing page."""

    def get(self, *args, **kwargs):
        """Answer a GET request by rendering the ``查找.html`` template."""
        # An earlier draft answered with a plain-text greeting via
        # self.write(); rendering the template replaces it.
        template_name = '查找.html'
        self.render(template_name)
# Routing table: maps URL patterns to the handler classes that serve them.
# To expose the page at 127.0.0.1:8080/index instead of the site root,
# use r"/index" as the pattern in place of r"/".
application = web.Application(handlers=[
    (r"/", MainPageHandler),
])
# Socket service: runs only when this file is executed as a script.
if __name__ == '__main__':
    # Wrap the application in an HTTP server, bind it to port 8080, then
    # hand control to the event loop (start() does not return until the
    # loop is stopped).
    http_server = httpserver.HTTPServer(application)
    http_server.listen(8080)
    ioloop.IOLoop.current().start()
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Title</title>
</head>
<body>
<h1>My First Shabby Web...</h1>
<!--<a href="" download="download">下载</a>-->
</body>
</html>
class MainPageHandler(web.RequestHandler):
    """Landing-page handler: a greeting followed by the search template."""

    def get(self, *args, **kwargs):
        """Answer a GET request with a greeting and ``查找.html``."""
        greeting = '我的第一个简陋网站……'
        self.write(greeting)
        # NOTE(review): the greeting is written before the template, so it
        # presumably ends up ahead of the template's <!DOCTYPE> in the
        # response body -- confirm that this is intended.
        self.render('查找.html')
class GetMusicInfoHandler(web.RequestHandler):
    """Receive the search form posted by the landing page."""

    def post(self, *args, **kwargs):
        """Read the ``query`` form field and echo it to the console."""
        # Nothing is written to the response here; the submitted value is
        # only printed so the form wiring can be checked.
        print(self.get_argument('query'))
# Routing table: the landing page at "/" and the form target at "/query".
application = web.Application(handlers=[
    (r"/", MainPageHandler),
    (r"/query", GetMusicInfoHandler),
])
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Title</title>
</head>
<body>
<h1>My First Shabby Web...</h1>
<form action="/query" method="post">
<p>请输入要搜索的歌手的名字:<input type="text" name="query"></p>
<p><input type="submit" value="提交"></p>
</form>
<!--<a href="" download="download">下载</a>-->
</body>
</html>
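为啥pycharm里面的html文件中,我把一条病句注释掉了,运行程序还会提示我出错啊啊啊啊啊!(很可能是因为 Tornado 模板引擎并不理会 HTML 注释:写在 `<!-- -->` 里的 `{{ ... }}`、`{% ... %}` 仍会被解析执行;要真正屏蔽它们,应改用模板注释 `{# ... #}`。)/20171226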
project:
# 爬虫.py
import requests
import re
from fake_useragent import UserAgent
import json
def get_html(url, timeout=10):
    """Fetch *url* with an HTTP GET and a randomised ``User-Agent`` header.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` waits indefinitely, so one stalled
            server would hang the caller forever.

    Returns:
        The ``requests.Response`` returned by ``requests.get``. The status
        code is not checked here.

    Raises:
        requests.exceptions.Timeout: If the server does not answer within
            *timeout* seconds.
    """
    # A fresh random User-Agent string is picked for every request.
    headers = {'User-Agent': UserAgent().random}
    return requests.get(url, headers=headers, timeout=timeout)
def search_songs(singer_url):
    """Yield every song id found on the page at *singer_url*.

    The response is decoded as UTF-8 and scanned for ``sid":<digits>,"``
    fragments. Each captured digit string is yielded in the order it
    appears in the page text.
    """
    page = get_html(singer_url)
    page.encoding = 'utf-8'
    sid_pattern = re.compile(r'sid":(\d+?),"', re.S)
    yield from sid_pattern.findall(page.text)
def one_page(singer_url):
    """Collect the play-API metadata for every song on one result page.

    Each song id yielded by ``search_songs(singer_url)`` is sent to the
    ``baidu.ting.song.play`` endpoint. The endpoint answers with JSONP
    (``callback(<json>)``); the text between the parentheses is parsed as
    JSON and appended to the result.

    Songs whose response has no parenthesised payload, or whose payload is
    not valid JSON, are skipped, so one bad reply does not abort the whole
    page.

    Args:
        singer_url: URL of the search-result page to scan for song ids.

    Returns:
        A list with one decoded JSON object per successfully parsed song,
        in page order. The list is empty when nothing could be parsed.
    """
    # Built once: only the song id changes between requests.
    api_template = (
        'http://musicapi.qianqian.com/v1/restserver/ting?method=baidu.ting.song.play'
        '&format=jsonp&callback=jQuery1720006718159514979449_1513784517086&songid=%s&_=1513784517579'
    )
    # re.S lets the payload span several lines if the server pretty-prints.
    jsonp_payload = re.compile(r'\((.+)\)', re.S)

    FileLink_list = []
    for sid in search_songs(singer_url):
        api_response = get_html(api_template % sid)
        wrapped = jsonp_payload.search(api_response.text)
        if wrapped is None:
            # Not a JSONP reply (e.g. an error page): nothing to decode.
            continue
        try:
            FileLink_list.append(json.loads(wrapped.group(1)))
        except ValueError:
            # json.JSONDecodeError is a ValueError: malformed payload.
            continue
    return FileLink_list
def main(singer_name):
    """Return song metadata for the first search-result page of a singer.

    Args:
        singer_name: Search keyword as typed by the user. It is
            percent-encoded before being placed in the query string, so
            characters such as ``&``, ``#``, ``%`` or ``+`` cannot alter
            the structure of the URL.

    Returns:
        Whatever ``one_page`` returns for the constructed search URL.
    """
    from urllib.parse import quote

    # NOTE(review): the literal below contains a space before "=0"
    # ("third_type =0"). It is kept unchanged because the server's handling
    # of that parameter cannot be verified from here -- confirm and fix.
    singer_url = (
        'http://music.baidu.com/search?key='
        + quote(singer_name, safe='')
        + '&jump=0&start=0&size=20&third_type =0'
    )
    return one_page(singer_url)
if __name__ == "__main__":
    # Command-line entry point: prompt for a singer and run one search.
    # The result is discarded; while debugging, bind it to a name and
    # print it together with its type and length.
    singer_name = input('输入要查询的歌手姓名:')
    main(singer_name)
<!--查找.html-->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>姜枫渔火</title>
</head>
<body>
<h1>My First Shabby Web...</h1>
<form action="/query" method="post">
<p>请输入要搜索的歌手的名字:<input type="text" name="query"></p>
<p><input type="submit" value="提交"></p>
</form>
<!--<a href="" download="download">下载</a>-->
</body>
</html>
<!--音乐.html-->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>歌曲列表</title>
</head>
<body>
<!--{{ music }}-->
<table>
<tr>
<th>序号</th>
<th>歌名</th>
<th>图片</th>
<th>下载</th>
</tr>
{% for index, item in enumerate(music) %}
<tr>
<td>{{ index }}</td>
<td>{{ item['songinfo']['title'] }}</td>
<td><img src="{{ item['songinfo']['pic_small'] }}" alt=""></td>
<td><a href="{{ item['bitrate']['file_link'] }}" download="download">下载</a> </td>
</tr>
{% end %}
</table>
<!--{% for i in music %}-->
<!--<p>{{ i }}</p>-->
<!--{% end %}-->
</body>
</html>
# 百度音乐网站……
from tornado import web, httpserver, ioloop
from 爬音乐 import main
# Request-handling logic
class MainPageHandler(web.RequestHandler):
    """Landing-page handler: a greeting followed by the search form."""

    def get(self, *args, **kwargs):
        """Answer a GET request with a greeting and ``查找.html``."""
        greeting = '我的第一个简陋网站……'
        self.write(greeting)
        # NOTE(review): the greeting is written before the template, so it
        # presumably ends up ahead of the template's <!DOCTYPE> in the
        # response body -- confirm that this is intended.
        self.render('查找.html')
class GetMusicInfoHandler(web.RequestHandler):
    """Search for a singer's songs and render them as a table."""

    async def post(self, *args, **kwargs):
        """Handle the search form submission.

        Reads the ``query`` form field, passes it to the crawler's
        ``main`` function, and renders ``音乐.html`` with the result bound
        to the template variable ``music``.
        """
        query = self.get_argument('query')
        print(query)
        # main() issues blocking HTTP requests. Calling it directly would
        # freeze the event loop, and every other client with it, until the
        # crawl finishes, so it runs on the loop's default executor.
        # (IOLoop.run_in_executor requires Tornado 5.0 or newer.)
        music_info = await ioloop.IOLoop.current().run_in_executor(
            None, main, query)
        self.render('音乐.html', music=music_info)
# Routing table: the landing page at "/" and the search endpoint at
# "/query". To expose the landing page at 127.0.0.1:8080/index instead,
# use r"/index" as its pattern in place of r"/".
application = web.Application(handlers=[
    (r"/", MainPageHandler),
    (r"/query", GetMusicInfoHandler),
])
# Socket service: runs only when this file is executed as a script.
if __name__ == '__main__':
    # Wrap the application in an HTTP server, bind it to port 8080, then
    # hand control to the event loop (start() does not return until the
    # loop is stopped).
    http_server = httpserver.HTTPServer(application)
    http_server.listen(8080)
    ioloop.IOLoop.current().start()