# Reposted from: https://zhuanlan.zhihu.com/p/26701898
# -*- coding: utf-8 -*-
import requests
from bs4 import BeautifulSoup
import random
#爬取音乐V榜
def get_html(url):
    """Fetch *url* and return the response body as text.

    The request is made with ``timeout=30`` and the response is decoded
    using the encoding requests detects from the content
    (``apparent_encoding``).

    Args:
        url: Address of the page to download.

    Returns:
        The decoded page text, or ``None`` when the request fails or the
        server answers with an error status. In that case ``'wrong'`` is
        printed.
    """
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Only network/HTTP failures are expected here. A bare ``except``
        # would also swallow KeyboardInterrupt and genuine programming
        # errors, hiding them behind the same message.
        print('wrong')
        return None
def get_content(url):
    """Download a chart page and print every entry found on it.

    The page is parsed with BeautifulSoup (``lxml`` parser). Entries are
    the ``<li name="dmvLi">`` items inside the
    ``<ul class="area_three area_list">`` element. For each one the title,
    the text of its ``<p class="c9">`` element, the singer and the score
    are printed.

    Args:
        url: Full address of the chart page to scrape.

    Returns:
        None. Nothing is printed when the page could not be downloaded or
        does not contain the expected list.
    """
    html = get_html(url)
    if not html:
        # get_html() yields None on a failed download; feeding that to
        # BeautifulSoup would raise instead of skipping this page.
        return

    soup = BeautifulSoup(html, 'lxml')

    # Container holding all chart entries.
    ul = soup.find('ul', attrs={'class': 'area_three area_list'})
    if ul is None:
        # Page layout differs from what is expected; nothing to list.
        return

    # Individual chart entries.
    for li in ul.find_all('li', attrs={'name': 'dmvLi'}):
        name = li.find('a', attrs={'class': 'mvname'}).text
        singer = li.find('a', attrs={'class': 'special'}).text
        time_text = li.find('p', attrs={'class': 'c9'}).text
        sco = li.find('div', attrs={'class': 'score_box'}).h3.text
        print('歌名:{}\t{}\n演唱者:{}\n评分:{}'.format(name, time_text, singer, sco))
def get_agent():
    """Return a header dict with a randomly chosen User-Agent string.

    Simulates the ``User-agent`` field of a request header.

    Returns:
        A dict with the single key ``'User-agent'`` whose value is one of
        the built-in user-agent strings, picked at random.
    """
    agents = ['Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0;',
              'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv,2.0.1) Gecko/20100101 Firefox/4.0.1',
              'Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; en) Presto/2.8.131 Version/11.11',
              'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11',
              'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)']
    fakeheader = {}
    # random.randint(0, len(agents)) includes the upper bound and could
    # index one past the end of the list; random.choice always stays in range.
    fakeheader['User-agent'] = random.choice(agents)
    return fakeheader
def get_proxy():
    """Return a proxies dict with a randomly chosen HTTP proxy.

    A simple simulated proxy pool.

    Returns:
        A dict with the single key ``'http'`` whose value is one of the
        built-in proxy addresses, picked at random.
    """
    proxy = ["http://116.211.143.11:80",
             "http://183.1.86.235:8118",
             "http://183.32.88.244:808",
             "http://121.40.42.35:9999",
             "http://222.94.148.210:808"]
    fakepxs = {}
    # random.randint(0, len(proxy)) includes the upper bound and could
    # index one past the end of the list; random.choice always stays in range.
    fakepxs['http'] = random.choice(proxy)
    return fakepxs
def main(url):
    """Scrape the chart page of every area code in turn.

    Args:
        url: Base chart address; each area code is appended to it to form
            the page address passed to ``get_content``.
    """
    # Area codes are visited in this fixed order.
    for area_code in ('ML', 'HT', 'KR', 'JP'):
        page_url = url + area_code
        get_content(page_url)
if __name__ == '__main__':
    # Base chart address; main() appends each area code to it.
    url = 'http://vchart.yinyuetai.com/vchart/trends?area='
    main(url)