python爬虫七:爬取音乐V榜

转:https://zhuanlan.zhihu.com/p/26701898
# -*- coding: utf-8 -*-

import requests
from bs4 import BeautifulSoup
import random
#爬取音乐V榜
def get_html(url):
    """Fetch *url* and return the decoded response body as text.

    Returns None when the request fails (network error, timeout, or a
    non-2xx status) after printing a short diagnostic.
    """
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        # Let requests sniff the real encoding so Chinese text decodes correctly.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException as e:
        # The original bare `except:` swallowed every exception (even
        # KeyboardInterrupt) and printed an uninformative 'wrong'.
        print('request failed:', e)
        return None


def get_content(url):
    """Scrape one MV-chart page and print each entry's name, release time,
    singer and score.

    Silently returns when the page could not be fetched (get_html already
    reported the failure) or when the expected chart list is missing.
    """
    html = get_html(url)
    if html is None:
        # get_html failed; BeautifulSoup(None, ...) would raise TypeError.
        return
    soup = BeautifulSoup(html, 'lxml')

    # Container holding the ranked MV list.
    ul = soup.find('ul', attrs={'class': 'area_three area_list'})
    if ul is None:
        # Page layout changed or an error page was returned.
        return

    for li in ul.find_all('li', attrs={'name': 'dmvLi'}):
        name = li.find('a', attrs={'class': 'mvname'}).text
        singer = li.find('a', attrs={'class': 'special'}).text
        time = li.find('p', attrs={'class': 'c9'}).text
        sco = li.find('div', attrs={'class': 'score_box'}).h3.text
        print('歌名:{}\t{}\n演唱者:{}\n评分:{}'.format(name, time, singer, sco))

def get_agent():
    """Return a fake request-header dict with a randomly chosen User-Agent.

    Picks one entry from a small built-in pool so repeated requests do not
    all present the same browser signature.
    """
    agents = ['Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0;',
              'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv,2.0.1) Gecko/20100101 Firefox/4.0.1',
              'Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; en) Presto/2.8.131 Version/11.11',
              'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11',
              'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)']
    # Bug fix: random.randint(0, len(agents)) is inclusive on BOTH ends,
    # so it raised IndexError ~1 time in 6. random.choice cannot.
    fakeheader = {}
    fakeheader['User-agent'] = random.choice(agents)
    return fakeheader


def get_proxy():
    """Return a proxies dict with one randomly chosen HTTP proxy.

    Simulates a tiny proxy pool; suitable for the ``proxies=`` argument
    of ``requests.get``.
    """
    proxy = ["http://116.211.143.11:80",
             "http://183.1.86.235:8118",
             "http://183.32.88.244:808",
             "http://121.40.42.35:9999",
             "http://222.94.148.210:808"]
    # Bug fix: random.randint(0, len(proxy)) is inclusive on BOTH ends,
    # so it raised IndexError ~1 time in 6. random.choice cannot.
    fakepxs = {}
    fakepxs['http'] = random.choice(proxy)
    return fakepxs

def main(url):
    """Crawl the MV chart for every regional area code, one page each."""
    for area in ('ML', 'HT', 'KR', 'JP'):
        get_content(url + area)



if __name__ == '__main__':
    # Base chart URL; the area code is appended per region inside main().
    base_url = 'http://vchart.yinyuetai.com/vchart/trends?area='
    main(base_url)

 

 

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值