python爬虫出问题了，啊啊啊求大佬解决

2301_76231140

于 2024-09-06 18:06:45 发布

阅读量1.1k

点赞数 25

文章标签： python 爬虫 开发语言

本文链接：https://blog.csdn.net/2301_76231140/article/details/141966187

版权

正常代码是能爬出列表的

etree 那里报错了，我就点了小灯泡（快速修复）的第一条建议，它自动添加了几行代码，结果就变成现在这样了。

现在也找不到错误代码在哪

更糟糕的是pycharm设置点了也没反应打不开。

代码如下：

import requests,json
from lxml import etree


# Start page of the crawl: parse() downloads this URL to discover the category links.
url = 'https://music.163.com/discover/artist'
# Accumulates the singer dicts; parse_detail() appends each one after filling it in.
singer_infos = []


# ---------------通过url获取该页面的内容，返回xpath对象
# ...其他代码...
def get_xpath(url):
    """Download ``url`` and return its HTML parsed into an lxml element tree.

    Args:
        url: Absolute URL of the page to fetch.

    Returns:
        The root element produced by ``etree.HTML``; callers run
        ``.xpath(...)`` queries against it.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            does not complete within the timeout.
    """
    # A browser-like User-Agent is sent with every request.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.70 Safari/537.36'
    }
    # requests has no default timeout; without one a stalled connection
    # would block the whole crawl indefinitely.
    response = requests.get(url, headers=headers, timeout=10)
    # etree.HTML is a function and must be *called* with the page text.
    # The previous `etree.HTMLresponse.text` was an attribute lookup on a
    # non-existent name and raised AttributeError.
    return etree.HTML(response.text)

# ...其他函数保持不变...


# --------------通过get_xpath爬取到页面后，我们获取华语，华语男等分类
def parse():
    """Crawl the start page and descend into every artist category.

    Downloads the module-level ``url``, reads the href of each entry in
    the ``nav f-cb`` navigation list and passes every category link,
    made absolute, to ``parse_fenlei``.
    """
    page = get_xpath(url)
    for href in page.xpath('//ul[@class="nav f-cb"]/li/a/@href'):
        # Only links containing 'id' are categories; per the original
        # note this filters out the "hot" and "recommended" tabs.
        if 'id' not in href:
            continue
        parse_fenlei('https://music.163.com' + href)


# -------------通过传入的分类url，获取A,B，C页面内容
def parse_fenlei(url):
    """Visit one category page and crawl each of its initial-letter pages.

    Args:
        url: Absolute URL of a category page.
    """
    page = get_xpath(url)
    # position()>1 skips the first entry of the initial-letter selector.
    letter_hrefs = page.xpath(
        '//ul[@id="initial-selector"]/li[position()>1]/a/@href'
    )
    for href in letter_hrefs:
        parse_singer('https://music.163.com' + href)


# ---------------------传入获得的字母链接，开始爬取歌手内容
def parse_singer(url):
    """Collect every singer listed on one initial-letter page.

    For each singer a new dict with the name and the music-page link is
    built, handed to ``parse_detail`` to be completed and stored, and
    then printed.

    Args:
        url: Absolute URL of an initial-letter listing page.
    """
    html = get_xpath(url)
    singer_names = html.xpath('//ul[@id="m-artist-box"]/li/p/a/text()')
    # Each entry may hold two <a> tags (per the original note), so only
    # the first one's href is taken.
    singer_hrefs = html.xpath('//ul[@id="m-artist-box"]/li/p/a[1]/@href')
    # Names and links are paired by position; zip stops at the shorter
    # list instead of raising IndexError when the two differ in length.
    for name, href in zip(singer_names, singer_hrefs):
        # A fresh dict per singer: reusing one dict across iterations made
        # every entry stored by parse_detail alias the same object, so all
        # of them ended up holding the last singer's data.
        item = {
            '歌手名': name,
            '音乐链接': 'https://music.163.com' + href.strip(),
        }
        # The description page is the same link with '/desc' inserted
        # before the query string.
        detail_url = item['音乐链接'].replace('?id', '/desc?id')
        parse_detail(detail_url, item)

        print(item)


# ---------获取详情页url和存着歌手名字和音乐列表的字典，在字典中添加详情页数据
def parse_detail(url, item):
    """Add the description text to ``item``, then record and persist it.

    Args:
        url: Absolute URL of the singer's description page.
        item: Singer dict to extend; it is mutated in place, appended to
            the module-level ``singer_infos`` list and written to disk
            through ``write_singer``.
    """
    item['歌手信息'] = get_xpath(url).xpath('//div[@class="n-artdesc"]/p/text()')
    singer_infos.append(item)
    write_singer(item)


# ----------------将数据字典写入歌手文件
def write_singer(item):
    """Append one singer record to ``singer.json`` as a single JSON line.

    Args:
        item: JSON-serialisable dict describing one singer.
    """
    with open('singer.json', 'a', encoding='utf-8') as file:
        # ensure_ascii=False keeps Chinese text readable instead of
        # \uXXXX escapes; the file is opened as UTF-8, so this is safe.
        json.dump(item, file, ensure_ascii=False)
        # One record per line: objects appended back to back with no
        # separator cannot be parsed again.
        file.write('\n')


if __name__ == '__main__':
    # Start the crawl only when the file is run as a script.
    parse()