BeautifulSoup 报错:input conversion failed due to input error

一个简单粗暴的解决方法,可以自己试试:先把响应内容按 utf-8 解码并忽略无法解码的字节,再只保留 <body 之后的部分交给 BeautifulSoup 解析。

直接看代码:

from bs4 import BeautifulSoup, Comment
import requests
from retrying import retry


@retry(stop_max_attempt_number=5)
def _get_url_three_content(requests_url):
    """Fetch ``requests_url`` with browser-like request headers.

    The function is wrapped by ``retry(stop_max_attempt_number=5)``; an
    exception raised here (network failure, timeout, unexpected status) is
    expected to trigger another attempt, up to five in total.

    Args:
        requests_url: Absolute URL to request with HTTP GET.

    Returns:
        The ``requests`` response object when the status code is 200, or the
        empty string ``''`` when the server answers 404. Callers must check
        for the empty string before touching response attributes.

    Raises:
        AssertionError: If the status code is neither 200 nor 404.
    """
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        # 'br' is deliberately not advertised: Brotli bodies are only decoded
        # transparently when an optional Brotli package is installed;
        # otherwise ``.content`` would be raw compressed bytes, which cannot
        # be decoded as text or parsed as HTML.
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        # No explicit 'Host' entry: a hard-coded host is wrong for every
        # other site (and for redirects); the HTTP client derives the Host
        # header from the URL being requested.
        'Upgrade-Insecure-Requests': '1',
    }
    response = requests.get(requests_url, headers=headers, timeout=6)
    status = response.status_code
    if status == 404:
        return ''
    if status != 200:
        # Raised explicitly instead of via ``assert`` so the check survives
        # ``python -O`` while callers still see the same exception type.
        raise AssertionError(
            'unexpected HTTP status %s for %s' % (status, requests_url)
        )
    return response


def run():
    """Download a Baidu search-result page and parse its body with BeautifulSoup.

    The response bytes are decoded as UTF-8 with undecodable bytes ignored,
    everything before the last ``<body`` marker is discarded, and the
    remainder is parsed with the ``lxml`` parser.

    Returns:
        The parsed ``BeautifulSoup`` tree, or ``None`` when the fetch helper
        reported a 404 by returning an empty string.
    """
    requests_url = 'https://www.baidu.com/s?wd=BeautifulSoup%E6%8A%A5%E9%94%99input%20conversion%20failed%20due%20to%20input%20error&rsv_spt=1&rsv_iqid=0xfeb80b100001c5bc&issp=1&f=8&rsv_bp=1&rsv_idx=2&ie=utf-8&rqlang=cn&tn=baiduhome_pg&rsv_enter=1&rsv_t=c3b8j6jMfVRrUK9Ad2ZUcxta09Cvd%2FPSY%2F5CwKC47Rb7tjEQJoY55RMx02dspeHzzQdv&oq=encoding%2520error%2520%253A%2520input%2520conversion%2520failed%2520due%2520to%2520input%2520error%252C%2520bytes%25200x9D%25200x%2526gt%253B6&inputT=1475&rsv_sug3=51&rsv_pq=a7659e250004e379&rsv_sug1=2&rsv_sug7=001&rsv_n=2&bs=encoding%20error%20%3A%20input%20conversion%20failed%20due%20to%20input%20error%2C%20bytes%200x9D%200xE6'
    html = _get_url_three_content(requests_url=requests_url)
    if html == '':
        # The helper signals a 404 with an empty string; there is no
        # response object (and so no ``.content``) to parse.
        return None

    # Decode ourselves and drop undecodable bytes so the parser receives
    # text rather than raw bytes.
    html_str = html.content.decode('utf-8', 'ignore')

    # Keep only the part starting at the last '<body'. When the marker is
    # absent, leave the text untouched instead of prepending a stray '<body'.
    _, marker, tail = html_str.rpartition('<body')
    if marker:
        html_str = marker + tail

    # With the input prepared this way BeautifulSoup no longer reports the
    # conversion error internally.
    soup = BeautifulSoup(html_str, 'lxml')
    return soup

# Run the demo only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    run()

 

 

 

 

 

 

 

 

  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值