Python requests 解决乱码问题

最新推荐文章于 2024-07-12 16:58:11 发布

Test_C.

最新推荐文章于 2024-07-12 16:58:11 发布

阅读量341

点赞数

分类专栏： Python requests

本文链接：https://blog.csdn.net/weixin_42544006/article/details/90549496

版权

Python 同时被 2 个专栏收录

110 篇文章 3 订阅

订阅专栏

requests

8 篇文章 0 订阅

订阅专栏

# -*- coding:utf-8 -*-
import requests
import random

# 获取内容
class Get_content(object):
    """Fetch a page with ``requests`` and return its text with mojibake repaired.

    A User-Agent header is picked at random from ``USER_AGENTS`` when the
    instance is created and reused for every request made through it.
    """

    # Pool of desktop Chrome User-Agent strings to choose from.
    USER_AGENTS = (
        "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.1 Safari/537.36",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36",
        "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2226.0 Safari/537.36",
        "Mozilla/5.0 (Windows NT 6.4; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.36",
        "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.36",
        "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2224.3 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.93 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.124 Safari/537.36",
        "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36",
        "Mozilla/5.0 (Windows NT 4.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36",
    )

    # Encodings tried, in this order, after the charset the page declares.
    FALLBACK_ENCODINGS = ('utf-8', 'gbk')

    def __init__(self):
        self.headers = {'User-Agent': random.choice(self.USER_AGENTS)}

    @staticmethod
    def _decode_clean(raw, encodings):
        """Decode *raw* bytes with the first encoding that yields no U+FFFD.

        Encodings are tried in order with ``errors='replace'``; unknown codec
        names are skipped. If every candidate leaves replacement characters,
        the result of the last usable candidate is returned. If no candidate
        is usable at all, *raw* is decoded as UTF-8 with replacement.
        """
        text = None
        for encoding in encodings:
            try:
                text = raw.decode(encoding, errors='replace')
            except LookupError:
                # The page declared a codec name Python does not know.
                continue
            if '\ufffd' not in text:
                return text
        if text is None:
            return raw.decode('utf-8', errors='replace')
        return text

    def info(self, url, proxy_ip=None):
        """GET *url* and return the decoded body, or ``None`` on failure.

        Args:
            url: Address to fetch.
            proxy_ip: Optional proxy URL (e.g. ``http://10.10.1.10:3128``)
                used for both the ``http`` and ``https`` schemes.

        Returns:
            The page text for a 200 response; ``None`` when the request
            raises a ``requests`` exception or the status code is not 200.
        """
        proxies = {
            'http': proxy_ip,
            'https': proxy_ip,
        }
        try:
            # NOTE(review): verify=False turns off TLS certificate checking.
            # Kept to preserve existing behaviour; confirm it is intended.
            response = requests.get(
                url,
                headers=self.headers,
                timeout=(15, 20),  # (connect, read) seconds
                proxies=proxies,
                verify=False,
            )
        except requests.RequestException:
            # Best-effort fetch: network/HTTP-layer failures yield None.
            return None

        if response.status_code != 200:
            return None

        text = response.text
        declared = requests.utils.get_encodings_from_content(text)
        if not declared and '\ufffd' not in text:
            # No charset declared in the body and the default decoding is
            # already clean: previously this page was dropped (None returned).
            return text

        # Try the page's own declared charset first, then the fallbacks.
        candidates = declared[:1] + list(self.FALLBACK_ENCODINGS)
        return self._decode_clean(response.content or b'', candidates)

if __name__ == '__main__':
    # Demo: fetch the same URL once through a proxy and once directly,
    # printing whatever info() returns each time.
    fetcher = Get_content()
    for proxy in ('http://****:808', None):
        page = fetcher.info('https://www.baidu.com/s?wd=ip', proxy)
        print(page)
        if proxy is not None:
            # Keep the blank line that separated the two demo outputs' code.
            pass

Test_C.

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
Python requests 解决乱码问题

# -*- coding:utf-8 -*- import requests import random # 获取内容 class Get_content(object): def __init__(self): self.headers = { 'User-Agent': random.choice([ "Mo...
复制链接

扫一扫

专栏目录