# -*- coding:utf-8 -*-
import requests
import random
# Fetch page content
class Get_content(object):
    """Fetch a web page and return its body as decoded text.

    Each instance picks one User-Agent string at construction time and sends
    it with every request made through :meth:`info`.
    """

    # Pool of desktop Chrome User-Agent strings; one is chosen per instance.
    USER_AGENTS = (
        "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.1 Safari/537.36",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36",
        "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2226.0 Safari/537.36",
        "Mozilla/5.0 (Windows NT 6.4; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.36",
        "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.36",
        "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2224.3 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.93 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.124 Safari/537.36",
        "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36",
        "Mozilla/5.0 (Windows NT 4.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36",
    )

    # Encodings tried, in order, after the charset the page declares itself.
    FALLBACK_ENCODINGS = ('utf-8', 'gbk')

    def __init__(self):
        """Build the request headers with a randomly chosen User-Agent."""
        self.headers = {'User-Agent': random.choice(self.USER_AGENTS)}

    def info(self, url, proxy_ip=None):
        """Download *url* and return the response body as text.

        Args:
            url: Address to fetch.
            proxy_ip: Optional proxy URL, e.g. ``http://10.10.1.10:3128``.
                The same value is used for both the ``http`` and ``https``
                proxy entries.

        Returns:
            The decoded body when the server answers with status 200;
            ``None`` for any other status code or when the request fails
            for any reason.
        """
        # Keep the caller's argument intact instead of rebinding it. The dict
        # is passed even when proxy_ip is None, exactly as before.
        proxies = {'http': proxy_ip, 'https': proxy_ip}
        try:
            # NOTE(review): verify=False disables TLS certificate validation,
            # so HTTPS responses are not authenticated. Kept because existing
            # callers may depend on it; reconsider for untrusted networks.
            response = requests.get(
                url,
                headers=self.headers,
                timeout=(15, 20),  # (connect, read) timeouts in seconds
                proxies=proxies,
                verify=False,
            )
            if response.status_code != 200:
                return None
            return self._decode(response)
        except Exception:
            # Deliberate best-effort contract: any failure (network error,
            # bad URL, ...) is reported to the caller as "no content".
            return None

    @classmethod
    def _decode(cls, response):
        """Return the body of *response* decoded with the best-fitting charset.

        The charset declared inside the document is tried first, followed by
        the entries of ``FALLBACK_ENCODINGS``. The first encoding that decodes
        the raw bytes without error wins. A strict decode is used as the
        failure signal rather than searching the text for U+FFFD, so a page
        that legitimately contains that character is not re-decoded into
        mojibake.
        """
        declared = requests.utils.get_encodings_from_content(response.text)
        if not declared:
            # No charset declared in the document: keep requests' own guess.
            return response.text

        primary = declared[0]
        candidates = [primary]
        candidates.extend(
            name for name in cls.FALLBACK_ENCODINGS
            if name.lower() != primary.lower()
        )
        for name in candidates:
            try:
                return response.content.decode(name)
            except (UnicodeDecodeError, LookupError):
                # Wrong or unknown codec: try the next candidate.
                continue

        # Nothing decoded cleanly; fall back to the declared charset and let
        # requests substitute replacement characters for the bad bytes.
        response.encoding = primary
        return response.text
if __name__ == '__main__':
    # Manual smoke test: fetch the same search page once through an explicit
    # proxy and once without one, printing whatever comes back each time.
    fetcher = Get_content()
    target = 'https://www.baidu.com/s?wd=ip'
    for proxy in ('http://****:808', None):
        print(fetcher.info(target, proxy))