代码:
通过 HTTP/HTTPS 协议获取网页
# 通过 HTTP/HTTPS 协议获取网页
import socket
import ssl
def client(url, charset=None, headers=None):
conn = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
# 默认添加请求头
if headers == None:
headers = 'User-Agent: Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)'
# http协议处理
if 'http://' in url:
url = url.replace('http://', '')
port = 80
# https协议处理
if 'https://' in url:
conn = ssl.wrap_socket(conn)
url = url.replace('https://', '')
port = 443
url = url if '/' in url else url + '/'
urlSplit = url.split('/', 1)
# 连接服务器
conn.connect((urlSplit[0], port))
# 发送报文处理
bMsg = 'GET /{1} HTTP/1.1\r\nHost: {0}\r\n{2}\r\nConnection: close\r\n\r\n'.format(urlSplit[0], urlSplit[1], headers)
# 发送报文
conn.send(bMsg.encode())
html = ''
# 循环接收html字节数据