import requests
from lxml import etree
import time
import os
import re
def huoquyuanma(url = 'https://www.tujigu.com/'):
headers = {
'Accept': '*/*',
'Accept-Language': 'en-US,en;q=0.8',
'Cache-Control': 'max-age=0',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36',
'Connection': 'keep-alive',
'Referer': 'http://www.baidu.com/'
}
try:
new = requests.get(url , headers=headers , timeout = 16) #获取源码
time.sleep(2) #等待
new.encoding = new.apparent_encoding #防乱码
t = new.text
t = etree.HTML(t) #转换成xml格式
return