# Source code
# 导入python自带包urllib
import urllib.request
import urllib.parse
from lxml import etree
# Fetch a page with a browser-like User-Agent, save the raw bytes to
# 'baidu.html', then parse the HTML and print the `value` attribute of the
# <input id="su"> element.

# Request headers sent with the request so it carries a browser User-Agent.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
}

# Address to fetch.
url = 'http://www.baidu.com/'

# Build a customized request object that carries the headers above.
request = urllib.request.Request(url=url, headers=headers)

# Send the customized request (not the bare URL, which would drop the
# headers). The context manager closes the connection once the body is read,
# and the timeout keeps the script from hanging forever on a stalled server.
with urllib.request.urlopen(request, timeout=10) as response:
    raw_body = response.read()

# Save exactly the bytes that were received, instead of downloading the page
# a second time, so the file on disk matches what is parsed below.
with open('baidu.html', 'wb') as html_file:
    html_file.write(raw_body)

# The response body is bytes; decode it as UTF-8 to get the page source text.
content = raw_body.decode('utf-8')

# Parse the server's response into an element tree.
tree = etree.HTML(content)

# xpath() returns a list of matches; an empty list means the element is absent.
matches = tree.xpath('//input[@id="su"]/@value')
if not matches:
    # Same exception type as the original bare [0] lookup, with a clear message.
    raise IndexError('no <input id="su"> element with a value attribute was found in the response')
result = matches[0]
print(result)