import os
import re

import requests
# Address of the article page to fetch.
url = "https://zhuanlan.zhihu.com/p/514271125"

# Request headers carrying a desktop-Chrome user-agent string.
headers = {
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/95.0.4638.69 Safari/537.36"
    ),
}
# Patterns for the page fragments this script extracts; the first capture
# group of each is the wanted value.
_TITLE_PATTERN = re.compile(r"<title data-rh=\"true\">(.+?)</title><meta name=\"viewport\" ")
_ICON_PATTERN = re.compile(r"<link crossorigin=\"\" rel=\"shortcut icon\" type=\"image/x-icon\" href=\"(.+?)\"/>")
_IMAGE_PATTERN = re.compile(r"class=\"origin_image zh-lightbox-thumb\" width=\"1914\" data-original=\"(.+?)\"/>")

# Upper bound (seconds) on each HTTP request so a stalled server cannot
# hang the script indefinitely.
_REQUEST_TIMEOUT = 10


def _download(link, destination):
    """Fetch *link* and write the response body to the file *destination*.

    Raises:
        requests.HTTPError: if the server answers with an error status, so an
            error page is never saved as if it were the image.
    """
    reply = requests.get(link, headers=headers, timeout=_REQUEST_TIMEOUT)
    reply.raise_for_status()
    folder = os.path.dirname(destination)
    if folder:
        # The target folder may not exist yet on a fresh checkout.
        os.makedirs(folder, exist_ok=True)
    # The context manager guarantees the file is flushed and closed.
    with open(destination, "wb") as out:
        out.write(reply.content)


def _save_first_match(pattern, html, destination):
    """Download the first URL that *pattern* captures in *html*, if any."""
    matches = pattern.findall(html)
    if not matches:
        # Nothing to download; an empty URL would only make requests fail.
        print("No match found; skipping " + destination)
        return
    # Use only the first hit: joining several hits would yield an invalid URL.
    link = matches[0]
    print(link)
    _download(link, destination)


def print_hi(name):
    """Scrape the page at the module-level ``url``.

    Prints the list of page titles matched in the HTML, then downloads the
    site icon to ``png/zh.ico`` and the first matching article image to
    ``png/zh1.jpg``. A download is skipped (with a printed notice) when its
    pattern does not match the page.

    Args:
        name: Unused; kept so existing callers keep working.

    Raises:
        requests.HTTPError: if the page or an image request returns an error
            status.
    """
    # Fetch the page source and decode it as UTF-8.
    response = requests.get(url, headers=headers, timeout=_REQUEST_TIMEOUT)
    response.raise_for_status()
    response.encoding = 'utf-8'
    html = response.text

    # Text: every title captured by the pattern.
    print(_TITLE_PATTERN.findall(html))

    # Images: the shortcut icon and the article picture.
    _save_first_match(_ICON_PATTERN, html, "png/zh.ico")
    _save_first_match(_IMAGE_PATTERN, html, "png/zh1.jpg")
# Web page image and text scraping example
# First published 2023-07-03 17:15:22