from bs4 import BeautifulSoup

# Read a text file in which each line is parsed as an HTML fragment, and print
# the title and URL stored in the data-* attributes of the line's first <a> tag.
# NOTE(review): the file is opened with the platform default encoding, exactly
# as before; pass encoding=... explicitly once the file's encoding is confirmed.
with open("C:/Users/JIA/Desktop/数据结构/file.txt", "r") as f:
    # The with-block closes the file even if handling a line raises.
    # Iterating the handle streams line by line instead of loading everything.
    for line in f:
        # Name the parser explicitly; 'html.parser' is the standard-library one.
        soup = BeautifulSoup(line, 'html.parser')

        anchor = soup.a
        if anchor is None:
            # Blank lines or lines without an <a> tag carry nothing to print.
            continue

        attrs = anchor.attrs
        # data-type may be absent; .get() lets such anchors take the plain
        # branch instead of failing with KeyError.
        # data-title and data-url are still indexed directly, so an anchor
        # missing either one raises KeyError rather than being dropped silently.
        if attrs.get('data-type') == "ppt":
            # For ppt entries the type is appended directly to the title.
            print(attrs['data-title'] + attrs['data-type'], attrs['data-url'])
        else:
            print(attrs['data-title'], attrs['data-url'])
Python 实现:逐行读取 HTML 文件中特定标签的属性值
最新推荐文章于 2024-07-29 02:32:41 发布