from pyquery import PyQuery as pq
def get_file(filename):
    """Parse a local HTML file and collect its ``data-asin`` attribute values.

    Args:
        filename: Path of the local file; handed to PyQuery via ``filename=``.

    Returns:
        A list holding the ``data-asin`` attribute value of every ``div``
        element that carries that attribute, in the order PyQuery yields
        the matches. The list is empty when nothing matches.
    """
    # Load the document from a local file.
    doc = pq(filename=filename)
    # Select every <div> that has a data-asin attribute.
    div_all = doc.find('div[data-asin]')
    # .items() yields each match already wrapped as a PyQuery object, so no
    # index-based lookup or manual re-wrapping is needed.
    return [div.attr("data-asin") for div in div_all.items()]
# Raw string literal: the Windows path contains backslashes that must be kept
# verbatim rather than being treated as (invalid) escape sequences.
filename = r"F:\数据提取\yangben\A101_cat toys_1.txt"
get_file(filename)