from lxml import etree # 导入etree模块
# Create an element whose tag is 'root', passing one attribute as a keyword.
root = etree.Element('root', interesting='totatlly')
root.set('set', '30')  # add a second attribute, named 'set'
root.text = 'data'  # text content placed inside the tag

# Serialize the element to bytes, then parse those bytes back three ways.
x = etree.tostring(root)
one = etree.fromstring(x)
two = etree.XML(x)
thr = etree.HTML(x)  # same bytes, handed to the HTML entry point

# Show the original element: its repr, tag name, text and Python type.
print(root, root.tag, root.text, type(root), sep='\n')

# Show each re-parsed element serialized back to bytes; for the HTML result
# also show the .text of the element that etree.HTML returned.
print(
    etree.tostring(one),
    etree.tostring(two),
    thr.text,
    etree.tostring(thr),
    sep='\n',
)
# Contents of ./hello.html, the file read by etree.parse('./hello.html'):
# <div>
# <ul>
# <li class="item-0"><a href="link1.html">first item</a></li>
# <li class="item-1"><a href="link2.html">second item</a></li>
# <li class="item-inactive"><a href="link3.html">haha<span class="bold">third item</span></a></li>
# <li class="item-1"><a href="link4.html">fourth item</a></li>
# <li class="item-0"><a href="link5.html">fifth item</a></li>
# </ul>
# </div>
# Parse ./hello.html from the current working directory. No parser is passed
# to etree.parse, so the file has to exist and be parseable by the default
# parser.
html = etree.parse('./hello.html')
result = etree.tostring(html, pretty_print=True)

# XPath queries run against the parsed tree.
res = html.xpath('//li/@class')            # class attribute of every <li>
re1 = html.xpath('//li[1]/@class')         # <li> at position 1
re2 = html.xpath('//li[last()]/@class')    # last <li>
re3 = html.xpath('//li[last()-1]/@class')  # second-to-last <li>
re4 = html.xpath('//li[last()-1]/a')       # <a> inside the second-to-last <li>

print(html)
print(result)
print(res)
print(re1)
print(re2[0])
print(type(re3[0]))
print(re4[0].text)

# Re-parse the serialized bytes into an element and search it with the
# ElementPath methods find / findall / iterfind.
r = etree.XML(result)
print(r.find('ul'))
print(r.findall('.//li[@class]'))
for i in r.iterfind('.//li[@class]'):
    # The loop body must be indented under the `for`; at column 0 the file
    # does not compile.
    print(i.tag)
# Second pass over the same file: serialize the parsed tree (no
# pretty-printing this time) and re-parse the bytes into an element.
html = etree.parse('./hello.html')
result = etree.tostring(html)
xml = etree.XML(result)

r1 = xml.findall('.//li')                     # ElementPath search on the re-parsed element
r2 = html.xpath('//li/@class')                # XPath on the tree parsed from the file
r3 = xml.find(".//li/a[@href='link1.html']")  # single result from find()

# One value per line, in the order r1, r2, r3.
print(r1, r2, r3, sep='\n')
# Sample bookstore document kept in memory as a string. It holds five <book>
# elements, each with a category attribute and title/author/year/price
# children. The last <book> has mixed content: the text "helloworld" before
# its <title>, and a <price> that contains a nested <tt> element.
# The literal has no backslashes, so the r prefix does not alter its value.
xml_doc = r'''<?xml version="1.0" ?>
<bookstore>
<book category="COOKING">
<title lang="en">Everyday Italian</title>
<author>Giada De Laurentiis</author>
<year>2005</year>
<price>30.00</price>
</book>
<book category="CHILDREN">
<title lang="en">Harry Potter</title>
<author>J K. Rowling</author>
<year>2005</year>
<price>29.99</price>
</book>
<book category="WEB">
<title lang="en">XQuery Kick Start</title>
<author>James McGovern</author>
<author>Per Bothner</author>
<author>Kurt Cagle</author>
<author>James Linn</author>
<author>Vaidyanathan Nagarajan</author>
<year>2003</year>
<price>49.99</price>
</book>
<book category="WEB">
<title lang="en">Learning XML</title>
<author>Erik T. Ray</author>
<year>2003</year>
<price>39.95</price>
</book>
<book category="WEB">
helloworld
<title lang="en">Learning XML</title>
<author>Erik T. Ray</author>
<year>2003</year>
<price>39.95
<tt>
haha
</tt>
</price>
</book>
</bookstore> '''
# Parse the in-memory document and select the <title> of every <book> that
# satisfies the XPath predicate price>30, then print each title's text.
tree = etree.XML(xml_doc)
r1 = tree.xpath('/bookstore/book[price>30]/title')
for i in r1:
    # The loop body must be indented under the `for`; at column 0 the file
    # does not compile.
    print(i.text)