from lxml import etree
def start_line(mark):
    """Print a banner line that labels the start of a demo section.

    Args:
        mark: Text placed between the leading and trailing dashes.
    """
    print("--------" + mark + "-------")
def case_element():
    """Demonstrate creating an lxml element and then modifying it.

    Prints the element's type and tag, then the serialized element after
    each step: an attribute given at construction, an attribute added with
    ``set``, and text content assigned through ``text``.
    """
    start_line("element")

    # Create a bare node.
    root = etree.Element('root')
    print(type(root))
    print(root.tag)

    # Create a fresh node with an attribute supplied at construction time.
    root = etree.Element('root', interesting='totally')
    print(etree.tostring(root))

    # Add another attribute after the node already exists.
    root.set('age', '18')
    print(etree.tostring(root))

    # Attach text content to the node.
    root.text = 'Hello World'
    print(etree.tostring(root))
def case_elementtree():
    """Demonstrate the different lxml entry points for parsing markup.

    The same in-memory string is parsed with ``etree.fromstring``,
    ``etree.XML`` and ``etree.HTML``; then ``./book.xml`` (relative to the
    current working directory) is parsed with ``etree.parse`` and
    serialized with and without pretty-printing.
    """
    start_line('element tree')

    # NOTE(review): the source literal was the bare word 'data', which is
    # not well-formed XML and makes fromstring()/XML() raise. The markup
    # appears to have been stripped; a minimal single-element document is
    # used here -- confirm against the intended sample.
    xml_data = '<root>data</root>'

    # fromstring: parse a string and get its root element.
    root_one = etree.fromstring(xml_data)
    print(type(root_one))
    print(root_one.tag)

    # XML: same input through the XML literal helper.
    root_two = etree.XML(xml_data)
    print(type(root_two))
    print(root_two.tag)

    # HTML: same input through the HTML parser.
    root_three = etree.HTML(xml_data)
    print(type(root_three))
    print(root_three.tag)
    print(etree.tostring(root_three))

    # parse: read a document from a file.
    root_four = etree.parse('./book.xml')
    print(type(root_four))
    print(root_four)
    print(etree.tostring(root_four, pretty_print=False))
    print(etree.tostring(root_four, pretty_print=True))
def case_xpath001():
    """Select every ``book`` element in ``./book.xml`` with XPath.

    Prints the result list, its length, its type, and the tag of the
    first match. Raises ``IndexError`` if the document has no ``book``
    elements, because the first match is indexed directly.
    """
    start_line('xpath 001')
    mytree = etree.parse('./book.xml')

    # '//book' matches book elements at any depth in the document.
    result = mytree.xpath('//book')
    print(result)
    print(len(result))
    print(type(result))
    print(result[0].tag)
def case_xpath002():
    """Demonstrate attribute and predicate XPath queries on ``./book.xml``.

    Prints the ``category`` attribute values of all ``book`` elements, the
    books whose category is ``web`` (plus the first one serialized), and
    the text and tag of the first child of the ``children`` book.

    Raises ``IndexError`` if a query that is indexed with ``[0]`` matches
    nothing.
    """
    start_line('xpath 002')
    mytree = etree.parse('./book.xml')

    # Attribute selection yields the attribute values themselves.
    result = mytree.xpath('//book/@category')
    print(result)

    # Predicate on an attribute value.
    result = mytree.xpath('//book[@category="web"]')
    print(result)
    print(etree.tostring(result[0]))

    # Alternative query to try: '//book[last()-1]/author'
    # '*' selects all child elements of the matched book.
    result = mytree.xpath('/bookstore/book[@category="children"]/*')
    print(result[0].text)
    print(result[0].tag)
# Run the lxml demos only when this file is executed as a script.
if __name__ == '__main__':
    case_element()
    case_elementtree()
    case_xpath001()
    case_xpath002()
from bs4 import BeautifulSoup
import re
# Sample HTML document shared by the BeautifulSoup demos below.
# NOTE(review): the <html>/<head>/<title>/<body> opening markup and the
# bold title paragraph were missing from the literal (only the title text
# and a dangling </body> remained), which leaves soup.title as None.
# They are restored here following the standard Beautiful Soup
# documentation sample -- confirm against the intended document.
html_doc = """
<html><head><title>The Dormouse's story</title></head>
<body>
<p class="title"><b>The Dormouse's story</b></p>
<p class="story">Once upon a time there were three little sisters; and their names were
<a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>,
<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>
<p class="story">...</p>
</body>
</html>
"""
def start_line(mark):
    """Print a banner line that labels the start of a demo section.

    Args:
        mark: Text placed between the leading and trailing dashes.
    """
    print("--------" + mark + "-------")
def case_findelement():
    """Demonstrate attribute-style navigation of a parsed document.

    Parses ``html_doc`` with the ``lxml`` parser and prints the whole
    soup (raw and prettified), then the title tag, its name, string and
    parent name, the first ``p`` tag and its ``class`` attribute, and the
    first ``a`` tag.
    """
    start_line('element')
    soup = BeautifulSoup(html_doc, 'lxml')

    # Whole document, as-is and pretty-printed.
    print(soup)
    print(soup.prettify())

    # Dotted access returns the first tag with that name.
    print(soup.title)
    print(soup.title.prettify())
    print(soup.title.name)
    print(soup.title.string)
    print(soup.title.parent.name)

    print(soup.p)
    # Attributes are read with dictionary-style indexing.
    print(soup.p['class'])
    print(soup.a)
def case_findall001():
    """Demonstrate ``find_all`` with a name, a regex, and a list of names.

    Prints the matches for the tag name ``b``, for tag names matching the
    pattern ``^b``, and for tags named either ``a`` or ``b``.
    """
    start_line('findall 001')
    soup = BeautifulSoup(html_doc, 'lxml')

    # Exact tag name.
    print(soup.find_all('b'))
    # Compiled pattern applied to tag names.
    print(soup.find_all(re.compile("^b")))
    # Any of several tag names.
    print(soup.find_all(["a", "b"]))
def case_findall002():
    """Demonstrate the keyword filters accepted by ``find_all``.

    Covers filtering by ``id``, by ``id`` combined with an ``href``
    pattern, by CSS class, by string content, with a result ``limit``,
    and with ``recursive=False``.
    """
    start_line('findall 002')
    soup = BeautifulSoup(html_doc, 'lxml')

    # Attribute filters.
    print(soup.find_all(id='link2'))
    print(soup.find_all(id='link1', href=re.compile("elsie")))
    # 'class' is a Python keyword, hence the trailing underscore.
    print(soup.find_all("a", class_="sister"))

    # String-content filters. 'string' is the current name of the
    # argument formerly spelled 'text', which Beautiful Soup deprecates.
    print(soup.find_all(string="Elsie"))
    print(soup.find_all(string=["Elsie", "Tillie", "Lacie"]))

    # Limiting the number of results.
    print(soup.find_all("a"))
    print(soup.find_all("a", limit=2))

    # recursive=False restricts the search to direct children.
    print(soup.find_all("title"))
    print(soup.find_all("title", recursive=False))
def case_findall003():
    """Print the ``href`` of every ``a`` tag, then the document's text."""
    start_line('findall 003')
    soup = BeautifulSoup(html_doc, 'lxml')

    for link in soup.find_all('a'):
        print(link.get('href'))

    # All text content of the document with the markup removed.
    print(soup.get_text())
def case_cssfind():
    """Demonstrate CSS-selector lookups through ``select``.

    Prints the matches for a tag selector, a class selector, an id
    selector, a descendant selector, and an attribute-value selector.
    """
    start_line('cssfind')
    soup = BeautifulSoup(html_doc, 'lxml')

    selectors = (
        "title",                                  # by tag name
        ".sister",                                # by class
        "#link1",                                 # by id
        "p #link2",                               # id nested under a p tag
        'a[href="http://example.com/tillie"]',    # by exact attribute value
    )
    for selector in selectors:
        print(soup.select(selector))
# Run the BeautifulSoup demos only when this file is executed as a script.
if __name__ == '__main__':
    case_findelement()
    case_findall001()
    case_findall002()
    case_findall003()
    case_cssfind()
import json
import jsonpath
# Sample JSON document queried by the jsonpath demo. The keys and string
# values must use plain ASCII double quotes; typographic quotes are not
# valid JSON and make json.loads() fail.
bookjson = """
{
    "store":{
        "book":[
            {
                "category":"reference",
                "author":"Nigel Rees",
                "title":"Sayings of the Century",
                "price":8.95
            },
            {
                "category":"fiction",
                "author":"J. R. R. Tolkien",
                "title":"The Lord of the Rings",
                "isbn":"0-395-19395-8",
                "price":22.99
            }
        ],
        "bicycle":{
            "color":"red",
            "price":19.95
        }
    }
}
"""
def start_line(mark):
    """Print a banner line that labels the start of a demo section.

    Args:
        mark: Text placed between the leading and trailing dashes.
    """
    print("--------" + mark + "-------")
def case_loads():
    """Show that ``json.loads`` turns JSON text into Python objects.

    Prints the type of two JSON strings (an array and an object) before
    and after decoding.
    """
    start_line('loads')
    str_list = '[1,2,3,4]'
    str_dict = '{"city":"北京","name":"张三"}'

    # Before decoding both values are plain strings.
    print(type(str_list))
    print(type(str_dict))

    # After decoding they are the corresponding Python containers.
    print(type(json.loads(str_list)))
    print(type(json.loads(str_dict)))
def case_dumps():
    """Show that ``json.dumps`` turns Python objects into JSON text.

    Prints the type of a list, a tuple and a dict, then the type of each
    one after serialization.
    """
    start_line('dumps')
    demo_list = [1, 2, 3, 4]
    demo_tuple = (1, 2, 3, 4)
    demo_dict = {"city": "北京", "name": "张三"}
    samples = (demo_list, demo_tuple, demo_dict)

    # Types of the Python objects themselves.
    for sample in samples:
        print(type(sample))

    # Types after serialization.
    for sample in samples:
        print(type(json.dumps(sample)))
def case_load():
    """Demonstrate file-based ``json.load`` and ``json.dump``.

    Reads ``book.json`` from the current working directory, prints the
    decoded object's type, the object itself, and the price of the first
    book. Then writes a small dict to ``book002.json`` twice: first with
    the default ASCII escaping, then again with ``ensure_ascii=False`` so
    the non-ASCII characters are stored verbatim. The second write
    replaces the first.

    Raises:
        FileNotFoundError: if ``book.json`` does not exist.
        KeyError / IndexError: if ``book.json`` lacks
            ``store -> book -> [0] -> price``.
    """
    # Context managers close the files even if decoding or encoding fails.
    with open("book.json", encoding="utf-8") as src:
        str_obj = json.load(src)
    print(type(str_obj))
    print(str_obj)
    print(str_obj['store']['book'][0]['price'])

    str_obj = {"city": "北京", "name": "张三"}

    # Default behaviour: non-ASCII characters are written as \uXXXX escapes.
    with open("book002.json", "w", encoding="utf-8") as dst:
        json.dump(str_obj, dst)

    # ensure_ascii=False writes the characters as-is, so the file encoding
    # is set explicitly instead of relying on the platform default.
    with open("book002.json", "w", encoding="utf-8") as dst:
        json.dump(str_obj, dst, ensure_ascii=False)
def case_jsonpath():
    """Run a series of JSONPath expressions against ``bookjson``.

    Decodes ``bookjson`` once and prints the result of
    ``jsonpath.jsonpath`` for each expression, in order.
    """
    bookobj = json.loads(bookjson)

    expressions = (
        "$.store.bicycle.color",                    # a single nested value
        "$.store.book[*]",                          # every book
        "$.store.book[0]",                          # the first book
        "$.store.book[*].title",                    # the title of every book
        "$.store.book[?(@.category=='fiction')]",   # filter on equality
        "$.store.book[?(@.price<10)]",              # filter on comparison
        "$.store.book[?(@.isbn)]",                  # filter on key presence
    )
    for checkurl in expressions:
        print(jsonpath.jsonpath(bookobj, checkurl))
# Run the json / jsonpath demos only when this file is executed as a script.
if __name__ == '__main__':
    case_dumps()
    case_loads()
    case_load()
    case_jsonpath()