一、正则表达式
1.1 简单匹配
import re
# re.findall(pattern, string, flags=0)
#   pattern: the regular expression
#   string:  the text to search
#   flags:   matching-mode flags
# Returns a list with every non-overlapping match of pattern in string.
#
# Every pattern below is a raw string so that regex escapes such as \d and \.
# reach the regex engine untouched and never trip Python's
# invalid-escape-sequence warning.

# Plain characters match themselves.
rs = re.findall(r'abc', 'askdjhakjdhabcasldkjasdh')
print(rs)  # ['abc']

# '.' matches any single character except a newline.
rs = re.findall(r'a.c', 'askdhaksjhdadckajdhkajcdksjfha\ndhd')
print(rs)  # ['adc', 'ajc']

# '\.' matches a literal dot.
rs = re.findall(r'a\.x', 'askdhasjkdha.xakjfkhskjdhfkh')
print(rs)  # ['a.x']

# '[bcd]' matches exactly one of the listed characters.
rs = re.findall(r'a[bcd]e', 'abeaksjdhkajhaceakjshdadesfkhhlaee')
print(rs)  # ['abe', 'ace', 'ade']

# '\d' matches one digit.
rs = re.findall(r'\d', 'skdjfhdkjh3akfjh2sdkjfh')
print(rs)  # ['3', '2']

# '\w' matches one word character (letters, digits, underscore; Unicode-aware
# for str patterns, so the CJK character is included).
rs = re.findall(r'\w', 'sds123中%$^')
print(rs)  # ['s', 'd', 's', '1', '2', '3', '中']

# '*' repeats the previous item zero or more times.
rs = re.findall(r'a\d*', 'a213')
print(rs)  # ['a213']
rs = re.findall(r'a\d*', 'a')
print(rs)  # ['a']

# '+' repeats the previous item one or more times, so a bare 'a' fails.
rs = re.findall(r'a\d+', 'a')
print(rs)  # []

# '?' makes the previous item optional (zero or one occurrence).
rs = re.findall(r'a\d?', 'a123')
print(rs)  # ['a1']

# '{2}' repeats the previous item exactly twice.
rs = re.findall(r'a\d{2}', 'a13798')
print(rs)  # ['a13']
1.2 匹配模式和分组
import re
# Passing a matching mode to re.findall.
# The subject is 'a', a newline, then 'c'; with DOTALL, '.' also matches the
# newline. re.S is the short spelling of re.DOTALL.
subject = 'a\nc'
for mode in (re.DOTALL, re.S):
    rs = re.findall('a.c', subject, flags=mode)
    print(rs)

# Using a group with re.findall.
# Without a group the whole match is returned; with one group only the text
# captured by that group is returned.
for pattern in ('a.+c', 'a(.+)c'):
    rs = re.findall(pattern, subject, flags=re.DOTALL)
    print(rs)
1.3 r原串
import re
# Raw strings (r'...') keep Python's own escape processing out of the way when
# writing a regular expression: each literal backslash in the text to match is
# written as '\\' in the raw pattern, instead of '\\\\' in a normal string.
target = 'a\\nbc'  # the characters: a, backslash, n, b, c

patterns = (
    'a\\nbc',      # regex sees a\nbc -> looks for a newline, so no match
    'a\\\\nbc',    # regex sees a\\nbc -> matches the literal backslash
    r'a\\nbc',     # same regex as the previous line, written as a raw string
)
for pattern in patterns:
    rs = re.findall(pattern, target)
    print(rs)
1.4 提取丁香园疫情的json数据
import re
import requests
from bs4 import BeautifulSoup
# Download the DXY COVID-19 page and extract the JSON array embedded in the
# element whose id is 'getListByCountryTypeService2true'.

# A timeout keeps the script from hanging forever on a stalled connection.
response = requests.get('https://ncov.dxy.cn/ncovh5/view/pneumonia', timeout=10)
# Stop on an HTTP error status instead of parsing an error page.
response.raise_for_status()
data = response.content.decode()

soup = BeautifulSoup(data, 'lxml')
script = soup.find(id='getListByCountryTypeService2true')
if script is None or script.string is None:
    raise ValueError("element 'getListByCountryTypeService2true' not found or has no text")
text = script.string

# Greedy match: from a '[' to the last ']' on the same line.
matches = re.findall(r'\[.*]', text)
if not matches:
    raise ValueError('no JSON array found in the script text')

# Named json_text rather than json so the standard-library json module
# is not shadowed.
json_text = matches[0]
print(json_text)
二、python与json数据的互相转换
2.1 json转python
import json
# The json module ships with Python and converts between JSON text and Python
# data. This example converts JSON into Python objects.
json_str = '''[{"provinceName":"美国","currentConfirmedCount":1179041,"confirmedCount":1643499},{"provinceName":"英国",
"currentConfirmedCount":222227,"confirmedCount":259559}]'''

# json.loads parses JSON held in a str.
rs = json.loads(json_str)
print(rs)

# json.load parses JSON read from an open file object.
try:
    with open('test.json', encoding='utf-8') as file:
        rs = json.load(file)
except FileNotFoundError:
    # test.json is a fixture that this snippet does not create itself.
    print('test.json not found; skipping the json.load example')
else:
    print(rs)
2.2 python转json
import json
# Converting Python data into JSON.
json_str = '''[{"provinceName":"美国","currentConfirmedCount":1179041,"confirmedCount":1643499},{"provinceName":"英国",
"currentConfirmedCount":222227,"confirmedCount":259559}]'''
rs = json.loads(json_str)

# json.dumps serializes to a str; ensure_ascii=False keeps non-ASCII
# characters as-is instead of emitting \uXXXX escapes.
s = json.dumps(rs, ensure_ascii=False)
print(s)

# json.dump serializes straight into an open file object.
with open('test1.json', 'w', encoding='utf-8') as fp:
    json.dump(rs, fp, ensure_ascii=False)