Python学习-7.8-CSDN博客

本文链接：https://blog.csdn.net/qq_48187055/article/details/107023249

Chapter 7

普通/正则表达式查找

# Find a phone-number pattern without using regular expressions
def isphone(text):
    """Return True if ``text`` has the exact shape ``ddd-ddd-dddd``.

    The string must be exactly 12 characters long, with a hyphen at
    index 3 and index 7 and a decimal digit (per ``str.isdecimal``) at
    every other position.  Anything else yields False.
    """
    if len(text) != 12:
        return False
    # Both separators must be hyphens.
    if text[3] != '-' or text[7] != '-':
        return False
    # Everything outside the two separators has to be a decimal digit.
    digit_part = text[0:3] + text[4:7] + text[8:12]
    return all(ch.isdecimal() for ch in digit_part)
# Check one valid and one invalid candidate directly.
print('123-456-7890 is a phone number:')
print(isphone('123-456-7890'))
print('xiha is a phone number:')
print(isphone('xiha'))
# The scan below takes the place of the four print calls above: it slides
# a 12-character window over the message and reports every window that
# isphone() accepts.
message = 'Call me at 123-456-7890 tomorrow. 123-456-0789 is my office.'
for start, _ in enumerate(message):
    candidate = message[start:start + 12]
    if not isphone(candidate):
        continue
    print('phone number is: ' + candidate)
print('Done')
>>> 
123-456-7890 is a phone number:
True
xiha is a phone number:
False
phone number is: 123-456-7890
phone number is: 123-456-0789
Done

# 正则表达式=regex
# 用正则表达式查找文本模式
# r'字符串'-原始字符串
import re
# Compile the pattern once; the raw string keeps the backslashes literal.
phone = re.compile(r'\d\d\d-\d\d\d-\d\d\d\d')
# search() returns a match object for the first occurrence in the text.
mo = phone.search('my nuber is 123-456-7890.')
matched_text = mo.group()
print('phone is: ' + matched_text)
>>> 
phone is: 123-456-7890

正则表达式符号含义

符号	含义
（）	分组
竖线	匹配多个表达式中的一个（search匹配第一个，findall匹配全部）
？	匹配0次或1次
*	匹配0次或多次
+	匹配1次或多次
{n}	匹配n次
{n,m}	匹配n到m次（默认为贪心匹配，即尽可能多地匹配，最多m次）
{n,}	匹配n到无穷次
{n,m}？	匹配n到m次（非贪心匹配，即尽可能少地匹配，最少n次）
\d,\w,\s	数字，单词（字母、数字和下划线），空白（空格、制表符和换行符）
\D,\W,\S	除数字，单词，空白以外的字符
^spam	字符串必须以spam开始
spam$	字符串必须以spam结束
.	匹配换行符以外的所有字符(只匹配一个字符)
[asd]	匹配方括号内的任意一个字符；在方括号中，普通的正则表达式符号不会被解释
[^asd]	匹配不在方括号内的任意一个字符

正则表达式匹配更多模式（例子）

>>> import re
>>> phone=re.compile(r'(\d\d\d)-(\d\d\d-\d\d\d\d)') # 括号代表分组
>>> mo=phone.search('phone is 111-222-3333')
>>> mo.group() #返回多个值的字符串
'111-222-3333'
>>> mo.group(1) #返回第一个分组
'111'
>>> mo.groups() #返回所有分组
('111', '222-3333')
>>> area,mianumber=mo.groups()  #多重赋值
>>> area
'111'
>>> mianumber
'222-3333'
>>> phone=re.compile(r'(\(\d\d\d\)) (\d\d\d-\d\d\d\d)') #若要匹配括号，则需用倒斜杠对（）进行字符转义
>>> mo=phone.search('phone is (123) 456-7890')
>>> mo.groups() #groups返回的是元组
('(123)', '456-7890')

#字符|称为“管道”，用在匹配多个表达式中的一个时，若都在查找对象中，返回第一次出现的匹配文本
>>> phone=re.compile(r'bat|tom')
>>> mo=phone.search('bat and tom')
>>> mo.group()
'bat'
>>> mo=phone.search('tom and bat')
>>> mo.group()
'tom'
>>> bat=re.compile(r'Bat(man|mobile|bat)')
>>> mo=bat.search('Batmobile lost.')
>>> mo.group()
'Batmobile'
>>> mo.group(1)
'mobile'
# 用问号实现可选匹配~（字符内容）？
>>> phone=re.compile(r'(\d\d\d-)?\d\d\d-\d\d\d\d')
>>> mo=phone.search('123-456-7890')
>>> mo.group()
'123-456-7890'
>>> mo=phone.search('456-7890')
>>> mo.group()
'456-7890'
# 用星号匹配0次或多次，星号之前的分组可在文本中出现无数次
>>> woregex=re.compile(r'bat(wo)*man')
>>> mo=woregex.search('the batwowoman')
>>> mo.group()
'batwowoman'
>>> mo1=woregex.search('the batman')
>>> mo1.group()
'batman'
# 用+匹配1次或多次
>>> woregex=re.compile(r'bat(wo)+man')
>>> mo=woregex.search('the batwowoman')
>>> mo.group()
'batwowoman'
>>> mo1=woregex.search('the batman')
>>> mo1==None
True
# {}匹配特定次数
>>> ppp=re.compile(r'(p){5}') #匹配5次
>>> m1=ppp.search('pppppp')
>>> m1.group()
'ppppp'
>>> pp=re.compile(r'(p){3,5}') #贪心匹配，匹配最长字符串
>>> m2=pp.search('pppppp')
>>> m2.group()
'ppppp'
>>> pp=re.compile(r'(p){3,5}?') # 非贪心匹配，匹配最短字符串
>>> m3=pp.search('pppppp')
>>> m3.group()
'ppp'

findall函数,sub函数

函数	含义	例
search	返回第一次出现的匹配文本
findall （无分组）	返回一个匹配字符串的列表	[‘11-22’,‘44-55-66’]
findall （有分组）	返回一个字符串的元组的列表	[(‘11’,‘22’),(‘44’,‘55’,‘66’)]
sub（替换内容，待处理字符串）	把待处理字符串中所有与正则表达式匹配的文本替换为“替换内容”，返回替换后的新字符串

# 用［］定义匹配的字符分类，findall（无group函数，匹配全部）和search（匹配第一个）
>>> p=re.compile(r'[abc]')
>>> m4=p.search('afbhuxdc')
>>> m4.group()
'a'
>>> m5=p.findall('afbbcahuxdc')
>>> m5
['a', 'b', 'b', 'c', 'a', 'c']
# sub 函数
>>> a=re.compile(r'Agent \w+')
>>> a.sub(r'people','Agent Psc bought a gift for Agent HL')
'people bought a gift for people'
>>> a=re.compile(r'Agent (\w)\w*') #（\w）分组，\1将由分组1的文本所取代，保留姓名的第一个字母
>>> a.sub(r'\1**','Agent Psc bought a gift for Agent HL')
'P** bought a gift for H**'

re.compile()的第二个参数	对应作用
re.DOTALL	让句点字符匹配所有字符，包括换行符
re.I	不区分大小写
re.VERBOSE	忽略正则表达式字符串中空白符和注释

注：可用|连接多种功能，如re.DOTALL|re.I

# '.*',re.DOTALL匹配所有字符
>>> n=re.compile('.*',re.DOTALL )
>>> n.search('psc is cute.\npsc is responsible').group()
'psc is cute.\npsc is responsible'

7.18 实践项目

# 7.18.1 强口令检测
import re
def checkword(password):
    """Report whether ``password`` meets the strength rules.

    A password counts as strong when it is at least 8 characters long and
    contains at least one lowercase letter, one digit and one uppercase
    letter.  The verdict is printed and also returned.

    Args:
        password: The candidate password string.

    Returns:
        True if the password satisfies every rule, otherwise False.
    """
    # search() scans the whole string, so no surrounding '.*' is needed.
    has_lower = re.search(r'[a-z]', password) is not None
    has_digit = re.search(r'[0-9]', password) is not None
    has_upper = re.search(r'[A-Z]', password) is not None
    strong = len(password) >= 8 and has_lower and has_digit and has_upper
    if strong:
        print('您的密码强度符合要求.')
    else:
        print('您的密码强度不符合要求.')
    return strong


# Prompt only when run as a script, so importing this code stays silent.
if __name__ == '__main__':
    passwords = input('输入您的密码：')
    checkword(passwords)
# 7.18.2 strip()函数的正则表达式
import re
psc1=input('输入待处理字符串：')
psc2=input('输入想删去的字符串：')
def deletewords(p1,p2):
    pw1=re.compile(r'^\s+|\s+$')
    pw2=re.compile(p2)
    if p2:
        ps=pw2.sub('',p1)
    else:
        ps=pw1.sub('',p1)
    print(ps)
        
deletewords(psc1,psc2)
>>> 
输入您的密码：2123
您的密码强度不符合要求.
输入待处理字符串：   123456  
输入想删去的字符串：23
   1456

Chapter 8

os模块函数	meaning
os.path.join()	将传入的多个文件夹/文件名称用正确的路径分隔符连接起来，返回文件路径的字符串
os.getcwd()	返回当前工作目录
os.chdir()	改变当前工作目录到指定的路径
os.makedirs()	创建新的文件夹（包括所有必要的中间文件夹）
os.path.abspath()	返回绝对路径的字符串
os.path.isabs(path)	若是绝对路径，返回True
os.path.relpath(path,start)	返回从start到path的相对路径的字符串，若无start，则把当前目录作为开始路径
os.path.dirname()	目录名称（最后一根斜杠之前的所有内容）
os.path.basename()	基本名称（最后一根斜杠之后的所有内容）
os.path.split()	返回目录名称和基本名称的元组
path.split(os.path.sep)	返回路径每个文件夹的列表
os.path.getsize(path)	返回path参数中文件的字节数
os.listdir(path)	返回文件名称字符串的列表
os.path.exists(path)	若path存在，返回True
os.path.isfile(path)	若path存在且为文件，返回True
os.path.isdir(path)	若path存在且为文件夹，返回True

文件读写过程

# 读写文件
hifile=open('hi.txt','w')  #'w'-写模式
>>> hifile.write('hi,world.\n') # 写入的内容，并返回写入字符个数
10                                          
>>> hifile.close()
>>> hifile=open('hi.txt','a') #'a'-添加模式
>>> hifile.write('i am tired,but i must continue.')
31
>>> hifile.close()
>>> hifile=open('hi.txt')
>>> com=hifile.read() # read读模式
>>> hifile.close()
>>> print(com)
hi,world.
i am tired,but i must continue.
# read（）函数返回文件所有内容的字符串
'hi,world.\ni am tired,but i must continue.'
# readlines（）函数返回字符串的列表，一个字符串是文本的一行内容
>>> hifile.close()
>>> hifile=open('hi.txt')
>>> hifile.readlines()
['hi,world.\n', 'i am tired,but i must continue.']

保存变量

# shelve模块保存变量
import shelve
>>> shelffile=shelve.open('mydata')  # 传入文件名，将返回值保存在变量中
>>> cas=['happy','candy','water']
>>> shelffile['cas']=cas
>>> shelffile.close()
>>> shelffile=shelve.open('mydata') #shelf值不必用读写模式，它打开就能读写
>>> type(shelffile)
<class 'shelve.DbfilenameShelf'>
>>> shelffile['cas']
['happy', 'candy', 'water']
>>> shelffile.close()
>>> shelffile=shelve.open('mydata') #shelf值有类似于keys(),values() 方法
>>> list(shelffile.keys())
['cas']
>>> list(shelffile.values())
[['happy', 'candy', 'water']]
>>> shelffile.close()

# 用pprint.pformat()函数保存变量
>>> shelffile.close()
>>> import pprint
>>> cats=[{'name':'happy','des':'unhappy'},{'name':'timo','des':'dog'}]
>>> pprint.pformat(cats) #为使cats能在交互环境下使用，用pprint.pformat()返回为一个字符串
"[{'des': 'unhappy', 'name': 'happy'}, {'des': 'dog', 'name': 'timo'}]"
>>> catfle=open('pw.py','w') #写入一个名为pw 的文件中，.py使得pw 文件成为一个可导入的模块
>>> catfle.write('cats='+pprint.pformat(cats)+'\n')
75
>>> catfle.close()
>>> import pw
>>> pw.cats
[{'des': 'unhappy', 'name': 'happy'}, {'des': 'dog', 'name': 'timo'}]
>>> pw.cats[0]
{'des': 'unhappy', 'name': 'happy'}
>>> pw.cats[0]['name']
'happy'

项目：生成随机测验试卷

import random
# Quiz data: each key is a "state" and each value is its "capital".
# NOTE(review): several entries look like placeholder data rather than
# real capitals - confirm before using the generated quizzes.
capitals = {
    'China': 'Beijing',
    'US': 'Losanji',
    'UK': 'Jianqian',
    'Japan': 'Tokyo',
    'France': 'Paris',
    'Psc': 'cute',
    'HL': 'cool',
    'Ps': 'warm',
    'Hyl': 'happy',
    'Pcc': 'duty',
    'aaa': 'bbb',
    'ccc': 'ddd',
    'dd': 'ff',
    'ee': 'ggg',
    'hhh': 'iii',
    'ooo': 'ppp',
}
# Create six quiz forms, each with a matching answer-key file
for quiznum in range(6):
    # 'with' closes both files even if a write raises part-way through.
    with open('capitalquiz%s.txt' % (quiznum + 1), 'w') as quizfile, \
            open('answerquiz%s.txt' % (quiznum + 1), 'w') as answerfile:
        # Write out the header of the quiz
        quizfile.write('name:\n\ndate:\n\nperiod:\n\n')
        quizfile.write(' ' * 20 + 'State Capital Quiz (Form %s)' % (quiznum + 1))
        quizfile.write('\n\n')
        # Shuffle the question order independently for every form
        states = list(capitals)
        random.shuffle(states)
        # One question per entry, however many entries capitals holds
        # (random.sample below needs at least three other answers).
        for Qnum, state in enumerate(states):
            correctans = capitals[state]
            # Draw three distractors from the remaining answers
            wrongans = list(capitals.values())
            wrongans.remove(correctans)
            wrongans = random.sample(wrongans, 3)
            ansoption = wrongans + [correctans]
            random.shuffle(ansoption)
            # Write the question and its four options to the quiz file
            quizfile.write('%s.what is the capital of %s?\n' % (Qnum + 1, state))
            for i in range(4):
                quizfile.write('%s.%s\n' % ('ABCD'[i], ansoption[i]))
            quizfile.write('\n')
            # Record the letter of the correct option in the answer key
            answerfile.write('%s.%s\n' % (Qnum + 1, 'ABCD'[ansoption.index(correctans)]))

在这里插入图片描述
项目：多重剪切板

#! python 3
# mcb.pyw - 多重粘贴板
import shelve,sys,pyperclip
# Open (or create) the shelf that persists the saved clipboard entries
mcbshelf = shelve.open('mcb')
cli_args = sys.argv[1:]
if len(cli_args) == 2 and cli_args[0].lower() == 'save':
    # "save <keyword>": store the current clipboard text under the keyword
    keyword = cli_args[1]
    mcbshelf[keyword] = pyperclip.paste()
elif len(cli_args) == 1:
    keyword = cli_args[0]
    if keyword.lower() == 'list':
        # "list": copy the string form of all saved keywords
        saved_keys = list(mcbshelf.keys())
        pyperclip.copy(str(saved_keys))
    elif keyword in mcbshelf:
        # "<keyword>": copy the text saved under that keyword
        pyperclip.copy(mcbshelf[keyword])
mcbshelf.close()

8.9 实践项目

#! python 3
# 8.9.1 - 多重粘贴板
import shelve,sys,pyperclip
# 'with' guarantees the shelf is closed even if a command below fails.
with shelve.open('mcb') as mcbshelf:
    if len(sys.argv) == 3 and sys.argv[1].lower() == 'save':
        # "save <keyword>": store the current clipboard text under the keyword
        mcbshelf[sys.argv[2]] = pyperclip.paste()
    elif len(sys.argv) == 3 and sys.argv[1].lower() == 'delete' and sys.argv[2] in mcbshelf:
        # "delete <keyword>": remove that single entry
        del mcbshelf[sys.argv[2]]
    elif len(sys.argv) == 2:
        if sys.argv[1].lower() == 'list':
            # "list": copy the string form of all saved keywords
            pyperclip.copy(str(list(mcbshelf.keys())))
        elif sys.argv[1].lower() == 'delete':
            # "delete": remove every entry.  Checked before the keyword
            # lookup so a saved keyword cannot shadow the command.
            mcbshelf.clear()
        elif sys.argv[1] in mcbshelf:
            # "<keyword>": copy the text saved under that keyword
            pyperclip.copy(mcbshelf[sys.argv[1]])

#8.9.2
import re
text = 'The ADJ panda walked to the NOUM and then VERB,A nearby NOUM was unaffected by these events.'
# If the template lives in a file instead, load it like this:
#     with open('a.txt') as template_file:
#         text = template_file.read()


def fill_placeholders(template, replacements):
    """Replace each placeholder word in ``template`` with its own value.

    A placeholder is any run of two or more uppercase ASCII letters.

    Args:
        template: The text containing placeholder words.
        replacements: Mapping from placeholder word to replacement text.

    Returns:
        A new string with every known placeholder substituted.  Uppercase
        runs that are not keys of ``replacements`` are left unchanged.
    """
    placeholder = re.compile(r'[A-Z]{2,}')
    # A callable replacement looks up each match individually, so ADJ,
    # NOUM and VERB each receive their own text; it also inserts the
    # replacement literally, without backslash processing.
    return placeholder.sub(
        lambda match: replacements.get(match.group(), match.group()),
        template)


# Prompt only when run as a script, so importing this code stays silent.
if __name__ == '__main__':
    adj = input('Enter an adj:\n')
    noum = input('Enter an noum:\n')
    verb = input('Enter an verb:\n')
    text = fill_placeholders(text, {'ADJ': adj, 'NOUM': noum, 'VERB': verb})
    print(text)
>>>
Enter an adj:
hhh
Enter an noum:
fff
Enter an verb:
yyy
The hhh panda walked to the hhh and then hhh,A nearby hhh　was unaffected by these events.

#8.9.3
# 8.9.3 - search every .txt file in a folder for a regular expression
import os
import re


def search_txt_files(folder, pattern):
    """Find regex matches in each ``.txt`` file directly inside ``folder``.

    Args:
        folder: Path of the directory to scan (sub-folders are not visited).
        pattern: Regular expression, as a string or a compiled pattern.

    Returns:
        A list of ``(filename, matches)`` tuples, one for every ``.txt``
        file with at least one match, in ``os.listdir`` order.  ``matches``
        is the list returned by ``findall`` for that file.
    """
    regex = re.compile(pattern)
    results = []
    for filename in os.listdir(folder):
        if not filename.endswith('.txt'):
            continue
        # 'with' closes each file as soon as it has been read.
        with open(os.path.join(folder, filename)) as txt_file:
            matches = regex.findall(txt_file.read())
        # Test the match list, not the compiled pattern (always truthy).
        if matches:
            results.append((filename, matches))
    return results


# Scan only when run as a script, so importing this code has no side effects.
if __name__ == '__main__':
    path = "C:\\Users\\侯林\\Desktop\\python caogao"
    for filename, text in search_txt_files(path, r'China'):
        print(' '.join(text) + ' ' + filename)
>>>    
 answerquiz1.txt
 answerquiz2.txt
 answerquiz3.txt
 answerquiz4.txt
 answerquiz5.txt
 answerquiz6.txt
China capitalquiz1.txt
China capitalquiz2.txt
China capitalquiz3.txt
China capitalquiz4.txt
China capitalquiz5.txt
China capitalquiz6.txt
 hi.txt