python-re模块-hashlib模块

最新推荐文章于 2024-07-12 17:01:23 发布

zdc45625

最新推荐文章于 2024-07-12 17:01:23 发布

阅读量172

点赞数 1

分类专栏： Python学习

本文链接：https://blog.csdn.net/zdc45625/article/details/84863074

版权

Python学习专栏收录该内容

33 篇文章 1 订阅

订阅专栏

文章目录

- - 一、re模块
  - 二、hashlib模块

一、re模块

import re

# A literal pattern returns every non-overlapping occurrence of that text.
print(re.findall('alex', 'hahahah alex is alex is dsb'))  # ['alex', 'alex']

# Patterns containing regex escapes are written as raw strings so Python does
# not try to interpret the backslashes itself.

# \w matches one letter, digit or underscore.
print(re.findall(r'\w', 'Aah123 +-_'))  # ['A', 'a', 'h', '1', '2', '3', '_']
print(re.findall(r'\w\w', 'Aah123 +-_'))  # ['Aa', 'h1', '23']
print(re.findall(r'\w9\w', 'Aa9h123 aaa9c+-_'))  # ['a9h', 'a9c']

# \W matches one character that is NOT a letter, digit or underscore.
print(re.findall(r'\W', 'Aah123 +-_'))  # [' ', '+', '-']

# \s matches one whitespace character; this includes [ \t\n\r\f\v].
print(re.findall(r'\s', 'Aah\t12\n3 +-_'))  # ['\t', '\n', ' ']
# \S matches one non-whitespace character.
print(re.findall(r'\S', 'Aah\t12\n3 +-_'))  # ['A', 'a', 'h', '1', '2', '3', '+', '-', '_']
# \d matches one decimal digit; this includes [0-9].
print(re.findall(r'\d', 'Aah\t12\n3 +-_'))  # ['1', '2', '3']
# \D matches one character that is NOT a decimal digit.
print(re.findall(r'\D', 'Aah\t12\n3 +-_'))  # ['A', 'a', 'h', '\t', '\n', ' ', '+', '-', '_']

print(re.findall(r'\w\w\d\d', 'asfdasdfegon001adfadfegon002asdfxx01 yy02'))  # ['on00', 'on00', 'xx01', 'yy02']

# A pattern consisting of an actual tab character matches a tab.
print(re.findall('\t', 'Aah\t12\n3 +-_'))  # ['\t']
# A pattern consisting of an actual newline character matches a newline.
print(re.findall('\n', 'Aah\t12\n3 +-_'))  # ['\n']
# ^ anchors the match at the start of the string.
print(re.findall('^alex', 'alex is alex is alex'))  # ['alex']
# $ anchors the match at the end of the string (here the text ends in "alex1").
print(re.findall('alex$', ' alex is alex is alex1'))  # []
# . matches any single character except a newline; with re.DOTALL, as passed
# here, it matches a newline as well.
print(re.findall('a.c', 'a a1c aaac a c asfdsaf a\nc', re.DOTALL))  # ['a1c', 'aac', 'a c', 'a\nc']
import re

# [] matches exactly one character taken from the set written inside the brackets.

# A digit range.
print(re.findall('a[0-9]c', 'a,c a a1c a9c aaac a c asfdsaf a\nc', re.DOTALL))  # ['a1c', 'a9c']

# Lower- and upper-case letter ranges combined in one set.
print(re.findall('a[a-zA-Z]c', 'a,c aAc a1c a9c aaac a c asfdsaf a\nc', re.DOTALL))  # ['aAc', 'aac']

print(re.findall('a[a-zA-Z]c', 'a,c aAc a1c a9c aaac a c asfdsaf a\nc', re.DOTALL))  # ['aAc', 'aac']

# A '-' placed last in the set is a literal hyphen, not a range operator.
print(re.findall('a[+*/-]c', 'a,c a+c a-c a*c a/c aAc a1c a9c aaac a c asfdsaf a\nc', re.DOTALL))  # ['a+c', 'a-c', 'a*c', 'a/c']

# Escaping '-' with a backslash makes it literal at any position in the set.
print(re.findall(r'a[+*\-/]c', 'a,c a+c a-c a*c a/c aAc a1c a9c aaac a c asfdsaf a\nc', re.DOTALL))  # ['a+c', 'a-c', 'a*c', 'a/c']

# A leading ^ negates the set: match any one character that is NOT a digit.
print(re.findall('a[^0-9]c', 'a,c a a1c a9c aaac a c asfdsaf a\nc', re.DOTALL))  # ['a,c', 'aac', 'a c', 'a\nc']

import re

# ? : the preceding character may appear 0 or 1 times.
print(re.findall('ab?', 'a ab abb abbbb a123b a123bbbb'))  # ['a', 'ab', 'ab', 'ab', 'a', 'a']
# * : the preceding character may appear 0 or more times.
print(re.findall('ab*', 'a ab abb abbbb a123b a123bbbb'))  # ['a', 'ab', 'abb', 'abbbb', 'a', 'a']
# + : the preceding character must appear 1 or more times.
print(re.findall('ab+', 'a ab abb abbbb a123b a123bbbb'))  # ['ab', 'abb', 'abbbb']
# {n,m} : the preceding character must appear between n and m times.
print(re.findall('ab{1,3}', 'a ab abb abbbb a123b a123bbbb'))  # ['ab', 'abb', 'abbb']
# {1,} is the same as +.
print(re.findall('ab{1,}', 'a ab abb abbbb a123b a123bbbb'))  # ['ab', 'abb', 'abbbb']
print(re.findall('ab+', 'a ab abb abbbb a123b a123bbbb'))  # ['ab', 'abb', 'abbbb']
# {0,} is the same as *.
print(re.findall('ab{0,}', 'a ab abb abbbb a123b a123bbbb'))  # ['a', 'ab', 'abb', 'abbbb', 'a', 'a']
print(re.findall('ab*', 'a ab abb abbbb a123b a123bbbb'))  # ['a', 'ab', 'abb', 'abbbb', 'a', 'a']
# {n} : the preceding character must appear exactly n times.
print(re.findall('ab{3}', 'a ab abb abbbb a123b a123bbbb'))  # ['abbb']

# .* matches any run of 0 or more characters, greedily (as long as possible).
print(re.findall('a.*c', 'a123213123asdfasdfc123123123123+-0)((c123123'))  # ['a123213123asdfasdfc123123123123+-0)((c']

# .*? matches any run of 0 or more characters, non-greedily (as short as possible).
print(re.findall('a.*?c', 'a123213123asdfasdfc123123123123+-0)((c123123'))  # ['a123213123asdfasdfc']

# | : alternation ("or").
print(re.findall('companies|company', 'Too many companies have gone bankrupt,c and the next one is my company'))  # ['companies', 'company']

# () : grouping. (?:...) groups without capturing, so findall returns the whole match.
print(re.findall('compan(?:ies|y)', 'Too many companies have gone bankrupt,c and the next one is my company'))  # ['companies', 'company']

# With a capturing group, findall returns only the text captured by the group.
print(re.findall('href="(.*?)"', '<p>动感视频</p><a href="https://www.douniwan.com/1.mp4">逗你玩呢</a><a href="https://www.xxx.com/2.mp4">葫芦娃</a>'))  # ['https://www.douniwan.com/1.mp4', 'https://www.xxx.com/2.mp4']

# Matching one literal backslash: the regex needs \\ , which takes four
# backslashes in a normal string literal but only two in a raw string.
print(re.findall('a\\\\c', 'a\\c aac'))  # ['a\\c']
print(re.findall(r'a\\c', r'a\c aac'))  # ['a\\c']

# re.I makes the match case-insensitive.
print(re.findall('alex', 'my name is alex Alex is dsb aLex ALeX', re.I))  # ['alex', 'Alex', 'aLex', 'ALeX']

# re.M makes $ match at the end of every line, not only at the end of the string.
msg = """
my name is egon
asdfsadfadfsadf egon
123123123123123egon
"""
print(re.findall('egon$', msg, re.M))  # ['egon', 'egon', 'egon']

import re

# Other functions of the re module.

# findall with several capturing groups returns one tuple per match.
res = re.findall('(href)="(.*?)"', '<p>动感视频</p><a href="https://www.douniwan.com/1.mp4">逗你玩呢</a><a href="https://www.xxx.com/2.mp4">葫芦娃</a>')
print(res)  # [('href', 'https://www.douniwan.com/1.mp4'), ('href', 'https://www.xxx.com/2.mp4')]

# search scans the string and returns a match object for the first match only.
res = re.search('(href)="(.*?)"', '<p>动感视频</p><a href="https://www.douniwan.com/1.mp4">逗你玩呢</a><a href="https://www.xxx.com/2.mp4">葫芦娃</a>')
# Python 3.7+ shows the type as re.Match; older versions print _sre.SRE_Match.
print(res)  # <re.Match object; span=(14, 51), match='href="https://www.douniwan.com/1.mp4"'>
print(res.group(0))  # href="https://www.douniwan.com/1.mp4"  (the whole match)
print(res.group(1))  # href
print(res.group(2))  # https://www.douniwan.com/1.mp4

# match only succeeds at the start of the string,
# i.e. it behaves like res = re.search('^abc', '123abc').
res = re.match('abc', '123abc')
print(res)  # None


# compile builds a reusable pattern object that offers the same methods.
pattern = re.compile('alex')
print(pattern.findall('alex is alex is alex'))  # ['alex', 'alex', 'alex']
print(pattern.search('alex is alex is alex'))  # <re.Match object; span=(0, 4), match='alex'>
print(pattern.match('alex is alex is alex'))  # <re.Match object; span=(0, 4), match='alex'>

示范

# Extract every number from an arithmetic expression.
import re

msg = "1-2*(60+(-40.35/5)-(-40*3))"
# The optional leading \D swallows one non-digit before the number, so the '-'
# in "1-2" is consumed as an operator rather than captured as a sign. The group
# then captures an optional sign, the integer part and an optional fraction.
print(re.findall(r'\D?(-?\d+\.?\d*)', msg))  # ['1', '2', '60', '-40.35', '5', '-40', '3']

二、hashlib模块

什么是hash：hash是一种算法,该算法接受一系列的数据,经过运算会得到一个hash值

hash值具备三大特性：
1. 只要传入的内容一样,那么得到的hash值一定是一样
2. 只要采用的hash算法固定,无论传入的内容多大,得到的hash值长度都是固定的
3. hash值不可逆,即不能通过hash值逆推出内容

为何要用hash：文件完整性校验

md5：

import hashlib

# Feed the message to the hash object piece by piece; repeated update() calls
# are equivalent to hashing the concatenation of all the pieces.
m = hashlib.md5()
for piece in ('你好', 'hello'):
    m.update(piece.encode('utf-8'))

digest = m.hexdigest()
print(digest)  # 65c83c71cb3b2e2882f99358430679c3 (value given in the original article)
print(len(digest))  # 32 hexadecimal characters

sha：

import hashlib

# Passing the data to the constructor is equivalent to creating an empty hash
# object and calling update() with the same bytes.
m2 = hashlib.sha512(b'asdfassssssssssssssssssssssssssss')

sha_hex = m2.hexdigest()
print(sha_hex)
print(len(sha_hex))  # 128 hexadecimal characters

查看文件的hash

import hashlib

# Compute the MD5 digest of a file without loading it into memory at once.
with open('a.txt', mode='rb') as f:
    m = hashlib.md5()
    # Read fixed-size chunks instead of iterating by line: a binary file may
    # contain no b'\n' at all, in which case a "line" would be the whole file.
    # iter() keeps calling f.read until it returns b'' (end of file).
    for chunk in iter(lambda: f.read(65536), b''):
        m.update(chunk)
    print(m.hexdigest())

hash 加盐：

import hashlib

pwd = input('password>>> ').strip()

# "Salting": the password is hashed together with a fixed prefix and suffix,
# so the digest differs from the digest of the bare password.
# NOTE(review): MD5 with a hard-coded salt is for illustration only; for real
# password storage consider hashlib.scrypt or hashlib.pbkdf2_hmac.
m = hashlib.md5()
for part in ('天王盖地虎', pwd, '一行白鹭上青天'):
    m.update(part.encode('utf-8'))
print(m.hexdigest())