1.模块
1.1
[kiosk@foundation24 code]$ cat mokuai.py
#!/usr/bin/env python
#coding=utf-8
import sizedef  # import the sizedef module (sizedef.py in the same directory)
# Call the module's cheng() (multiply) function through the module name.
# print(...) with a single argument behaves the same on Python 2 and 3.
print(sizedef.cheng(1, 2))
[kiosk@foundation24 code]$ cat sizedef.py
#!/usr/bin/env python
#coding=utf-8
def add(x, y):
    """Return x + y."""
    return x + y


def jian(x, y):
    """Return x - y."""
    return x - y


def cheng(x, y):
    """Return x * y."""
    return x * y


def chu(x, y):
    """Return x / y.

    Uses the plain ``/`` operator, so under Python 2 two ints give an
    integer (floor) result, while Python 3 always gives a float.
    """
    return x / y


def printf(x, o, y):
    """Print the result of applying the operator string ``o`` to x and y.

    ``o`` is one of '+', '-', '*', '/'; any other value prints 'error'.
    """
    # print(...) with one argument works identically on Python 2 and 3.
    if o == '+':
        print(add(x, y))
    elif o == '-':
        print(jian(x, y))
    elif o == '*':
        print(cheng(x, y))
    elif o == '/':
        print(chu(x, y))
    else:
        print('error')
'''
printf(1,'+',4)
'''
1.2
[kiosk@foundation24 moudle]$ python say.py
jet ##在执行函数时,原有模块调用函数也被执行了
ypa
[kiosk@foundation24 moudle]$ vim hello.py
#!/usr/bin/env python
#coding=utf-8
def jet():
    """Return the string 'jet'."""
    return 'jet'


def ypa():
    """Return the string 'ypa'."""
    return 'ypa'


# Runs only when this file is executed directly. When the file is imported
# by another module, __name__ is the module name rather than '__main__',
# so nothing is printed on import.
if __name__ == '__main__':
    print(jet())
[kiosk@foundation24 moudle]$ python say.py ##故现在不执行原有模块函数的输出
ypa
导入模块时,Python 首先在当前脚本所在目录中查找,找不到才会到标准库等其他路径中查找。因此,在导入系统模块时,脚本所在目录中不要存在与该模块同名的文件(如下例中的 getpass.py)。另外,删除同名的 .py 文件后,还必须删除它生成的 .pyc 文件,否则仍然会导入错误的模块。
[kiosk@foundation24 code]$ python mygetpass.py
Traceback (most recent call last):
File "mygetpass.py", line 3, in <module>
getpass.getpass()
AttributeError: 'module' object has no attribute 'getpass'
[kiosk@foundation24 code]$ rm getpass.py
[kiosk@foundation24 code]$ python mygetpass.py
Traceback (most recent call last):
File "mygetpass.py", line 3, in <module>
getpass.getpass()
AttributeError: 'module' object has no attribute 'getpass'
[kiosk@foundation24 code]$ rm -fr getpass.pyc
[kiosk@foundation24 code]$ python mygetpass.py
Password:
[kiosk@foundation24 code]$
2.包的调用
[kiosk@foundation24 code]$ cd packeage/
[kiosk@foundation24 packeage]$ ls
hello.py hello.pyc __init__.py __init__.pyc ##package里面必须加__init__.py(里面可以不加内容)
[kiosk@foundation24 packeage]$
In [4]: import packeage.hello as a ##将导入的模块重新命名(起别名)为a
In [5]: a.jet()
Out[5]: 'jet'
In [8]: from packeage.hello import jet ##后面只能跟hello里面的函数
In [9]: jet()
Out[9]: 'jet'
In [10]: ypa()
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-10-a057f4ce7a4c> in <module>()
----> 1 ypa()
NameError: name 'ypa' is not defined
In [1]: import packeage.hello
In [6]: packeage.hello.jet()
Out[6]: 'jet'
example:
In [1]: import packeage.sizedef
In [3]: print packeage.sizedef.add(1,7)
8
In [4]: import packeage.sizedef as wl
In [5]: wl.add(1,4)
Out[5]: 5
In [6]: from packeage.sizedef import cheng
In [7]: cheng(2,5)
Out[7]: 10
3.字符的操作
3.1字符串的分割
In [8]: s='172.25.254.24'
In [9]: s.split('.',2) ##以.为终结分割两次
Out[9]: ['172', '25', '254.24']
In [10]: s.split()
Out[10]: ['172.25.254.24']
In [11]: s='hello word my dear'
In [12]: s.split() ##不带参数时,默认以任意空白字符(空格、\t、\n等)为分隔符
Out[12]: ['hello', 'word', 'my', 'dear']
In [13]: s=["hello","fendai","zhuru"] ##列表
In [14]: "".join(s) ##没有字符连接起来
Out[14]: 'hellofendaizhuru'
In [15]: ','.join(s) ##以‘,’为分割连接起来
Out[15]: 'hello,fendai,zhuru'
In [16]: '+',join(s)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-16-730bcbf99809> in <module>()
----> 1 '+',join(s)
NameError: name 'join' is not defined
In [17]: '+'.join(s)
Out[17]: 'hello+fendai+zhuru'
3.2字符串的查找
In [18]: str = 'my name is jet'
In [19]: str
Out[19]: 'my name is jet'
In [20]: str.find('my') ##查找字符的首字母第一次出现的位置索引
Out[20]: 0
In [21]: str.find('name')
Out[21]: 3
In [22]: str.find('name',7,10) ##指定查找范围:在索引7(含)到10(不含)之间查找该字符串
Out[22]: -1 ##没找到会返回-1
3.3字符串的替换
In [23]: str1 = str.replace('jet','qinyu') ##将替换后的句子赋值给str1
In [24]: str1
Out[24]: 'my name is qinyu'
In [25]: str ##可以发现,原字符没有变化
Out[25]: 'my name is jet'
In [26]: s = "\ni love you qinyu"
In [27]: s
Out[27]: '\ni love you qinyu'
In [28]: s.lstrip() ##去除字符串左端的空白字符(空格、\n、\t等)
Out[28]: 'i love you qinyu'
In [29]: s.rstrip() ##去除字符串右端的空白字符(此处右端没有空白,故结果不变)
Out[29]: '\ni love you qinyu'
文件备份:
#!/usr/bin/env python
#coding=utf-8
import os  # used to build the archive path
import subprocess

back_file = ["/home/kiosk/Desktop/hello", "/home/kiosk/Desktop/word"]  # files to back up
dstBackDir = "/home/kiosk/Desktop/python4/"  # directory that receives the archive
houzui = 'back.tar.gz'  # archive file name
# Build the command as an argument list instead of a shell string, so paths
# containing spaces or shell metacharacters are passed to tar unchanged.
comond = ["tar", "zcf", os.path.join(dstBackDir, houzui)] + back_file
try:
    status = subprocess.call(comond)  # tar's exit status; 0 means success
except OSError:
    # tar could not be started at all; report it as a failed backup.
    status = 1
if status == 0:
    print("文件备份成功!")
else:
    print("文件备份失败!")
4.正则表达式
4.1规则
. ##匹配除换行符\n以外的任意单个字符(加上re.S后也能匹配\n)
^ ##匹配字符串的开始
$ ##匹配字符串的结尾
* ##匹配前面出现的正则表达式零次或多次
+ ##匹配前面出现的正则表达式1次或多次
? ##匹配前面出现的正则表达式0次或1次
In [58]: import re
In [59]: re.findall(r"^hello","hello man") ##以hello开头
Out[59]: ['hello']
In [60]: re.findall(r"^hello","man hello")
Out[60]: []
In [61]: re.findall(r"hello$","man hello") ##以hello结尾
Out[61]: ['hello']
In [62]: re.findall(r"hello$","man hello")
Out[62]: ['hello']
In [63]: re.findall(r"hello*","man hello") ##*只作用于前面的o:匹配hell后跟零个或多个o(如hell、hello、helloo)
Out[63]: ['hello']
In [64]: re.findall(r"hello*","hello man")
Out[64]: ['hello']
example1:
In [68]: r = r"[bh][aiu]t"
In [69]: re.findall(r,"hello hat bit")
Out[69]: ['hat', 'bit']
4.2 特殊字符用法
In [74]: re.findall(r"\d","6138216adhad") ##只抽取数字
Out[74]: ['6', '1', '3', '8', '2', '1', '6']
In [75]: re.findall(r"\D","6138216adhad") ##只抽取非数字字符
Out[75]: ['a', 'd', 'h', 'a', 'd']
In [76]: re.findall(r"\s"," 61\t38216\nadhad") ##只抽取空白字符(空格、\t、\n、\r等)
Out[76]: [' ', '\t', '\n']
In [77]: re.findall(r"\S"," 61\t38216\nadhad") ##抽取所有非空白字符
Out[77]: ['6', '1', '3', '8', '2', '1', '6', 'a', 'd', 'h', 'a', 'd']
In [78]: re.findall(r"\w"," 61\t38216\nadhad_") ##抽取字母及数字和”_“
Out[78]: ['6', '1', '3', '8', '2', '1', '6', 'a', 'd', 'h', 'a', 'd', '_']
In [79]: re.findall(r"\W"," 61\t38216\nadhad_") ##取反
Out[79]: [' ', '\t', '\n']
4.3
In [80]: re.findall(r"ad*","ad")
Out[80]: ['ad']
In [81]: re.findall(r"ad*","a")
Out[81]: ['a']
In [82]: re.findall(r"ad*","b")
Out[82]: []
In [83]: re.findall(r"ad*","adbdb")
Out[83]: ['ad']
In [84]: re.findall(r"ad*","adddddddddbdb")
Out[84]: ['addddddddd']
In [85]: re.findall(r"ad+","adddddddddbdb")
Out[85]: ['addddddddd']
In [86]: re.findall(r"ad+","a")
Out[86]: []
In [87]: re.findall(r"ad+","d")
Out[87]: []
In [88]: re.findall(r"ad+","ad")
Out[88]: ['ad']
In [89]: re.findall(r"ad?","adddddddd")
Out[89]: ['ad']
In [90]: re.findall(r"ad?","a")
Out[90]: ['a']
In [91]: re.findall(r"ad?","d")
Out[91]: []
In [92]: re.findall(r"ad?","ad")
Out[92]: ['ad']
In [93]: re.findall(r"010-?,\d{3}","010--123")
Out[93]: []
In [94]: re.findall(r"010-?,\d{3}","010-123")
Out[94]: []
In [95]: re.findall(r"010-?\d{3}$","010-123")
Out[95]: ['010-123']
In [96]: re.findall(r"010-?\d{3}$","010--123")
Out[96]: []
In [103]: re.findall(r"^010-\d{3,5}$","010-1234") ##以010-开头,后面跟3到5位数字
Out[103]: ['010-1234']
In [104]: re.findall(r"^010-\d{3,5}$","010-123456")
Out[104]: []
example:
In [113]: r= r"^www\.\w*\.com$"
In [114]: re.findall(r,"www.baidu.com")
Out[114]: ['www.baidu.com']
4.4 ?的两种模式
In [116]: re.findall(r"ab+","abb") ##贪婪模式
Out[116]: ['abb']
In [117]: re.findall(r"ab+?","abb") ##非贪婪模式(在量词后加?,只匹配尽可能少的字符)
Out[117]: ['ab']
In [118]: re.findall(r"ab?","abbbbbb")
Out[118]: ['ab']
4.5编译规则
In [119]: x = re.compile(r"ab+")
In [120]: x.findall("abbcdabxea") ##直接用对象调用
Out[120]: ['abb', 'ab']
In [121]: y = re.compile(r"jet",re.I) ##匹配的时候不区分大小写
In [122]: y.findall("Jet") ##因为使用了re.I,不区分大小写,所以能匹配到Jet
Out[122]: ['Jet']
4.6特殊用法
In [121]: y = re.compile(r"jet",re.I)
In [122]: y.findall("Jet")
Out[122]: ['Jet']
In [123]: y.match("hello jet i am yu") ##匹配字符串的开头
In [124]: y.search("hello jet i am yu") ##在整个字符串中搜索,返回第一个匹配
Out[124]: <_sre.SRE_Match at 0x1b037e8>
In [125]: y.match("jet i am yu")
Out[125]: <_sre.SRE_Match at 0x1b03920>
In [128]: z=y.finditer("hello jet i am yu") ##返回为迭代器
In [129]: z.next() ##查看迭代器
Out[129]: <_sre.SRE_Match at 0x1b039f0>
In [130]: z.next() ##返回了一个,故第二次查询失败
---------------------------------------------------------------------------
StopIteration Traceback (most recent call last)
<ipython-input-130-6c49e4c11a56> in <module>()
----> 1 z.next()
StopIteration:
In [131]: z=y.finditer("hello jet i am jet sister") ##返回两个
In [132]: z.next()
Out[132]: <_sre.SRE_Match at 0x1b03ac0>
In [133]: z.next()
Out[133]: <_sre.SRE_Match at 0x1b03b28>
In [134]: z.next() ##故查询第三个失败
---------------------------------------------------------------------------
StopIteration Traceback (most recent call last)
<ipython-input-134-6c49e4c11a56> in <module>()
----> 1 z.next()
StopIteration:
In [135]: y
Out[135]: re.compile(r'jet', re.IGNORECASE)
In [136]: z=y.match("jet hi")
In [137]: z.group() ##查看生成的
Out[137]: 'jet'
In [138]:
Example:
#!/usr/bin/env python
#coding=utf-8
import re

# Compile the pattern once, then search anywhere in the text
# (search, unlike match, is not anchored to the start of the string).
x = re.compile(r"jet")
y = x.search("hi yu my name is jet")
if y:
    # Same output text as before ("Match ok: jet"), written in a form
    # that is valid on both Python 2 and Python 3.
    print("Match ok: %s" % y.group())
else:
    print("Sorry not Found")
测试:
[kiosk@foundation24 2017-04-08]$ python match.py
Match ok: jet
4.6模块级函数
In [1]: import re
In [4]: re.subn(r"jetzh..g","jet","jetzhang is better man")
Out[4]: ('jet is better man', 1)
In [5]: re.sub(r"jetzh..g","jet","jetzhang is better man")
Out[5]: 'jet is better man'
In [7]: re.subn(r"jetzh..g","jet","jetzhang is better man jetzhang say ok")
Out[7]: ('jet is better man jet say ok', 2)
In [8]: s = "172.25.254.24"
In [10]: s.split(".")
Out[10]: ['172', '25', '254', '24']
In [11]: s= "12 + 13 * 14 - 15 / 18"
In [12]: re.split(r"[\+\-\*/]","12 + 13 * 14 - 15 / 18") ##运算符
Out[12]: ['12 ', ' 13 ', ' 14 ', ' 15 ', ' 18']
In [14]: re.findall(r"westos.cn","westos\tcn")
Out[14]: ['westos\tcn']
In [15]: re.findall(r"westos.cn","westos\rcn")
Out[15]: ['westos\rcn']
In [16]: re.findall(r"westos.cn","westos\rcn",re.S)
Out[16]: ['westos\rcn']
In [17]: re.findall(r"westos.cn","westos cn")
Out[17]: ['westos cn']
In [19]: re.findall(r"westos.cn","westos cn",re.S)
Out[19]: ['westos cn']
In [20]: s ="""
....: hello jet
....: jet and yu
....: jet or yu
....: """
In [21]: re.findall(r"^jet",s)
Out[21]: []
In [22]: re.findall(r"^jet",s,re.M) ##re.M多行模式:^和$可以匹配每一行的开头和结尾
Out[22]: ['jet', 'jet']
In [23]: r = '''
....: ^029
....: -?
....: \d{7}
....: '''
In [24]: r
Out[24]: '\n^029\n-?\n\\d{7}\n'
In [25]: re.findall(r,"029-8542911")
Out[25]: []
In [26]: re.findall(r,"029-8542911",re.X) ##re.X详细模式:忽略正则表达式中的空白和换行,可将规则分多行书写
Out[26]: ['029-8542911']
4.7 分组
In [27]: r = r"^www\.\w+\.(com|edu|net)"
In [28]: re.search(r,"www.jet.edu").group()
Out[28]: 'www.jet.edu'
In [29]: s ='''
....: hello jet
....: hello src="www.jet.com" say yes
....: ok hello src="www.jet.com" say no
....: '''
In [30]: r = r'hello src="(.+)" say yes'
In [31]: re.findall(r,s)
Out[31]: ['www.jet.com']
练习小爬虫:
#!/usr/bin/env python
#coding=utf-8
import re
import urllib

try:  # Python 3 module layout
    from urllib.request import urlopen, urlretrieve
    from urllib.parse import urljoin
except ImportError:  # Python 2 fallback
    from urllib import urlopen, urlretrieve
    from urlparse import urljoin

# [^"]+ cannot run past the closing quote, so several src="..." attributes
# on one line are captured separately (a greedy .+ would merge them).
_PNG_SRC = re.compile(r'src="([^"]+\.png)"')


def gethtml(url):
    """Fetch ``url`` and return the page content as text."""
    page = urlopen(url)
    try:
        html = page.read()
    finally:
        page.close()
    # Python 3 returns bytes; decode so the text regex can be applied.
    if not isinstance(html, str):
        html = html.decode('utf-8', 'replace')
    return html


def getimage(html, base_url=None, retrieve=None):
    """Download every .png referenced by a src="..." attribute in ``html``.

    Images are saved in the current directory as 1.png, 2.png, ...

    html     -- page source text to scan.
    base_url -- optional page URL used to resolve relative image paths.
    retrieve -- optional callable(url, filename) that performs the download;
                defaults to urlretrieve.

    Returns the list of file names written, in download order.
    """
    if retrieve is None:
        retrieve = urlretrieve
    saved = []
    for x, imgurl in enumerate(_PNG_SRC.findall(html), 1):
        if base_url:
            imgurl = urljoin(base_url, imgurl)
        filename = "%s.png" % x
        retrieve(imgurl, filename)
        saved.append(filename)
    return saved


# Prompt and download only when run as a script, not when imported.
if __name__ == '__main__':
    try:
        read_line = raw_input  # Python 2
    except NameError:
        read_line = input  # Python 3
    wangzhi = read_line("请输入网址:")
    html = gethtml(wangzhi)
    print(getimage(html, wangzhi))
1.1
[kiosk@foundation24 code]$ cat mokuai.py
#!/usr/bin/env python
#coding=utf-8
import sizedef  # import the sizedef module (sizedef.py in the same directory)
# Call the module's cheng() (multiply) function through the module name.
# print(...) with a single argument behaves the same on Python 2 and 3.
print(sizedef.cheng(1, 2))
[kiosk@foundation24 code]$ cat sizedef.py
#!/usr/bin/env python
#coding=utf-8
def add(x, y):
    """Return x + y."""
    return x + y


def jian(x, y):
    """Return x - y."""
    return x - y


def cheng(x, y):
    """Return x * y."""
    return x * y


def chu(x, y):
    """Return x / y.

    Uses the plain ``/`` operator, so under Python 2 two ints give an
    integer (floor) result, while Python 3 always gives a float.
    """
    return x / y


def printf(x, o, y):
    """Print the result of applying the operator string ``o`` to x and y.

    ``o`` is one of '+', '-', '*', '/'; any other value prints 'error'.
    """
    # print(...) with one argument works identically on Python 2 and 3.
    if o == '+':
        print(add(x, y))
    elif o == '-':
        print(jian(x, y))
    elif o == '*':
        print(cheng(x, y))
    elif o == '/':
        print(chu(x, y))
    else:
        print('error')
'''
printf(1,'+',4)
'''
1.2
[kiosk@foundation24 moudle]$ python say.py
jet ##在执行函数时,原有模块调用函数也被执行了
ypa
[kiosk@foundation24 moudle]$ vim hello.py
#!/usr/bin/env python
#coding=utf-8
def jet():
    """Return the string 'jet'."""
    return 'jet'


def ypa():
    """Return the string 'ypa'."""
    return 'ypa'


# Runs only when this file is executed directly. When the file is imported
# by another module, __name__ is the module name rather than '__main__',
# so nothing is printed on import.
if __name__ == '__main__':
    print(jet())
[kiosk@foundation24 moudle]$ python say.py ##故现在不执行原有模块函数的输出
ypa
导入模块时,Python 首先在当前脚本所在目录中查找,找不到才会到标准库等其他路径中查找。因此,在导入系统模块时,脚本所在目录中不要存在与该模块同名的文件(如下例中的 getpass.py)。另外,删除同名的 .py 文件后,还必须删除它生成的 .pyc 文件,否则仍然会导入错误的模块。
[kiosk@foundation24 code]$ python mygetpass.py
Traceback (most recent call last):
File "mygetpass.py", line 3, in <module>
getpass.getpass()
AttributeError: 'module' object has no attribute 'getpass'
[kiosk@foundation24 code]$ rm getpass.py
[kiosk@foundation24 code]$ python mygetpass.py
Traceback (most recent call last):
File "mygetpass.py", line 3, in <module>
getpass.getpass()
AttributeError: 'module' object has no attribute 'getpass'
[kiosk@foundation24 code]$ rm -fr getpass.pyc
[kiosk@foundation24 code]$ python mygetpass.py
Password:
[kiosk@foundation24 code]$
2.包的调用
[kiosk@foundation24 code]$ cd packeage/
[kiosk@foundation24 packeage]$ ls
hello.py hello.pyc __init__.py __init__.pyc ##package里面必须加__init__.py(里面可以不加内容)
[kiosk@foundation24 packeage]$
In [4]: import packeage.hello as a ##将导入的模块重新命名(起别名)为a
In [5]: a.jet()
Out[5]: 'jet'
In [8]: from packeage.hello import jet ##后面只能跟hello里面的函数
In [9]: jet()
Out[9]: 'jet'
In [10]: ypa()
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-10-a057f4ce7a4c> in <module>()
----> 1 ypa()
NameError: name 'ypa' is not defined
In [1]: import packeage.hello
In [6]: packeage.hello.jet()
Out[6]: 'jet'
example:
In [1]: import packeage.sizedef
In [3]: print packeage.sizedef.add(1,7)
8
In [4]: import packeage.sizedef as wl
In [5]: wl.add(1,4)
Out[5]: 5
In [6]: from packeage.sizedef import cheng
In [7]: cheng(2,5)
Out[7]: 10
3.字符的操作
3.1字符串的分割
In [8]: s='172.25.254.24'
In [9]: s.split('.',2) ##以.为终结分割两次
Out[9]: ['172', '25', '254.24']
In [10]: s.split()
Out[10]: ['172.25.254.24']
In [11]: s='hello word my dear'
In [12]: s.split() ##不带参数时,默认以任意空白字符(空格、\t、\n等)为分隔符
Out[12]: ['hello', 'word', 'my', 'dear']
In [13]: s=["hello","fendai","zhuru"] ##列表
In [14]: "".join(s) ##没有字符连接起来
Out[14]: 'hellofendaizhuru'
In [15]: ','.join(s) ##以‘,’为分割连接起来
Out[15]: 'hello,fendai,zhuru'
In [16]: '+',join(s)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-16-730bcbf99809> in <module>()
----> 1 '+',join(s)
NameError: name 'join' is not defined
In [17]: '+'.join(s)
Out[17]: 'hello+fendai+zhuru'
3.2字符串的查找
In [18]: str = 'my name is jet'
In [19]: str
Out[19]: 'my name is jet'
In [20]: str.find('my') ##查找字符的首字母第一次出现的位置索引
Out[20]: 0
In [21]: str.find('name')
Out[21]: 3
In [22]: str.find('name',7,10) ##指定查找范围:在索引7(含)到10(不含)之间查找该字符串
Out[22]: -1 ##没找到会返回-1
3.3字符串的替换
In [23]: str1 = str.replace('jet','qinyu') ##将替换后的句子赋值给str1
In [24]: str1
Out[24]: 'my name is qinyu'
In [25]: str ##可以发现,原字符没有变化
Out[25]: 'my name is jet'
In [26]: s = "\ni love you qinyu"
In [27]: s
Out[27]: '\ni love you qinyu'
In [28]: s.lstrip() ##去除字符串左端的空白字符(空格、\n、\t等)
Out[28]: 'i love you qinyu'
In [29]: s.rstrip() ##去除字符串右端的空白字符(此处右端没有空白,故结果不变)
Out[29]: '\ni love you qinyu'
文件备份:
#!/usr/bin/env python
#coding=utf-8
import os  # used to build the archive path
import subprocess

back_file = ["/home/kiosk/Desktop/hello", "/home/kiosk/Desktop/word"]  # files to back up
dstBackDir = "/home/kiosk/Desktop/python4/"  # directory that receives the archive
houzui = 'back.tar.gz'  # archive file name
# Build the command as an argument list instead of a shell string, so paths
# containing spaces or shell metacharacters are passed to tar unchanged.
comond = ["tar", "zcf", os.path.join(dstBackDir, houzui)] + back_file
try:
    status = subprocess.call(comond)  # tar's exit status; 0 means success
except OSError:
    # tar could not be started at all; report it as a failed backup.
    status = 1
if status == 0:
    print("文件备份成功!")
else:
    print("文件备份失败!")
4.正则表达式
4.1规则
. ##匹配除换行符\n以外的任意单个字符(加上re.S后也能匹配\n)
^ ##匹配字符串的开始
$ ##匹配字符串的结尾
* ##匹配前面出现的正则表达式零次或多次
+ ##匹配前面出现的正则表达式1次或多次
? ##匹配前面出现的正则表达式0次或1次
In [58]: import re
In [59]: re.findall(r"^hello","hello man") ##以hello开头
Out[59]: ['hello']
In [60]: re.findall(r"^hello","man hello")
Out[60]: []
In [61]: re.findall(r"hello$","man hello") ##以hello结尾
Out[61]: ['hello']
In [62]: re.findall(r"hello$","man hello")
Out[62]: ['hello']
In [63]: re.findall(r"hello*","man hello") ##*只作用于前面的o:匹配hell后跟零个或多个o(如hell、hello、helloo)
Out[63]: ['hello']
In [64]: re.findall(r"hello*","hello man")
Out[64]: ['hello']
example1:
In [68]: r = r"[bh][aiu]t"
In [69]: re.findall(r,"hello hat bit")
Out[69]: ['hat', 'bit']
4.2 特殊字符用法
In [74]: re.findall(r"\d","6138216adhad") ##只抽取数字
Out[74]: ['6', '1', '3', '8', '2', '1', '6']
In [75]: re.findall(r"\D","6138216adhad") ##只抽取非数字字符
Out[75]: ['a', 'd', 'h', 'a', 'd']
In [76]: re.findall(r"\s"," 61\t38216\nadhad") ##只抽取空白字符(空格、\t、\n、\r等)
Out[76]: [' ', '\t', '\n']
In [77]: re.findall(r"\S"," 61\t38216\nadhad") ##抽取所有非空白字符
Out[77]: ['6', '1', '3', '8', '2', '1', '6', 'a', 'd', 'h', 'a', 'd']
In [78]: re.findall(r"\w"," 61\t38216\nadhad_") ##抽取字母及数字和”_“
Out[78]: ['6', '1', '3', '8', '2', '1', '6', 'a', 'd', 'h', 'a', 'd', '_']
In [79]: re.findall(r"\W"," 61\t38216\nadhad_") ##取反
Out[79]: [' ', '\t', '\n']
4.3
In [80]: re.findall(r"ad*","ad")
Out[80]: ['ad']
In [81]: re.findall(r"ad*","a")
Out[81]: ['a']
In [82]: re.findall(r"ad*","b")
Out[82]: []
In [83]: re.findall(r"ad*","adbdb")
Out[83]: ['ad']
In [84]: re.findall(r"ad*","adddddddddbdb")
Out[84]: ['addddddddd']
In [85]: re.findall(r"ad+","adddddddddbdb")
Out[85]: ['addddddddd']
In [86]: re.findall(r"ad+","a")
Out[86]: []
In [87]: re.findall(r"ad+","d")
Out[87]: []
In [88]: re.findall(r"ad+","ad")
Out[88]: ['ad']
In [89]: re.findall(r"ad?","adddddddd")
Out[89]: ['ad']
In [90]: re.findall(r"ad?","a")
Out[90]: ['a']
In [91]: re.findall(r"ad?","d")
Out[91]: []
In [92]: re.findall(r"ad?","ad")
Out[92]: ['ad']
In [93]: re.findall(r"010-?,\d{3}","010--123")
Out[93]: []
In [94]: re.findall(r"010-?,\d{3}","010-123")
Out[94]: []
In [95]: re.findall(r"010-?\d{3}$","010-123")
Out[95]: ['010-123']
In [96]: re.findall(r"010-?\d{3}$","010--123")
Out[96]: []
In [103]: re.findall(r"^010-\d{3,5}$","010-1234") ##以010-开头,后面跟3到5位数字
Out[103]: ['010-1234']
In [104]: re.findall(r"^010-\d{3,5}$","010-123456")
Out[104]: []
example:
In [113]: r= r"^www\.\w*\.com$"
In [114]: re.findall(r,"www.baidu.com")
Out[114]: ['www.baidu.com']
4.4 ?的两种模式
In [116]: re.findall(r"ab+","abb") ##贪婪模式
Out[116]: ['abb']
In [117]: re.findall(r"ab+?","abb") ##非贪婪模式(在量词后加?,只匹配尽可能少的字符)
Out[117]: ['ab']
In [118]: re.findall(r"ab?","abbbbbb")
Out[118]: ['ab']
4.5编译规则
In [119]: x = re.compile(r"ab+")
In [120]: x.findall("abbcdabxea") ##直接用对象调用
Out[120]: ['abb', 'ab']
In [121]: y = re.compile(r"jet",re.I) ##匹配的时候不区分大小写
In [122]: y.findall("Jet") ##因为使用了re.I,不区分大小写,所以能匹配到Jet
Out[122]: ['Jet']
4.6特殊用法
In [121]: y = re.compile(r"jet",re.I)
In [122]: y.findall("Jet")
Out[122]: ['Jet']
In [123]: y.match("hello jet i am yu") ##匹配字符串的开头
In [124]: y.search("hello jet i am yu") ##在整个字符串中搜索,返回第一个匹配
Out[124]: <_sre.SRE_Match at 0x1b037e8>
In [125]: y.match("jet i am yu")
Out[125]: <_sre.SRE_Match at 0x1b03920>
In [128]: z=y.finditer("hello jet i am yu") ##返回为迭代器
In [129]: z.next() ##查看迭代器
Out[129]: <_sre.SRE_Match at 0x1b039f0>
In [130]: z.next() ##返回了一个,故第二次查询失败
---------------------------------------------------------------------------
StopIteration Traceback (most recent call last)
<ipython-input-130-6c49e4c11a56> in <module>()
----> 1 z.next()
StopIteration:
In [131]: z=y.finditer("hello jet i am jet sister") ##返回两个
In [132]: z.next()
Out[132]: <_sre.SRE_Match at 0x1b03ac0>
In [133]: z.next()
Out[133]: <_sre.SRE_Match at 0x1b03b28>
In [134]: z.next() ##故查询第三个失败
---------------------------------------------------------------------------
StopIteration Traceback (most recent call last)
<ipython-input-134-6c49e4c11a56> in <module>()
----> 1 z.next()
StopIteration:
In [135]: y
Out[135]: re.compile(r'jet', re.IGNORECASE)
In [136]: z=y.match("jet hi")
In [137]: z.group() ##查看生成的
Out[137]: 'jet'
In [138]:
Example:
#!/usr/bin/env python
#coding=utf-8
import re

# Compile the pattern once, then search anywhere in the text
# (search, unlike match, is not anchored to the start of the string).
x = re.compile(r"jet")
y = x.search("hi yu my name is jet")
if y:
    # Same output text as before ("Match ok: jet"), written in a form
    # that is valid on both Python 2 and Python 3.
    print("Match ok: %s" % y.group())
else:
    print("Sorry not Found")
测试:
[kiosk@foundation24 2017-04-08]$ python match.py
Match ok: jet
4.6模块级函数
In [1]: import re
In [4]: re.subn(r"jetzh..g","jet","jetzhang is better man")
Out[4]: ('jet is better man', 1)
In [5]: re.sub(r"jetzh..g","jet","jetzhang is better man")
Out[5]: 'jet is better man'
In [7]: re.subn(r"jetzh..g","jet","jetzhang is better man jetzhang say ok")
Out[7]: ('jet is better man jet say ok', 2)
In [8]: s = "172.25.254.24"
In [10]: s.split(".")
Out[10]: ['172', '25', '254', '24']
In [11]: s= "12 + 13 * 14 - 15 / 18"
In [12]: re.split(r"[\+\-\*/]","12 + 13 * 14 - 15 / 18") ##运算符
Out[12]: ['12 ', ' 13 ', ' 14 ', ' 15 ', ' 18']
In [14]: re.findall(r"westos.cn","westos\tcn")
Out[14]: ['westos\tcn']
In [15]: re.findall(r"westos.cn","westos\rcn")
Out[15]: ['westos\rcn']
In [16]: re.findall(r"westos.cn","westos\rcn",re.S)
Out[16]: ['westos\rcn']
In [17]: re.findall(r"westos.cn","westos cn")
Out[17]: ['westos cn']
In [19]: re.findall(r"westos.cn","westos cn",re.S)
Out[19]: ['westos cn']
In [20]: s ="""
....: hello jet
....: jet and yu
....: jet or yu
....: """
In [21]: re.findall(r"^jet",s)
Out[21]: []
In [22]: re.findall(r"^jet",s,re.M) ##re.M多行模式:^和$可以匹配每一行的开头和结尾
Out[22]: ['jet', 'jet']
In [23]: r = '''
....: ^029
....: -?
....: \d{7}
....: '''
In [24]: r
Out[24]: '\n^029\n-?\n\\d{7}\n'
In [25]: re.findall(r,"029-8542911")
Out[25]: []
In [26]: re.findall(r,"029-8542911",re.X) ##re.X详细模式:忽略正则表达式中的空白和换行,可将规则分多行书写
Out[26]: ['029-8542911']
4.7 分组
In [27]: r = r"^www\.\w+\.(com|edu|net)"
In [28]: re.search(r,"www.jet.edu").group()
Out[28]: 'www.jet.edu'
In [29]: s ='''
....: hello jet
....: hello src="www.jet.com" say yes
....: ok hello src="www.jet.com" say no
....: '''
In [30]: r = r'hello src="(.+)" say yes'
In [31]: re.findall(r,s)
Out[31]: ['www.jet.com']
练习小爬虫:
#!/usr/bin/env python
#coding=utf-8
import re
import urllib

try:  # Python 3 module layout
    from urllib.request import urlopen, urlretrieve
    from urllib.parse import urljoin
except ImportError:  # Python 2 fallback
    from urllib import urlopen, urlretrieve
    from urlparse import urljoin

# [^"]+ cannot run past the closing quote, so several src="..." attributes
# on one line are captured separately (a greedy .+ would merge them).
_PNG_SRC = re.compile(r'src="([^"]+\.png)"')


def gethtml(url):
    """Fetch ``url`` and return the page content as text."""
    page = urlopen(url)
    try:
        html = page.read()
    finally:
        page.close()
    # Python 3 returns bytes; decode so the text regex can be applied.
    if not isinstance(html, str):
        html = html.decode('utf-8', 'replace')
    return html


def getimage(html, base_url=None, retrieve=None):
    """Download every .png referenced by a src="..." attribute in ``html``.

    Images are saved in the current directory as 1.png, 2.png, ...

    html     -- page source text to scan.
    base_url -- optional page URL used to resolve relative image paths.
    retrieve -- optional callable(url, filename) that performs the download;
                defaults to urlretrieve.

    Returns the list of file names written, in download order.
    """
    if retrieve is None:
        retrieve = urlretrieve
    saved = []
    for x, imgurl in enumerate(_PNG_SRC.findall(html), 1):
        if base_url:
            imgurl = urljoin(base_url, imgurl)
        filename = "%s.png" % x
        retrieve(imgurl, filename)
        saved.append(filename)
    return saved


# Prompt and download only when run as a script, not when imported.
if __name__ == '__main__':
    try:
        read_line = raw_input  # Python 2
    except NameError:
        read_line = input  # Python 3
    wangzhi = read_line("请输入网址:")
    html = gethtml(wangzhi)
    print(getimage(html, wangzhi))