正则表达式的使用

最新推荐文章于 2022-04-25 14:06:22 发布

小学生反弹

最新推荐文章于 2022-04-25 14:06:22 发布

阅读量278

点赞数

分类专栏： python

本文链接：https://blog.csdn.net/u012315428/article/details/50757086

版权

python 专栏收录该内容

4 篇文章 0 订阅

订阅专栏

2016-2-27

常用符号

#-*-coding:utf8-*-
import re
# from re import findall,search,S
print('hello world')

# Sample text with three words hidden between pairs of "xx" markers.
secret_code = 'feafefexxixx23fe23xxlovexxafeifvaxxyouxx32fe'

# A literal pattern returns every occurrence of exactly that text.
# NOTE: the original note at this spot described ".", which stands in for
# any single character (a placeholder), but the pattern used here is the
# literal 'x', so only the letter x itself is matched.
a = 'xz123'
b = re.findall('x', a)
print(b)

a = 'xyxy123'
# "x*" matches zero or more "x". Because zero repetitions is allowed,
# findall also reports an empty string at every position where no "x" starts.
b = re.findall('x*', a)
print(b)

# "x?" matches zero or one "x"; empty matches are reported the same way.
b = re.findall('x?', a)
print(b)

# ".*" is greedy: it grabs as much as possible, so this yields one match
# running from the first "xx" to the last "xx".
b = re.findall('xx.*xx', secret_code)
print(b)

# ".*?" is lazy: it grabs as little as possible, so every "xx...xx" pair is
# returned as its own match.
c = re.findall('xx.*?xx', secret_code)
print(c)

# Put the part you want inside (), and leave what you do not want outside:
# findall then returns only the captured text.
d = re.findall('xx(.*?)xx', secret_code)
print(d)

# Print each captured word on its own line.
for each in d:
    print(each)

# Two-line sample: the "xx" that opens the second line acts as the closing
# marker for the match that starts on the first line.
s = '''sdfxxhello
xxfsdfxxworldxxasdf'''
# With the DOTALL flag (re.S) "." also matches the newline character, so the
# first captured item keeps its trailing newline: 'hello\n', then 'world'.
e = re.findall(pattern='xx(.*?)xx', string=s, flags=re.DOTALL)
print(e)

# Comparing search() with findall(): search() returns a single match object
# for the first place the pattern matches.
s2 = 'asdfxxixx123xxlovexxdfd'
f2 = re.search('xx(.*?)xx123xx(.*?)xx', s2)
# group(n) returns the text captured by the n-th pair of parentheses;
# here group(2) is the second capture.
f = f2.group(2)
print(f)
# Left disabled in the original: print(f2[0][1])

# sub(): replace the two "123" markers and everything between them with
# "123" + 789 + "123".
s = '123abcssfasdfas123'
output = re.sub(
    pattern='123(.*?)123',
    repl='123%d123' % 789,
    string=s,
)
print(output)

# (\d+) captures each run of one or more consecutive digits.
a = 'asdfasf1234567fasd55fas'
# The pattern is a raw string: "\d" is not a valid Python string escape, so
# a plain literal triggers an invalid-escape-sequence warning on current
# Python versions. The regex itself is unchanged.
b = re.findall(r'(\d+)', a)
print(b)

#-*-coding:utf8-*-
import re

# First page of the article series and the number of pages to generate
# links for.
old_url = 'http://www.pythontab.com/html/2013/pythonhexinbiancheng001.html'
total_page = 20

# Load the saved page. The with-block closes the file even if read() raises.
with open('test.txt', 'r') as f:
    html = f.read()

# search() stops at the first place that matches;
# findall() walks through the whole document.
# NOTE: search() returns None when there is no <title> element, in which
# case .group(1) raises AttributeError.
title = re.search('<title>(.*?)</title>', html, re.S).group(1)
print(title)

# Collect the value of every href="..." attribute and print one per line.
links = re.findall('href="(.*?)"', html, re.S)
for each in links:
    print(each)


# Grab the large region first, then pick the small pieces out of it:
# text_field = re.findall('<ul>(.*?)</ul>', html, re.S)
# the_text = re.findall('')

# Pagination: build the link of pages 2..total_page by rewriting the page
# number at the end of the first page's URL.
for i in range(2, total_page + 1):
    # The fourth positional argument of re.sub() is `count`, not `flags`; the
    # original passed re.S there, which silently acted as a replacement limit.
    # The pattern contains no ".", so DOTALL has no effect and is dropped.
    # The pattern is a raw string so that "\d" reaches the regex engine
    # without an invalid-escape warning.
    new_link = re.sub(
        r'pythonhexinbiancheng00\d+',
        'pythonhexinbiancheng00%d' % i,
        old_url,
    )
    print(new_link)

小学生反弹

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
正则表达式的使用

2016-2-27 常用符号 #-*-coding:utf8-*- import re # from re import findall,search,S print('hello world') secret_code = 'feafefexxixx23fe23xxlovexxafeifvaxxyouxx32fe' a = 'xz123' b = re.findall('……
复制链接

扫一扫