# -*- coding: utf-8 -*-
import re
# Example URL and total page count.
old_url = 'http://www.jikexueyuan.com/course/android/?pageNum=2'
total_page = 20
# Load the saved page text into `html`. The file is only read, so it is opened
# read-only ('r+' additionally required write permission), and the with-block
# closes the handle even if read() raises.
with open('1.wenben.txt', 'r') as f:
    html = f.read()
# re.S makes '.' match newlines as well.
# Grab the title: re.search stops at the first match, whereas re.findall
# scans the whole string.
# NOTE(review): the tags inside the pattern appear to have been stripped from
# this file; '<title>...</title>' is reconstructed from the "grab the title"
# comment -- confirm against the intended markup.
title = re.search(r'<title>(.*?)</title>', html, re.S).group(1)
print(title)
# Using re.sub: replace the whole '123...123' span with a formatted string.
s = '123adsg123'
output = re.sub(r'123(.*?)123', 'houzhong%d' % 88, s)
# A parenthesised single-argument print emits the same text on Python 2 and 3.
print(output)
# Do not use re.compile.
# Match digits: r'\d' matches a single digit, so every digit comes back as
# its own list item.
a = 'asdfsf12313dfadfad'
b = re.findall(r'\d', a)
print(b)
# Result: ['1', '2', '3', '1', '3']
# r'\d+' matches each maximal run of consecutive digits as one item.
a = 'asdfsf12313dfadfad2131'
b = re.findall(r'\d+', a)
print(b)
# Result: ['12313', '2131']
# Pagination using re.sub
import re
old_url = 'http://www.jikexueyuan.com/course/android/?pageNum=2'
total_page = 20
# Print the URL of every page from 1 to total_page by rewriting the pageNum
# query value in old_url. The loop body must be indented to belong to the
# for statement; range(1, total_page + 1) yields the 1-based page numbers
# directly.
for i in range(1, total_page + 1):
    new_url = re.sub(r'pageNum=\d+', 'pageNum=%d' % i, old_url)
    print(new_url)