#coding:utf8
import urllib2
import time
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
import re
# testMatch: download a page from www.sse.com.cn and print a table built from it.
#
# Steps visible in the code: request the hard-coded URL with urllib2, read the
# response body, run two regexes compiled with re.S over it via re.findall
# (one for the table titles, one for the table contents), concatenate the
# captured groups into a single string and print that string to stdout.
# Takes no arguments and contains no return statement.
#
# NOTE(review): this block is not valid Python as it stands. The function body
# has no indentation, several statements are fused onto one physical line, and
# the literals assigned to titlePatternStr, valuePatternStr, tableStr and
# titleStr are cut off right after the opening quote. Presumably the HTML
# markup inside those literals was stripped by whatever exported this file;
# restore them from the original source -- TODO confirm.
# NOTE(review): urllib2 and the print statement exist only in Python 2.
# NOTE(review): the response object is never closed.
def testMatch():
remoteoteURL = r'http://www.sse.com.cn/disclosure/listedinfo/credibility/change/'
request = urllib2.Request(remoteoteURL)
response = urllib2.urlopen(request)
result = response.read() # fetch the HTML of the response
# Regular expression matching the table titles
titlePatternStr = r'
# Regular expression matching the table contents
valuePatternStr = r'
(\d*?).*?(\S*).*?(\S*).*?(\S*).*?(\S*).*?(\S*).*?document.write\(\$.format\(\'([\.,\d]*)\'.*?document.write\(\$.format\(\'([\.,\d]*)\'.*?document.write\(\$.format\(\'([\.,\d]*)\'.*?document.write\(\$.format\(\'([\.,\d]*)\'.*?(\S*).*?(\S*).*?(\S*)'valuePattern = re.compile(valuePatternStr, re.S)
titlePattern = re.compile(titlePatternStr, re.S)
# Match the table titles
titles = re.findall(titlePattern, result)
# Match the table contents
items = re.findall(valuePattern, result)
# Build an HTML table from the match results
tableStr = "
titleStr = "
"for title in titles:
for tt in title:
titleStr += '
'+ str(tt)+''titleStr += "
"tableStr += titleStr
for item in items:
tableStr += "
"for name in item:
tableStr += "
%s" % (name)tableStr += "
"tableStr += "
"print tableStr
# Module-level call: testMatch() runs whenever this file is executed or imported.
testMatch()