#coding=utf-8
# NOTE(review): this second coding declaration is inert — Python honors only
# the first one (utf-8 above); remove or reconcile.
import ast
import os
import re
import subprocess
import sys
import time

import MySQLdb
import urllib
import urllib2
reload(sys)
def getHtmlData(areaCode):
# url='http://sq.weather.com.cn/mweather/101280601.shtml'
url=' http://m.weather.com.cn/mweather/%s.shtml'%areaCode
# url='http://e.weather.com.cn/d/index/101010100.shtml'
# url='http://www.weather.com.cn/'
# url='http://www.weather.com.cn/weather1d/101010100.shtml'
req_header = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
#'Accept-Language': 'en-US,en;q=0.8,zh-Hans-CN;q=0.5,zh-Hans;q=0.3',
'Accept-Charset':'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
'Accept-Encoding':'en-us',
'Connection':'keep-alive',
'Referer':'http://www.weather.com.cn/'
}
user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
header = { 'User-Agent' : user_agent ,
'Referer':url}
try:
request = urllib2.Request(url,headers = header)
response = urllib2.urlopen(request)
content = response.read().decode("utf-8")
# decode('utf8')
# print content
# pattern = re.compile('<div.*?author">.*?<a.*?<img.*?>(.*?)</a>.*?<div.*?'+'content">(.*?)<!--(.*?)-->.*?</div>(.*?)<div class="stats.*?class="number">(.*?)</i>',re.S)
# pattern = re.compile('<div class="today clearfix" id="today">.+<p class="tem">.+<span>(\d)</span>.+</p>.+</div>.+<ul class="clearfix">',re.S)
# pattern = re.compile('<input type="hidden" id="hidden_title" value=(.+)/>$',re.S)
pattern = re.compile('dataSK.+=.+(\{.*date.+\d.+\d.+\(.+\)"\})',re.M)
items = re.findall(pattern,content)
# pattern1 = re.compile('<li>\n<b>(.+)</b>\n</li>\n<img.*alt(.+).+\n<img.*alt(.+).+\n</li>\n<span>(.+)</span>',re.S)
pattern1 = re.compile('<b>(?P<week>.+)</b>\n<i>\n<.+alt=(?P<weather>.+)\/\>\n<.+alt=(?P<weather2>.+)\/\>\n</i>\n<span>(?P<tmpArea>.+)</span>',re.M)
items1 = re.findall(pattern1,content)
print items1
for item in items1:
print item[0],item[1],item[2],item[3]
dictTmpWeather={}
for item in items:
dictTmpWeather= eval(item)
# print dictTmpWeather
for key in dictTmpWeather:
# pass
print key,dictTmpWeather[key]
# print dictTmpWeather['cityname'] ,dictTmpWeather['city'],dictTmpWeather['temp']
except urllib2.URLError, e:
if hasattr(e,"code"):
print e.code
if hasattr(e,"reason"):
print e.reason
def getwordClockHtmlData(area):
# url='http://sq.weather.com.cn/mweather/101280601.shtml'
url='http://www.timedate.cn/worldclock/results.asp?query=%s'%area
# url='http://e.weather.com.cn/d/index/101010100.shtml'
# url='http://www.weather.com.cn/'
# url='http://www.weather.com.cn/weather1d/101010100.shtml'
req_header = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
#'Accept-Language': 'en-US,en;q=0.8,zh-Hans-CN;q=0.5,zh-Hans;q=0.3',
'Accept-Charset':'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
'Accept-Encoding':'en-us',
'Connection':'keep-alive',
'Referer':'http://www.timedate.cn/'
}
user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
header = { 'User-Agent' : user_agent ,
'Referer':url}
try:
request = urllib2.Request(url,headers = header)
response = urllib2.urlopen(request)
content = response.read()
#print content
pattern=re.compile(r"nyear=(\d+);\r\nnmonth=(\d+);\r\nnday=(\d+);\r\nnwday=(\d+);\r\nnhrs=(\d+);\r\nnmin=(\d+);\r\nnsec=(\d+);",re.M)
allItems=re.findall(pattern,content)
year,month,nday,nWeek,nhour,nmin,nsec=allItems[0]
print "year=%s,mounth=%s,nday=%s,nweek=%s,nhour=%s,nmin=%s,nsec=%s"%(year,month,nday,nWeek,nhour,nmin,nsec)
except urllib2.URLError, e:
if hasattr(e,"code"):
print e.code
if hasattr(e,"reason"):
print e.reason
# NOTE(review): the whole file content is duplicated below this point; the
# later copies of getHtmlData/getwordClockHtmlData shadow the ones above, and
# when run as a script this guard fires here AND again at the end of the
# file, so both functions execute twice.  The duplicate should be removed.
if __name__ == "__main__":
    # Beijing's weather.com.cn area code.
    getHtmlData('101010100')
    getwordClockHtmlData("Auckland")
#coding=gbk
import os
import sys
import re
import time
import subprocess
import MySQLdb
import urllib
import urllib2
reload(sys)
def getHtmlData(areaCode):
# url='http://sq.weather.com.cn/mweather/101280601.shtml'
url=' http://m.weather.com.cn/mweather/%s.shtml'%areaCode
# url='http://e.weather.com.cn/d/index/101010100.shtml'
# url='http://www.weather.com.cn/'
# url='http://www.weather.com.cn/weather1d/101010100.shtml'
req_header = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
#'Accept-Language': 'en-US,en;q=0.8,zh-Hans-CN;q=0.5,zh-Hans;q=0.3',
'Accept-Charset':'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
'Accept-Encoding':'en-us',
'Connection':'keep-alive',
'Referer':'http://www.weather.com.cn/'
}
user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
header = { 'User-Agent' : user_agent ,
'Referer':url}
try:
request = urllib2.Request(url,headers = header)
response = urllib2.urlopen(request)
content = response.read().decode("utf-8")
# decode('utf8')
# print content
# pattern = re.compile('<div.*?author">.*?<a.*?<img.*?>(.*?)</a>.*?<div.*?'+'content">(.*?)<!--(.*?)-->.*?</div>(.*?)<div class="stats.*?class="number">(.*?)</i>',re.S)
# pattern = re.compile('<div class="today clearfix" id="today">.+<p class="tem">.+<span>(\d)</span>.+</p>.+</div>.+<ul class="clearfix">',re.S)
# pattern = re.compile('<input type="hidden" id="hidden_title" value=(.+)/>$',re.S)
pattern = re.compile('dataSK.+=.+(\{.*date.+\d.+\d.+\(.+\)"\})',re.M)
items = re.findall(pattern,content)
# pattern1 = re.compile('<li>\n<b>(.+)</b>\n</li>\n<img.*alt(.+).+\n<img.*alt(.+).+\n</li>\n<span>(.+)</span>',re.S)
pattern1 = re.compile('<b>(?P<week>.+)</b>\n<i>\n<.+alt=(?P<weather>.+)\/\>\n<.+alt=(?P<weather2>.+)\/\>\n</i>\n<span>(?P<tmpArea>.+)</span>',re.M)
items1 = re.findall(pattern1,content)
print items1
for item in items1:
print item[0],item[1],item[2],item[3]
dictTmpWeather={}
for item in items:
dictTmpWeather= eval(item)
# print dictTmpWeather
for key in dictTmpWeather:
# pass
print key,dictTmpWeather[key]
# print dictTmpWeather['cityname'] ,dictTmpWeather['city'],dictTmpWeather['temp']
except urllib2.URLError, e:
if hasattr(e,"code"):
print e.code
if hasattr(e,"reason"):
print e.reason
def getwordClockHtmlData(area):
# url='http://sq.weather.com.cn/mweather/101280601.shtml'
url='http://www.timedate.cn/worldclock/results.asp?query=%s'%area
# url='http://e.weather.com.cn/d/index/101010100.shtml'
# url='http://www.weather.com.cn/'
# url='http://www.weather.com.cn/weather1d/101010100.shtml'
req_header = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
#'Accept-Language': 'en-US,en;q=0.8,zh-Hans-CN;q=0.5,zh-Hans;q=0.3',
'Accept-Charset':'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
'Accept-Encoding':'en-us',
'Connection':'keep-alive',
'Referer':'http://www.timedate.cn/'
}
user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
header = { 'User-Agent' : user_agent ,
'Referer':url}
try:
request = urllib2.Request(url,headers = header)
response = urllib2.urlopen(request)
content = response.read()
#print content
pattern=re.compile(r"nyear=(\d+);\r\nnmonth=(\d+);\r\nnday=(\d+);\r\nnwday=(\d+);\r\nnhrs=(\d+);\r\nnmin=(\d+);\r\nnsec=(\d+);",re.M)
allItems=re.findall(pattern,content)
year,month,nday,nWeek,nhour,nmin,nsec=allItems[0]
print "year=%s,mounth=%s,nday=%s,nweek=%s,nhour=%s,nmin=%s,nsec=%s"%(year,month,nday,nWeek,nhour,nmin,nsec)
except urllib2.URLError, e:
if hasattr(e,"code"):
print e.code
if hasattr(e,"reason"):
print e.reason
# NOTE(review): this is the second, duplicated __main__ guard — the earlier
# half of the file contains identical definitions and an identical guard, so
# running the script calls both functions twice.  The duplicate half of the
# file should be removed.
if __name__ == "__main__":
    # Beijing's weather.com.cn area code.
    getHtmlData('101010100')
    getwordClockHtmlData("Auckland")