# -*- coding: utf-8 -*-
import urllib
import xml.dom.minidom
import MySQLdb
import datetime
# Timestamp shared by every row written during this run.
TimePoint = datetime.datetime.now()

# Hourly-data endpoint; "<region id>.xml" is appended for each request.
urlPrefix = 'http://121.28.49.85:8080/datas/hour/'
regiondIds = ['130000']

# Text found in <Value> when a reading is unavailable; stored as 0.
NO_DATA_MARKER = '--'

# Number of pollutant columns (CO, NO2, O3, O3_8h, PM10, PM2_5, PM2_5_24h, SO2).
POLLUTANT_COLUMN_COUNT = 8

# One row per <Pointer>: nine descriptive columns, eight pollutant columns
# and the run timestamp, i.e. 18 placeholders in total.
INSERT_SQL = (
    'INSERT INTO DATARETEST(Province, City, Area, '
    'PositionName, StationCode, Latitude, Longitude, AQI, Quality,'
    'CO, NO2, O3, O3_8h, PM10, PM2_5, PM2_5_24h, SO2, TimePoint)'
    'VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)'
)


def first_text(node, tag):
    """Return the text of the first ``tag`` descendant of ``node``.

    Returns None when no such element exists or when it has no child node,
    instead of raising IndexError / AttributeError as a bare
    ``getElementsByTagName(tag)[0].firstChild.nodeValue`` chain would.
    """
    matches = node.getElementsByTagName(tag)
    if not matches or matches[0].firstChild is None:
        return None
    return matches[0].firstChild.nodeValue


def encoded_text(node, tag):
    """Return ``first_text(node, tag)`` encoded as UTF-8, or None if absent."""
    text = first_text(node, tag)
    if text is None:
        return None
    return text.encode("utf-8")


def pollutant_reading(pointer):
    """Return the value written to every pollutant column for ``pointer``.

    The value is the text of the first <Value> element below the pointer.
    0 is returned when the pointer has no <Poll> element, when <Value> is
    missing or empty, or when it holds the ``'--'`` no-data marker.
    """
    # NOTE(review): as in the original script, all eight pollutant columns
    # receive this one value; nothing is read per <Poll>. Mapping each <Poll>
    # to its own column needs the feed's schema -- confirm and extend here.
    if not pointer.getElementsByTagName('Poll'):
        # Without this guard the original reused the previous station's
        # readings (or hit a NameError on the first station).
        return 0
    value = first_text(pointer, 'Value')
    if value is None or value == NO_DATA_MARKER:
        return 0
    return value


def build_row(pointer):
    """Build the 18-item parameter tuple for ``INSERT_SQL`` from a <Pointer>.

    Descriptive fields are UTF-8 encoded text, or None (SQL NULL) when the
    element is missing or empty.
    """
    reading = pollutant_reading(pointer)
    described = (
        '河北省',
        encoded_text(pointer, 'City'),
        encoded_text(pointer, 'Region'),
        encoded_text(pointer, 'Name'),
        # NOTE(review): StationCode is filled from <Color>, as the original
        # script did -- confirm this is the intended source element.
        encoded_text(pointer, 'Color'),
        encoded_text(pointer, 'CLat'),
        encoded_text(pointer, 'CLng'),
        encoded_text(pointer, 'AQI'),
        encoded_text(pointer, 'Level'),
    )
    return described + (reading,) * POLLUTANT_COLUMN_COUNT + (TimePoint,)


def parse_pointers(data):
    """Parse an XML document and return its <Pointer> elements."""
    root = xml.dom.minidom.parseString(data).documentElement
    return root.getElementsByTagName("Pointer")


def fetch_region(region_id):
    """Download the hourly XML document for ``region_id`` and return its body."""
    full_url = urlPrefix + region_id + '.xml'
    response = urllib.urlopen(full_url)  # Python 2 API, as used by this script
    try:
        return response.read()
    finally:
        response.close()


def main():
    """Fetch every configured region and insert one row per <Pointer>.

    Rows are committed after each region. The connection is closed even when
    fetching, parsing or inserting raises; the exception still propagates.
    """
    # NOTE(review): credentials are hard-coded; consider loading them from
    # configuration instead.
    db = MySQLdb.Connect(host="localhost", user="root", passwd="root", db="pythondb", charset="utf8")
    try:
        cursor = db.cursor()
        for region_id in regiondIds:
            for pointer in parse_pointers(fetch_region(region_id)):
                cursor.execute(INSERT_SQL, build_row(pointer))
            db.commit()
    finally:
        db.close()


if __name__ == "__main__":
    main()
'''
INSERT INTO DATARETEST(Province, City, Area,
PositionName, StationCode, Latitude, Longitude, AQI, Quality,
CO, NO2, O3, O3_8h, PM10, PM2_5, PM2_5_24h, SO2)
VALUES ('河北', '石家庄', '发觉县', '观测站', 'a001',
212, 212, 21, '有', 12, 21, 21, 21, 21, '32', 21, 12)
'''
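# python 网页爬取数据并存到数据库中 (Python: scrape data from a web page and store it in a database)
# 最新推荐文章于 2024-03-29 16:44:41 发布 (blog-page footer: "latest recommended article published 2024-03-29 16:44:41")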