python 网页爬取数据并存到数据库中

# -*- coding: utf-8 -*-
import urllib
import xml.dom.minidom
import MySQLdb
import datetime

# Scrape hourly air-quality XML for each region and store one row per
# <Pointer> element in the DATARETEST table.

# Timestamp taken once at start-up; every row inserted by this run shares it.
TimePoint = datetime.datetime.now()

#mysql
# NOTE(review): credentials are hard-coded here; consider loading them from
# configuration instead of source.
db=MySQLdb.Connect(host="localhost",user="root",passwd="root",db="pythondb",charset="utf8")
cursor = db.cursor()

urlPrefix = 'http://121.28.49.85:8080/datas/hour/'
regiondIds = ['130000']

# Parameterized INSERT; the driver substitutes the 18 values, so no manual
# quoting is needed. The text is kept identical to the original statement.
_INSERT_SQL = (
    'INSERT INTO DATARETEST(Province, City, Area, '
    '      PositionName, StationCode, Latitude, Longitude, AQI, Quality,'
    '      CO, NO2, O3, O3_8h, PM10, PM2_5, PM2_5_24h, SO2, TimePoint)'
    '      VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)'
)


def _child_text(node, tag):
    """Return the text of the first ``tag`` element found beneath ``node``.

    Raises IndexError when no such element exists and AttributeError when
    the element has no child node.
    """
    return node.getElementsByTagName(tag)[0].firstChild.nodeValue


try:
    for regiondId in regiondIds:
        fullUrl = urlPrefix + regiondId + '.xml'

        # Close the HTTP response explicitly instead of leaking it.
        response = urllib.urlopen(fullUrl)
        try:
            data = response.read()
        finally:
            response.close()

        dom = xml.dom.minidom.parseString(data)
        root = dom.documentElement
        pointers = root.getElementsByTagName("Pointer")

        for pointer in pointers:
            Province = '河北省'
            City = _child_text(pointer, 'City').encode("utf-8")
            Area = _child_text(pointer, 'Region').encode("utf-8")
            PositionName = _child_text(pointer, 'Name').encode("utf-8")
            # NOTE(review): StationCode is read from the <Color> tag, as in
            # the original code - confirm this is the intended source.
            StationCode = _child_text(pointer, 'Color').encode("utf-8")
            Latitude = _child_text(pointer, 'CLat').encode("utf-8")
            Longitude = _child_text(pointer, 'CLng').encode("utf-8")
            AQI = _child_text(pointer, 'AQI').encode("utf-8")
            Quality = _child_text(pointer, 'Level').encode("utf-8")

            polls = pointer.getElementsByTagName('Poll')

            # NOTE(review): as before, every pollutant column receives the
            # FIRST <Value> under this pointer. The per-pollutant layout of
            # <Poll> is not visible here, so this is preserved rather than
            # guessed at - it is probably not what was intended.
            if polls:
                reading = _child_text(pointer, 'Value')
                # '--' is the feed's "no data" marker; store it as 0.
                if reading == '--':
                    reading = 0
            else:
                # Previously the variables were left unbound (NameError) or
                # kept the previous station's values when no <Poll> existed.
                reading = 0

            CO = NO2 = O3 = PM10 = O3_8h = PM2_5 = PM2_5_24h = SO2 = reading

            cursor.execute(
                _INSERT_SQL,
                (Province, City, Area, PositionName, StationCode, Latitude,
                 Longitude, AQI, Quality, CO, NO2, O3, O3_8h, PM10, PM2_5,
                 PM2_5_24h, SO2, TimePoint))

        # Commit once per region so a later failure keeps earlier regions.
        db.commit()
finally:
    # Close after ALL regions are processed (it used to be closed inside
    # the region loop), and also when an error interrupts the run.
    db.close()


# Reference only: a sample INSERT for the DATARETEST table with literal values.
# This is a bare string literal, so it is evaluated and discarded; it is never
# sent to the database. It lists 17 columns and has no TimePoint column.
'''
   INSERT INTO DATARETEST(Province, City, Area, 
         PositionName, StationCode, Latitude, Longitude, AQI, Quality,
         CO, NO2, O3, O3_8h, PM10, PM2_5, PM2_5_24h, SO2)
         VALUES ('河北', '石家庄', '发觉县', '观测站', 'a001',
         212, 212, 21, '有', 12, 21, 21, 21, 21, '32', 21, 12)
'''
      
         
      
      
         

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值