eval函数,将字符串格式的列表转为列表,并且支持字典操作
import requests
import pymysql,random,datetime
import re,time,datetime
def create_table(database):
db = pymysql.connect(host='localhost', port=3306, user='root', password='mysql', db='water',
charset='utf8mb4', cursorclass=pymysql.cursors.DictCursor)
cursor = db.cursor()
sql= "DROP TABLE IF EXISTS " + str(database)
cursor.execute(sql)
sql2="""
CREATE
TABLE
{database}
(
station VARCHAR(40),
water_level float,
crawl_date date,
data_time VARCHAR(40),
flow double,
other_field VARCHAR(40)
)ENGINE=innodb DEFAULT CHARSET=utf8;
"""
cursor.execute(sql2.format(database=database))
db.close()
def insertIntoChannel(station,water_level,crawl_date,data_time,flow,other,cursor,db):
list=[station,water_level,crawl_date,data_time,flow,other]
print(list)
cursor.execute("insert into Yangtze(station,water_level,crawl_date,data_time,flow,other_field) \
values('%s','%f','%s','%s','%f','%s')" % \
(station,water_level,crawl_date,data_time,flow,other))
db.commit()
#会执行删表操作,千万注意使用
# create_table('Yangtze')
def get_time(timeStamp):
timeStamp = int(timeStamp/1000)
timeArray = time.localtime(timeStamp)
otherStyleTime = time.strftime("%Y-%m-%d %H:%M:%S",timeArray)
return otherStyleTime
while(1):
urls=['http://www.cjh.com.cn/sqindex.html','http://zy.cjh.com.cn/sqall.html']
for url in urls:
###开启数据库
db = pymysql.connect(host='localhost', port=3306, user='root', password='mysql', db='water',
charset='utf8mb4', cursorclass=pymysql.cursors.DictCursor)
cursor = db.cursor()
content=requests.get(url)
tmp=re.findall( 'var sssq = (.*?)]',content.text)
data=eval(tmp[0]+"]")
crawl_date=datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
print(data)
for i in data:
print(i)
insertIntoChannel(i['stnm'],float(i['z']),crawl_date,get_time(i['tm']),float(i['q']),i['rvnm'].strip(),cursor,db)
print('插入一条')
#关闭数据库
cursor.close()
time.sleep(3600)
抓取长江水文代码
sqlite版本
# -*- coding: UTF-8 -*-
__author__ = 'zy'
__time__ = '2020/7/27 9:32'
import requests
import pymysql,random,datetime
import re,time,datetime
import sqlite3
def create_table(database):
con = sqlite3.connect("water.db")
cursor = con.cursor()
sql= "DROP TABLE IF EXISTS " + str(database)
cursor.execute(sql)
sql2="""
CREATE
TABLE
{database}
(
station VARCHAR(40),
water_level float,
crawl_date date,
data_time VARCHAR(40),
flow double,
other_field VARCHAR(40)
);
"""
cursor.execute(sql2.format(database=database))
con.commit()
cursor.close()
def insertIntoChannel(station,water_level,crawl_date,data_time,flow,other,cursor,con):
list=[station,water_level,crawl_date,data_time,flow,other]
print(list)
cursor.execute("insert into water(station,water_level,crawl_date,data_time,flow,other_field) \
values('%s','%f','%s','%s','%f','%s')" % \
(station,water_level,crawl_date,data_time,flow,other))
con.commit()
#会执行删表操作,千万注意使用
create_table('water')
def get_time(timeStamp):
timeStamp = int(timeStamp/1000)
timeArray = time.localtime(timeStamp)
otherStyleTime = time.strftime("%Y-%m-%d %H:%M:%S",timeArray)
return otherStyleTime
while(1):
urls=['http://www.cjh.com.cn/sqindex.html','http://zy.cjh.com.cn/sqall.html']
for url in urls:
###开启数据库
con = sqlite3.connect("water.db")
cursor = con.cursor()
content=requests.get(url)
tmp=re.findall( 'var sssq = (.*?)]',content.text)
data=eval(tmp[0]+"]")
crawl_date=datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
print(data)
for i in data:
print(i)
insertIntoChannel(i['stnm'],float(i['z']),crawl_date,get_time(i['tm']),float(i['q']),i['rvnm'].strip(),cursor,con)
print('插入一条')
#关闭数据库
cursor.close()
time.sleep(3600)