最近项目需要把sqlserver数据迁移至mongodb数据库
数据转移分为下列几个步骤
- 1、从sql server 数据库取出数据
- a) 使用pyodbc包连接数据库
- b) 获取对应表行数,及最大最小日期
- c) 按日期循环取出数据
- d) 数据存入DataFrame
- 2、数据存入mongodb数据库
- a) 使用pymongo包连接数据库
- b) 使用json把DataFrame数据转换为mongodb类数据
- c) 数据存入数据库
实现代码如下
#读取sqlserver数据
import pyodbc
import pymongo
import pandas as pd
import json
import time
import datetime
from tqdm import tqdm
class db:
def __init__(self,server,user,password,database):
self.conn=pyodbc.connect(driver='{sql server}',server=server,user=user,pwd=password,database=database)
self.cursor=self.conn.cursor()
def get_list(self,table):
sql='Select count(*) as s_coun,max(CreateTime) as t_max,min(CreateTime) as t_min from %s' %(table)
self.cursor.execute(sql)
result=self.cursor.fetchall()[0]
self.d_count=int(result[0]/10000)
self.start=result[1]
self.end=result[2]
return self.d_count,self.start,self.end
def get_data(self,table,d_start,d_end):
self.sql="Select * from %s where CreateTime> '%s' and CreateTime < '%s'" %(table,d_start,d_end)
self.df=pd.read_sql(self.sql,self.conn)
return self.df
def __close__(self):
self.conn.close()
self.cursor.close()
class conn_mongodb():
def __init__(self,server,port):
self.client=pymongo.MongoClient(server,port)
def insert_mongodb(self,database,table,data):
self.db=self.client[database]
self.col=self.db[table]
self.result=self.col.insert_many(json.loads(data.T.to_json()).values())
return self.result
def del_collections(self,database,table):
self.db=self.client[database]
self.col=self.db[table]
self.col.drop()
def __close__(self):
self.client.close()
def main(table):
m_database='KJDB'
m_table=table
x=db('192.192.192.192','sa','xxx','Plc_KJDB')
m=conn_mongodb('localhost',27017)
d_count,d_max,d_min=x.get_list(table)
d_max=d_max.date()
d_min=d_min.date()
start=time.time()
m.del_collections(m_database,m_table)
pbar=tqdm(range(d_count)) #设置进度条
for i in pbar:
t=time.time()
d_start=d_min+datetime.timedelta(days=i)
d_end=d_min+datetime.timedelta(days=i+1)
if d_start>d_max: #限定最大取值时间
break
df=x.get_data(table,d_start,d_end)
if df.empty:
continue
t1=time.time()
err=m.insert_mongodb(m_database,m_table,df)
pbar.set_description('read data:%s count:%s,time:%.2f;write data:result:%s,total_count:%s,time:%.2f'
%(d_start,df.shape[0],t1-t,err.acknowledged,m.col.estimated_document_count(),time.time()-t1))
x.__close__
m.__close__
print('\ntotal time:%.2f' %(time.time()-start))
if __name__ == '__main__':
table='sys0003'
main(table)