直接上代码,使用pandas库读写操作pgsql数据库,第一段代码数据库操作类pg_connecting.py,第二段数据库连接信息类setting.py,第三段实例化
# coding: utf-8
# --利用pandas库读/写pg数据库,pg_connecting.py
--
import psycopg2
import sqlalchemy
import pandas as pd
from datetime import datetime
from collections import OrderedDict # 使用OrderedDict保证字典有序
# --------PostgreSQL--------
class PgsqlConnector:
# ---**kwargs:接收任意数量参数---
def __init__(self, **kwargs):
if len(OrderedDict(kwargs)) >= 2:
for value in OrderedDict(kwargs).values():
if type(value) == psycopg2.extensions.connection:
self.conn = value
elif type(value) == sqlalchemy.engine.base.Engine:
self.engine = value
elif type(value) == str:
self.schema = value
else:
self.schema = 'public'
print('Error.输入应为psycopg2.connect对象和sqlalchemy.create_engine对象,请检查')
else:
print('Error.需要至少2个关键字参数,请检查')
# --使用无变量SQL语句从数据库抽取数据--
def read_table(self, sql):
"""
Arg:
sql: [str]使用的SQL语句
Returns:
data: [DataFrame]查询结果
"""
db = self.conn
cursor = db.cursor()
cursor.execute(sql)
data = cursor.fetchall()
data = pd.DataFrame(list(data))
db.commit()
return data
# --以时间戳为条件从数据库抽取数据--
def read_with_time(self, time, sql):
"""
Args:
time: [str]用于WHERE查询的时间数据
sql: [str]使用的SQL语句
Returns:
data: [DataFrame]查询结果
"""
db = self.conn
cursor = db.cursor()
# 执行sql语句,以time作为条件
cursor.execute(sql % time)
data = cursor.fetchall()
data = pd.DataFrame(list(data))
db.commit()
return data
# --指定列名SELECT,支持LIMIT关键字和单条件WHERE查询--
def select_by_columns(self, tbl, limit=None, **kwargs):
"""
Args:
tbl: [str] 要查询的表明
limit: [int] 设置LIMIT数量
**kwargs: [list] 支持3个自定义字段,顺序固定为[要提取的列名列表(,where语句内容)(,where语句判断变量)]
例如:[['c1','c2'],'create_time >', timestamp变量名]
Returns:
data: [DataFrame]查询结果
"""
params = OrderedDict(kwargs)
db = self.conn
cursor = db.cursor()
sql = "SELECT " + ', '.join(list(params.values())[0]) + " FROM %s"
if len(params) == 2:
sql = sql + ' WHERE ' + list(params.values())[1]
else:
pass
if len(params) == 3:
sql = sql + ' WHERE ' + list(params.values())[1] + " '%s'" % list(params.values())[2]
else:
pass
if limit:
sql = sql + " LIMIT %s" % limit
else:
pass
cursor.execute(sql % tbl)
data = cursor.fetchall()
data = pd.DataFrame(list(data))
db.commit()
return data
# --简单查询--
def simple_query(self, sql):
db = self.conn
cursor = db.cursor()
# 执行sql语句,以time作为条件
cursor.execute(sql)
data = cursor.fetchall()
db.commit()
return data
# --数据写入数据库--
def to_table(self, data, schema, insert_method, target_table):
"""
Args:
data: [Dataframe]需要插入的数据
schema: [str]PgSQL中schema名称
insert_method: [str]插入方式: 可选'append','replace','fail'
target_table: [str]目标表名称
"""
# try:
data.to_sql(name=target_table, schema=self.schema, con=self.engine, if_exists=insert_method, index=False)
print(str(datetime.now()) + ': 处理完成,数据已写入数据库 ')
# except Exception:
# print(str(datetime.now()) + ': 插入失败!')
# --关闭数据库连接--
def close_db(self):
self.conn.close()
这是连接信息模块setting.py
单独列出,方便后续修改,只需在setting.py这个文件下修改或增加连接信息即可
# coding: utf-8
# --存储设置参数--
import psycopg2
import pymysql
from sqlalchemy import create_engine
class PgSetting:
def __init__(self):
self.conn = psycopg2.connect("dbname=*** user=*** password=****** host=*** port=***")
# 根据自己实际修改
self.engine = create_engine('postgresql+psycopg2://user:password@host:port/dbname')
self.schema = 'public'
在主函数中实例化
# coding: utf-8
from Settings import PgSetting, FileDirs, TFIDFParams
from pg_connection import PgsqlConnector
# 数据库连接实例化
pg = PgSetting()
db = PgsqlConnector(conn=pg.conn, engine=pg.engine, schema=pg.schema)
# 查询语句设置(根据自己需要去列)
cols = ['id_1', 'id_2', 'similarity as content_sim', 'time_1', 'time_2', 'address_1', 'address_2']
# 文本相似度的输出表
source_tbl = 'tbl_content_similarity'
# 结果写入表名
targel_tbl = 'tbl_address_sim'
# 插入方式:替换
insert_method = 'replace'
# ----主进程----
# 处理数据文件并计算相似度
rawdata = db.select_by_columns(source_tbl, cols=cols)
# 给列赋名,为赋名前是0,1,2……,7,赋名后是id_1,id_2,similarity……,address_2
rawdata.columns = ['id_1', 'id_2', 'similarity', 'time_1', 'time_2', 'address_1', 'address_2']
print(rawdata)
运行结果
id_1 id_2 similarity ... address_2
0 12798 16589 0.914289 ... 五常大道绕城桥洞下面
1 12855 332 1.000000 ... 良渚蔬菜市场计家坝湖边
2 12855 383 1.000000 ... 苕溪瓶窑水文站
... ... ... ... ... ...
16897 132343 107350 0.915108 ... 金家渡农贸市场
16898 132343 127116 0.915108 ... 金家渡农贸市场
[16899 rows x 7 columns]