python 之Postgres连接

       朋友们,如果你接触大数据,肯定离不开greenplum,需要你用python进行操作和处理,这里面需要封装很多函数,下面我封装一个较全的类,你们可以自己下载后修改后复用。

import pandas as pd
import psycopg2
import psycopg2.extras
from io import StringIO
import os


class GPDB:
    def __init__(self):
        self.dbname = "bigdata"
        self.user = "bigdata"
        self.password = "bigdata!"
        self.host = "localhost"
        self.port = "5432"

    def gp_connect(self):
        try:
            db = psycopg2.connect(dbname=self.dbname,
                                  user=self.user,
                                  password=self.password,
                                  host=self.host,
                                  port=self.port)
            return db
        except psycopg2.DatabaseError as e:
            print("could not connect to Greenplum server", e)

    def select_data(self, sql):
        conn = self.gp_connect()
        cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
        cur.execute(sql)
        data = cur.fetchall()
        conn.close()  # 关闭连接
        return data
    def truncate_table(self, sql):
        try:
            conn = self.gp_connect()
            cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
            # Delete data row from table
            cur.execute(sql)
            conn.commit()
            cur.close()
            print("TRUNCATE TABLE")
        except Exception as e:
            raise e
        finally:
            cur.close()
            conn.close()
    def insert_df(self, tablename, df: pd.DataFrame):
        placeholders = ', '.join(['%s'] * df.shape[1])
        columns_str = ', '.join(df.columns)
        sql = "insert into {}({})values ({})".format(tablename, columns_str, placeholders)
        conn = self.gp_connect()
        cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
        cur.executemany(sql, df.values)

    def read_df_format(self, sql):
        conn = self.gp_connect()
        df = pd.read_sql(sql=sql, con=conn)
        return df

    def insert_copy_df(self, tablename, df: pd.DataFrame()):
        fp = StringIO()
        df.to_csv(fp, sep='&', index=False, header=False)
        value = fp.getvalue()
        conn = self.gp_connect()
        cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
        cur.copy_from(StringIO(value), tablename, sep="&",
                      columns=df.columns)
        conn.commit()
        cur.close()
        conn.close()

    def copy_from_file(self, table, df_path):
        """
        Here we are going save the dataframe on disk as
        a csv file, load the csv file
        and use copy_from() to copy it to the table
        """
        # Save the dataframe to disk
        f = open(df_path, 'r', encoding="utf8")
        conn = self.gp_connect()
        cursor = self.gp_connect().cursor()
        try:
            cursor.copy_from(f, table, sep="&")
            conn.commit()
        except (Exception, psycopg2.DatabaseError) as error:
            print("Error: %s" % error)
            conn.rollback()
            cursor.close()
            return 1
        print("copy_from_file() done")
        cursor.close()

 上面是连接类,用的时候也非常方便,只需要创建连接,调用即可:

gpdb = GPDB()
gpd = gpdb.read_df_format("select * from demo")

批量数据插入可以用:

gpdb.insert_copy_df("daba.demo", df)

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

ITIRONMAN

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值