Python 数据库去重操作

# -*- coding: UTF-8 -*-
"""
@version: python2.7.8
@author: XiangguoSun
@contact: sunxiangguodut@qq.com
@file: connectdb.py
@time: 2016/12/28 16:12
"""
import MySQLdb
import time

def connectdb():
    """Open a connection to the local ``douban`` MySQL database.

    Connects to ``localhost`` as ``root``/``root``. If the connection
    attempt raises ``MySQLdb.Error`` the error arguments are printed, the
    function sleeps for 10 seconds and tries again, looping until a
    connection is established. It therefore blocks indefinitely while the
    server is unreachable.

    Returns:
        The connection object returned by ``MySQLdb.connect``.
    """
    while True:
        try:
            return MySQLdb.connect("localhost", "root", "root", "douban")
        except MySQLdb.Error as e:
            # Single-argument print() emits the same text on Python 2.7 as
            # the old print statement and also parses on Python 3.
            print("%s (connectdb) we will try again after 10 seconds..."
                  % (e.args,))
            time.sleep(10)


下面是对数据库进行去重操作的代码:

#!/usr/bin/env python
# -*- coding: UTF-8 -*-
"""
@version: python2.7.8 
@author: XiangguoSun
@contact: sunxiangguodut@qq.com
@file: dbfunction.py
@time: 2017/2/10 11:08
@software: PyCharm
"""
import sys
from connectdb import connectdb
# Python 2-only workaround: reload(sys) brings back sys.setdefaultencoding
# (removed from the module at interpreter start-up) so the implicit
# str/unicode conversion codec can be switched to UTF-8.
reload(sys)
sys.setdefaultencoding('utf-8')

def fun(tablename):
    """Deduplicate ``tablename`` so that each (uid, time) pair occurs once.

    Steps, each announced on stdout:

    1. Count the (uid, time) groups that occur more than once and print
       that number.
    2. Copy one row per (uid, time) group into a new table ``tmp``.
    3. Drop the original table and rename ``tmp`` to ``tablename``.
    4. Add a composite primary key on (uid, time).

    The connection and cursor opened here are closed on exit, including
    when a statement raises.

    Args:
        tablename: Name of the table to rewrite. It is interpolated into
            the SQL text unquoted, so it must be a trusted identifier.

    Raises:
        MySQLdb.Error: Propagated from any failing statement (for example
            when a table named ``tmp`` already exists).
    """
    # Function-scope import keeps this block self-contained.
    from contextlib import closing

    columns = ("`uid`, `likers_count`, `time`, `url`, `comments_count`, "
               "`activity`, `type`, `content`, `crawtime`")

    with closing(connectdb()) as db, closing(db.cursor()) as cursor:
        print("start transform %s ..." % tablename)
        cursor.execute(
            "SELECT COUNT(*) AS repetitions, `uid`, `time` "
            "FROM %s "
            "GROUP BY `uid`, `time` "
            "HAVING repetitions > 1" % tablename
        )
        duplicates = cursor.fetchall()
        print("duplicate data:  %d" % len(duplicates))

        print("create tmp...")
        # NOTE(review): the non-grouped columns are selected without an
        # aggregate, so MySQL rejects this when ONLY_FULL_GROUP_BY is
        # enabled, and which duplicate row is kept is not specified.
        cursor.execute(
            "CREATE TABLE tmp SELECT %s FROM %s GROUP BY `uid`, `time`"
            % (columns, tablename)
        )
        print("create tmp done!")

        # NOTE(review): drop + rename is not atomic; if the rename fails the
        # deduplicated rows remain in `tmp` and the original is gone.
        print("drop table " + tablename + "...")
        cursor.execute("DROP TABLE " + tablename)
        print("drop table " + tablename + "done!")

        print("rename table tmp as" + tablename + "...")
        cursor.execute("ALTER TABLE tmp RENAME TO " + tablename)
        print("rename done!")

        print("add key...")
        # Backticks quote the column names as identifiers.
        cursor.execute(
            "ALTER TABLE " + tablename + " ADD PRIMARY KEY (`uid`,`time`)"
        )
        print("key added done!")

        print("succeed!")


# Deduplicate the tables userlifestream_1 .. userlifestream_199; fun() opens
# its own connection, so none is needed at module level.
# NOTE(review): range(1, 200) stops at 199 -- confirm that no
# userlifestream_200 table is expected.
for i in range(1, 200):
    fun("userlifestream_" + str(i))

print("all done!")

请注意转义符号(反引号 ``)的使用,尤其是在 SQL 语句的 GROUP BY 等子句中引用 `time` 这类与关键字同名的列名时。

还有增加联合主键时:没有转义字符会报错。

转义符号(反引号 `)就是键盘上 ~ 键所对应的那个字符。

# Excerpt of the add-primary-key step; `tablename` and `cursor` are assumed
# to be defined by the surrounding code.
# The column names are wrapped in backticks so MySQL reads them as
# identifiers.
print "add key..."
sql = "ALTER TABLE " + tablename + " ADD PRIMARY KEY (`uid`,`time`)"
cursor.execute(sql)
print "key added done!"

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值