# -*- coding: utf-8 -*-
# UpYunrSync-LevelDB

__author__ = 'glcsnz123'
# -*- coding: utf-8 -*-
import time
import os
import datetime
import thread
import logging
import sys
import upyun
import signal
import Queue
import json
import leveldb
import hashlib


class UpYunLogger:
    """Wrapper around a ``logging`` logger whose emit calls are serialised by a lock.

    Each of ``info``/``error``/``debug``/``warning`` holds one shared lock
    while forwarding the message to the wrapped logger obtained from
    ``logging.getLogger(name)``.
    """

    def __init__(self, name):
        """Wrap the logger registered under *name* and create the shared lock."""
        self.__log_model = logging.getLogger(name)
        self.__printLock = thread.allocate_lock()

    def info(self, text):
        """Log *text* at INFO level."""
        # ``with`` releases the lock even when the logging call raises.
        with self.__printLock:
            self.__log_model.info(text)

    def error(self, text):
        """Log *text* at ERROR level."""
        with self.__printLock:
            self.__log_model.error(text)

    def debug(self, text):
        """Log *text* at DEBUG level."""
        with self.__printLock:
            self.__log_model.debug(text)

    def warning(self, text):
        """Log *text* at WARNING level."""
        with self.__printLock:
            self.__log_model.warning(text)

    def setLevel(self, type):
        """Set the wrapped logger's level to *type* (a ``logging`` level).

        The parameter name shadows the builtin but is kept so that keyword
        callers keep working.
        """
        # The level used to be hard-coded to INFO, ignoring the argument.
        self.__log_model.setLevel(type)

    def addHandler(self, hdlr):
        """Attach the handler *hdlr* to the wrapped logger."""
        self.__log_model.addHandler(hdlr)

#------------CONFIG--------------------
# Module-wide logger; initLogger() rebinds this name to a plain logging.Logger.
LOGGER = UpYunLogger("UpYunrSync")
# Level applied to the log-file handler; changed through SetLoggerLevel().
__LOGLEVEL = logging.INFO
LOGFILE = "/tmp/UpYunSync.log"  # path and file name of the log file
# Configuration file read by main() when no command-line argument is given.
JSON_FILE = "uprsync.json"
# NOTE(review): not read anywhere in the visible code; runMultiThreadSync()
# builds its own timestamped error-file name in a local variable.
ERROR_FILE = "/tmp/UpYunErrorFile.list"

# Capacity of the queue that holds paths waiting to be uploaded.
MAX_QUEUENUM = 100000
#--------------------------------------



def initLogger(LogName="UpYunrSync"):
    """Rebind the module-level ``LOGGER`` to a configured ``logging`` logger.

    Two handlers are attached to the logger named *LogName*: a file handler
    writing to ``LOGFILE`` at the level stored in ``__LOGLEVEL``, and a
    stdout handler with its own level set to DEBUG.  Calling this function
    more than once for the same name attaches the handlers again.
    """
    global LOGGER, LOGFILE, __LOGLEVEL
    LOGGER = logging.getLogger(LogName)
    # A logger filters records before its handlers see them.  With the level
    # fixed at INFO a configured DEBUG level could never reach either handler,
    # so lower the logger level when the configured level asks for it.
    LOGGER.setLevel(min(logging.INFO, __LOGLEVEL))
    formatter = logging.Formatter('[%(asctime)s]    [%(levelname)s]     %(message)s')
    hdlr1 = logging.FileHandler(LOGFILE)
    hdlr1.setFormatter(formatter)
    hdlr1.setLevel(__LOGLEVEL)
    LOGGER.addHandler(hdlr1)
    hdlr2 = logging.StreamHandler(sys.stdout)  # also echo records to standard output
    hdlr2.setFormatter(formatter)
    hdlr2.setLevel(logging.DEBUG)
    LOGGER.addHandler(hdlr2)


def SetLoggerLevel(loglevel=""):
    """Store the log level named by *loglevel* in the module-level ``__LOGLEVEL``.

    The name is matched case-insensitively against DEBUG, INFO, WARNING and
    ERROR (in increasing severity).  Any other name, including the empty
    default, leaves ``__LOGLEVEL`` untouched.
    """
    global __LOGLEVEL
    known_levels = {
        "INFO": logging.INFO,
        "WARNING": logging.WARNING,
        "ERROR": logging.ERROR,
        "DEBUG": logging.DEBUG,
    }
    requested = loglevel.upper()
    if requested in known_levels:
        __LOGLEVEL = known_levels[requested]

def getLastModifyTime(pathname):
    """Return the last modification time of *pathname* as "YYYY-MM-DD hh:mm:ss".

    The timestamp is rendered in local time.  When *pathname* is neither an
    existing directory nor an existing regular file, the Unix epoch
    (timestamp 0) is rendered instead.
    """
    if os.path.isdir(pathname) or os.path.isfile(pathname):
        stamp = os.path.getmtime(pathname)
    else:
        stamp = 0
    # Explicit fields instead of "%F %X": "%F" is a platform extension and
    # "%X" follows the locale, so neither guarantees the documented format.
    return datetime.datetime.fromtimestamp(stamp).strftime("%Y-%m-%d %H:%M:%S")


class UpYunUprSync:
    """Upload a local directory tree to an UpYun bucket with worker threads.

    One thread walks ``SRC_DIR`` and queues every regular file whose
    modification time differs from the one recorded in the metadata
    database.  Worker threads take paths from the queue, upload them and
    record the new modification time.  Files that could not be uploaded are
    written to a timestamped ``*_error_file.list`` in the current directory.
    """

    def __init__(self, BUCKETNAME, USERNAME, PASSWORD, SRC_DIR, DST_DIR, DBDIR, THREAD_NUM=10, **kwargs):
        """Prepare the client, the metadata database and the SIGINT handler.

        SRC_DIR is made absolute; a trailing separator is stripped from both
        SRC_DIR and DST_DIR.  DBDIR is the name of the database directory
        created below ``~/.UpYunDB``.  THREAD_NUM is the number of worker
        threads (at least 1).  The only extra keyword that is read is
        ``TASK_AGAIN``: the value "yes" (any case) clears the metadata
        database before it is opened.
        """
        SRC_DIR = os.path.abspath(SRC_DIR)
        if SRC_DIR.endswith(os.sep):
            SRC_DIR = SRC_DIR[:-1]
        if DST_DIR.endswith(os.sep):
            DST_DIR = DST_DIR[:-1]
        self.__BUCKETNAME = BUCKETNAME
        self.__USERNAME = USERNAME
        self.__PASSWORD = PASSWORD
        self.__SRC_DIR = SRC_DIR
        self.__DST_DIR = DST_DIR
        # NOTE(review): never passed to any request made by this class.
        self.__HEADERS = {"x-gmkerl-rotate": "180"}
        # expanduser() honours $HOME when it is set and still works when it
        # is not, where os.environ['HOME'] would raise KeyError.
        dbRoot = os.sep.join([os.path.expanduser("~"), ".UpYunDB"])
        if not os.path.isdir(dbRoot):
            os.mkdir(dbRoot)
        self.__DBDIR = os.sep.join([dbRoot, DBDIR])
        self.__WORKER_LIMIT = max(1, int(THREAD_NUM))
        self.__ErrorFileList = []
        self.__mylock = thread.allocate_lock()  # start-up handshake with the directory walker
        self.__counterLock = thread.allocate_lock()  # guards __WORKER_NOW
        overwrite = kwargs.get("TASK_AGAIN", "").lower() == "yes"
        self.__LevDB = UpYunLevelDB(self.__SRC_DIR, self.__DBDIR, overwrite)  # open the metadata DB
        self.__InitLogIn()
        signal.signal(signal.SIGINT, self.__Ctrl_C_EXIT)  # exit cleanly on Ctrl-C

    def __InitLogIn(self):
        """Create the UpYun client (30 second timeout, automatic endpoint)."""
        self.__CilentUp = upyun.UpYun(self.__BUCKETNAME, self.__USERNAME, self.__PASSWORD, timeout=30,
                                      endpoint=upyun.ED_AUTO)

    def __CreatePath(self, path):
        """Create the remote counterpart of local directory *path*, level by level.

        *path* must be ``SRC_DIR`` itself or a directory below it.  Levels
        already recorded in the metadata database are skipped.  A level is
        recorded only after its ``mkdir`` succeeded, so a failed level is
        retried on the next run.  Returns True when every level exists
        afterwards, False when at least one ``mkdir`` failed three times.
        """
        srcLen = len(self.__SRC_DIR)
        parts = path[srcLen + 1:].split(os.sep)
        allCreated = True
        for depth in range(1, len(parts) + 1):
            tpath = self.__SRC_DIR + os.sep + os.sep.join(parts[:depth])
            # ``not`` also covers a None answer from an unavailable database.
            if not self.__LevDB.getPathFromSQL(tpath):
                remote = "".join([self.__DST_DIR, tpath[srcLen:]])
                for attemp in range(1, 4):
                    try:
                        self.__CilentUp.mkdir(remote)
                        break
                    except upyun.UpYunServiceException as ueer:
                        LOGGER.error(
                            "Error code: " + str(ueer.status) + " [Failed attemp %d]Create path: " % attemp
                            + remote + " failed!")
                    except upyun.UpYunClientException as ce:
                        # Client-side failures used to escape and kill the worker.
                        LOGGER.error(
                            "[Failed attemp %d]Create path: " % attemp + remote + " failed! <" + str(ce.msg) + ">")
                else:
                    # All attempts failed: do not log success or record the path.
                    allCreated = False
                    continue
                LOGGER.info("".join(["Create path: ", remote, " oked!"]))
                self.__LevDB.insertPathToSQL(tpath)
        return allCreated

    def __CountPathLevel(self, path):
        """Return the number of separator-delimited parts of *path*'s remote name."""
        remote = self.__DST_DIR + path[len(self.__SRC_DIR):]
        return len(remote.split(os.sep))

    def __FileSync(self, fpath):
        """Upload the local file *fpath*; return True on success.

        The upload is attempted up to three times.  On failure the path and
        an error description are appended to the error list and False is
        returned.
        """
        remote = "".join([self.__DST_DIR, fpath[len(self.__SRC_DIR):]])
        try:
            # Binary mode keeps the payload byte-exact; ``with`` closes the handle.
            with open(fpath, "rb") as handle:
                payload = handle.read()
        except (IOError, OSError) as ioe:
            LOGGER.error("".join([remote, " failed! <", str(ioe), ">"]))
            self.__ErrorFileList.append((fpath, str(ioe)))
            return False
        result = (fpath, "")
        for attemp in range(1, 4):
            try:
                self.__CilentUp.put(remote, payload)
                LOGGER.info(remote + " oked")
                return True
            except upyun.UpYunClientException as ce:
                result = (fpath, ce.msg)
                LOGGER.error("".join(
                    ["Failed attemp[%d]" % attemp, remote, " failed! <", ce.msg, ">"]))
            except upyun.UpYunServiceException as se:
                result = (fpath, str(se.status))
                LOGGER.error("".join(
                    ["Failed attemp[%d]" % attemp, remote, " failed! - ", str(se.status), " <", se.msg, ">"]))
        self.__ErrorFileList.append(result)
        return False

    def __EnqueueEntries(self, currentDir, names, dirList, skipNote):
        """Sort the entries *names* of *currentDir* into sub-directories and jobs.

        Directory names are appended to *dirList*, symbolic links to files
        are skipped with a warning, and files are queued unless the database
        already holds their current modification time.  *skipNote* is the
        suffix of the "skipped" log line.
        """
        for filename in names:
            fullname = os.sep.join([currentDir, filename])
            if os.path.isdir(fullname):
                dirList.append(filename)
            elif os.path.islink(fullname):
                LOGGER.warning(fullname.join(["file: ", " is a symbol link file"]))
            else:
                resObj = self.__LevDB.getFileFromSQL(fullname)
                if resObj is not None and resObj == getLastModifyTime(fullname):
                    LOGGER.info(fullname.join(["file: ", skipNote]))
                    continue
                self.__JobFileList.put(fullname, block=True)

    def __getFileList(self, fpath):
        """Walk *fpath* depth-first and queue every file that needs uploading.

        Runs in its own thread.  The caller must hold ``__mylock``; it is
        released as soon as the job queue exists so that workers may start.
        ``__DFS_FINISHED`` is set when the walk ends, whether it completed,
        could not list *fpath*, or was aborted by an exception.
        """
        start = datetime.datetime.now()
        pathStack = [fpath[:-1] if fpath.endswith(os.sep) else fpath]
        self.__DFS_FINISHED = False
        self.__JobFileList = Queue.Queue(MAX_QUEUENUM)  # paths waiting to be uploaded
        # Release before touching the file system: the early return for an
        # unlistable root used to leave the lock held and block the caller forever.
        self.__mylock.release()
        try:
            try:
                tmpList = os.listdir(fpath)
            except OSError:
                LOGGER.error(fpath.join(["Permission Denied! ", ""]))
                return
            # dirList is a stack of directory names still to visit; an empty
            # string marks the end of the children of pathStack's last entry.
            dirList = []
            self.__EnqueueEntries(os.sep.join(pathStack), tmpList, dirList, " skipped")
            while dirList:
                if dirList[-1] == "":
                    pathStack.pop()
                    dirList.pop()
                    continue
                # Pop before listing: leaving an unlistable directory on the
                # stack made this loop spin forever.
                subdir = dirList.pop()
                try:
                    tmpList = os.listdir(os.sep.join(pathStack) + os.sep + subdir)
                except Exception:
                    LOGGER.warning("".join(["Permission Denied! ", os.sep.join(pathStack), os.sep, subdir]))
                    continue
                pathStack.append(subdir)
                dirList.append("")
                self.__EnqueueEntries(os.sep.join(pathStack), tmpList, dirList, " skipped!")
        finally:
            # Workers only exit once this flag is set, so set it on every path.
            self.__DFS_FINISHED = True
        elapsed = datetime.datetime.now() - start
        LOGGER.info("".join(["Finish the dfs after ", str(elapsed.days * 86400 + elapsed.seconds), " s\n"]))

    def __Worker(self):
        """Upload queued files until the queue is empty and the walk has finished.

        While the queue is empty the wait time doubles up to 20 seconds; it
        is halved again after each job.  The worker counter is decremented
        when the thread ends for any reason.
        """
        try:
            waiting = 1
            while True:
                try:
                    fpath = self.__JobFileList.get(block=True, timeout=waiting)
                except Queue.Empty:
                    if self.__DFS_FINISHED:
                        return
                    waiting = min(waiting * 2, 20)
                    continue
                waiting = max(waiting // 2, 1)
                if not os.access(fpath, os.R_OK):
                    self.__ErrorFileList.append((fpath, "Need Read Access!"))
                    LOGGER.error(fpath.join([" Permission Denied! ", " Need Read Access!"]))
                    continue
                parent = os.path.dirname(fpath)
                if self.__CountPathLevel(parent) > 7:
                    self.__CreatePath(parent)
                if self.__FileSync(fpath):  # record the file only after a successful upload
                    self.__LevDB.insertFileToSQL(fpath)
        finally:
            # ``-=`` is a read-modify-write; a lost update would keep
            # runMultiThreadSync() waiting forever.
            with self.__counterLock:
                self.__WORKER_NOW -= 1

    def setThreadNumLimit(self, T_Limit):
        """Set the number of worker threads to *T_Limit* (at least 1)."""
        self.__WORKER_LIMIT = max(1, T_Limit)

    def __Ctrl_C_EXIT(self, a, b):
        """SIGINT handler: log the interruption and exit the process."""
        LOGGER.warning("SIGINT: Interrupt the process!")
        sys.exit("SIGINT:INTERRUPT!")

    def runMultiThreadSync(self):
        """Run the whole synchronisation and block until every worker is done.

        Failed uploads are written to ``<YYYYmmdd_HHMMSS>_error_file.list``
        in the current working directory; the file is closed even when the
        run is interrupted.
        """
        start = datetime.datetime.now()
        self.__WORKER_NOW = self.__WORKER_LIMIT
        self.__InitLogIn()
        # __CreatePath() expects a path below SRC_DIR; passing SRC_DIR itself
        # creates the destination root.  Passing DST_DIR sliced the destination
        # by the source length and produced wrong names when DST_DIR was longer.
        self.__CreatePath(self.__SRC_DIR)
        self.__mylock.acquire()
        thread.start_new_thread(self.__getFileList, (self.__SRC_DIR,))
        self.__mylock.acquire()  # blocks until the walker has created the job queue
        for _ in range(self.__WORKER_LIMIT):
            thread.start_new_thread(self.__Worker, ())
        self.__mylock.release()
        errorFileName = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") + "_error_file.list"
        self.error_file = open(errorFileName, "w")
        try:
            while self.__WORKER_NOW > 0:
                # Replace workers that ended while there is still work queued.
                while self.__WORKER_NOW < self.__WORKER_LIMIT and self.__JobFileList.qsize() > 0:
                    with self.__counterLock:
                        self.__WORKER_NOW += 1
                    thread.start_new_thread(self.__Worker, ())
                    LOGGER.debug("Create a new Thread!")
                for _ in range(2):
                    LOGGER.debug("".join([str(self.__JobFileList.qsize()), " files are found and waiting for sync."]))
                    time.sleep(5)
                self.RollBack()
            self.RollBack()
        finally:
            self.error_file.close()
        elapsed = datetime.datetime.now() - start
        LOGGER.info("".join(["Finish uprsync after ", str(elapsed.days * 86400 + elapsed.seconds), " s"]))

    def RollBack(self):
        """Write every pending failed upload to the error file and clear the list.

        Each line is "<path><TAB><TAB><error>".  No retry happens here; the
        three upload attempts are made before a file reaches the error list.
        Requires ``self.error_file``, which runMultiThreadSync() opens.
        """
        while self.__ErrorFileList:
            fpath, ecode = self.__ErrorFileList.pop()
            try:
                self.error_file.write(fpath + "\t\t" + ecode + "\n")
            except UnicodeEncodeError:
                self.error_file.write(fpath.encode("utf-8") + "\t\t" + ecode + "\n")

#---------------------------- DB ----------------------------------------
class UpYunLevelDB:
    """Metadata store backed by LevelDB.

    Keys are local paths with the ``SRC_DIR`` prefix removed.  File entries
    hold the file's last modification time as text; directory entries hold
    "1".  When the database could not be opened ``levDB`` is None and every
    operation degrades to "nothing recorded".
    """

    def __init__(self, SRC_DIR, DBDIR, OverWrite=False):
        """Open the database in *DBDIR*, emptying it first when *OverWrite* is true."""
        self.SRC_DIR = SRC_DIR
        self.DBDIR = DBDIR
        if OverWrite:
            self.ClearDBFile()
        # NOTE(review): this tests write access to the *source* directory
        # although the database lives in DBDIR; confirm whether that is intended.
        if os.access(self.SRC_DIR, os.W_OK):
            self.levDB = leveldb.LevelDB(self.DBDIR)
        else:
            LOGGER.error(self.SRC_DIR.join([" Permission Denied! ", " Need Write Access!"]))
            LOGGER.error("LevelDB server run failed!\n")
            self.levDB = None

    def ClearDBFile(self):
        """Delete everything inside ``DBDIR`` (the directory itself is kept)."""
        # Bottom-up walk so that directories are empty before they are removed.
        for root, dirs, files in os.walk(self.DBDIR, topdown=False):
            for name in files:
                os.remove(os.path.join(root, name))
            for name in dirs:
                os.rmdir(os.path.join(root, name))

    def reLoadLevelDB(self):
        """Open the database in ``DBDIR`` again and use the new handle."""
        self.levDB = leveldb.LevelDB(self.DBDIR)

    def __Key(self, fpath):
        """Return the database key for *fpath*: the path without the SRC_DIR prefix."""
        return fpath[len(self.SRC_DIR):]

    def __Get(self, key):
        """Look up *key*, retrying with UTF-8 bytes when it cannot be encoded.

        Raises KeyError when the key is not stored.
        """
        try:
            return self.levDB.Get(key)
        except UnicodeEncodeError:
            LOGGER.debug("".join(["Chinese character coding file: ", key]))
            return self.levDB.Get(key.encode("utf-8"))

    def __Put(self, key, value):
        """Store *value* under *key*, retrying with UTF-8 bytes when needed."""
        try:
            self.levDB.Put(key, value)
        except UnicodeEncodeError:
            self.levDB.Put(key.encode("utf-8"), value)

    def getFileFromSQL(self, fapath):
        """Return the modification time recorded for file *fapath*, or None."""
        if self.levDB is None:
            return None
        key = self.__Key(fapath)
        LOGGER.debug("".join(["getFileFromSQL: ", key]))
        try:
            return self.__Get(key)
        except KeyError:
            LOGGER.debug(key + " not found!")
            return None

    def insertFileToSQL(self, fpath):
        """Record the current modification time of file *fpath*."""
        if self.levDB is None:
            return
        key = self.__Key(fpath)
        # Read the time once so the logged value is the stored value.
        mtime = getLastModifyTime(fpath)
        LOGGER.debug("".join(["File.Put: key: ", key, " value: ", mtime]))
        self.__Put(key, mtime)

    def insertPathToSQL(self, fpath):
        """Record that the remote counterpart of directory *fpath* exists."""
        if self.levDB is None:
            return
        key = self.__Key(fpath)
        LOGGER.debug("".join(["Path.Put: key: ", key, " value: ok"]))
        self.__Put(key, "1")

    def getPathFromSQL(self, fpath):
        """Return the value recorded for directory *fpath*, or False when there is none.

        False is also returned when the database is unavailable.  Returning
        None in that case made callers that compare with False treat every
        directory as already created.
        """
        if self.levDB is None:
            return False
        key = self.__Key(fpath)
        LOGGER.debug("".join(["getPathFromSQL: ", key]))
        try:
            return self.__Get(key)
        except KeyError:
            LOGGER.debug(key + " not found!")
            return False


def main(args):
    """Load the JSON configuration and run one synchronisation.

    *args* is the command line without the program name; its first item,
    when present, replaces the module-level ``JSON_FILE``.  Returns 0 after a
    completed run, or an error message when the configuration file is
    missing, is not valid JSON, is not a JSON object, or lacks a required
    key.  The result is intended to be passed to ``sys.exit``.
    """
    global JSON_FILE
    if args:
        JSON_FILE = args[0]
    if not os.path.isfile(JSON_FILE):
        return JSON_FILE + " is not a file!"
    with open(JSON_FILE) as config:  # closed even when reading fails
        raw = config.read()
    try:
        argv = json.loads(raw)  # parse the configuration
    except ValueError:
        return JSON_FILE + " is not a json file!"
    # Report configuration mistakes as messages instead of letting them
    # surface later as AttributeError/KeyError/TypeError tracebacks.
    if not isinstance(argv, dict):
        return JSON_FILE + " must contain a json object!"
    required = ("BUCKETNAME", "USERNAME", "PASSWORD", "SRC_DIR", "DST_DIR")
    missing = [key for key in required if key not in argv]
    if missing:
        return JSON_FILE + " is missing required keys: " + ", ".join(missing)
    if "LOG_LEVEL" in argv:
        SetLoggerLevel(argv["LOG_LEVEL"])
    initLogger()  # set up logging
    # The database directory name is the MD5 of the source/destination pair,
    # so every pair gets its own metadata database.
    seed = "?\\\\_.///".join([argv["SRC_DIR"], argv["DST_DIR"]])
    if not isinstance(seed, bytes):
        seed = seed.encode("utf-8")
    argv["DBDIR"] = hashlib.new("md5", seed).hexdigest()
    ups = UpYunUprSync(**argv)
    ups.runMultiThreadSync()  # run the task
    return 0


if __name__ == "__main__":
    # main() returns 0 on success or an error message; sys.exit() turns a
    # message into a non-zero exit status after printing it to stderr.
    sys.exit(main(sys.argv[1:]))

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值