__author__ = 'glcsnz123'
# -*- coding: utf-8 -*-
import time
import os
import datetime
import thread
import logging
import sys
import upyun
import signal
import Queue
import json
import leveldb
import hashlib
class UpYunLogger:
    """Wrapper around a named ``logging`` logger that serializes log calls.

    Each emit method (``info``, ``error``, ``debug``, ``warning``) takes an
    internal lock before delegating to the wrapped logger, so only one
    thread at a time logs through this wrapper.
    """

    def __init__(self, name):
        """Wrap the logger returned by ``logging.getLogger(name)``."""
        self.__log_model = logging.getLogger(name)
        self.__printLock = thread.allocate_lock()

    def info(self, text):
        """Log *text* at INFO level."""
        # ``with`` guarantees the lock is released even if the call raises.
        with self.__printLock:
            self.__log_model.info(text)

    def error(self, text):
        """Log *text* at ERROR level."""
        with self.__printLock:
            self.__log_model.error(text)

    def debug(self, text):
        """Log *text* at DEBUG level."""
        with self.__printLock:
            self.__log_model.debug(text)

    def warning(self, text):
        """Log *text* at WARNING level."""
        with self.__printLock:
            self.__log_model.warning(text)

    def setLevel(self, type):
        """Set the threshold of the wrapped logger to *type*.

        *type* is a ``logging`` level such as ``logging.DEBUG``. The
        parameter keeps its historical name for keyword callers.
        """
        # The argument used to be ignored and INFO was always applied.
        self.__log_model.setLevel(type)

    def addHandler(self, hdlr):
        """Attach the handler *hdlr* to the wrapped logger."""
        self.__log_model.addHandler(hdlr)
#------------CONFIG--------------------
# Module-wide logger; initLogger() rebinds this name to a plain logging.Logger.
LOGGER = UpYunLogger("UpYunrSync")
# Level applied to the log-file handler; changed through SetLoggerLevel().
__LOGLEVEL = logging.INFO
LOGFILE = "/tmp/UpYunSync.log" # log file path and file name
# Default JSON configuration file; main() replaces it with its first
# command-line argument when one is given.
JSON_FILE = "uprsync.json"
# NOTE(review): not read anywhere in the visible code -- runMultiThreadSync
# assigns a local variable of the same name instead. Confirm whether this
# constant is still needed.
ERROR_FILE = "/tmp/UpYunErrorFile.list"
# Capacity passed to Queue.Queue for the queue of files waiting for upload.
MAX_QUEUENUM = 100000
#--------------------------------------
def initLogger(LogName="UpYunrSync"):
    """Rebind the module-level ``LOGGER`` to a logger with two handlers.

    A file handler writing to ``LOGFILE`` filters at the level chosen
    through ``SetLoggerLevel`` (``__LOGLEVEL``); a second handler echoes
    records to standard output. Both use the format
    ``[time] [level] message``.

    LogName -- name handed to ``logging.getLogger``.
    """
    global LOGGER, LOGFILE, __LOGLEVEL
    LOGGER = logging.getLogger(LogName)
    # The logger's own threshold is applied before any handler sees a
    # record. It used to be fixed at INFO, which silently discarded DEBUG
    # records even when DEBUG had been configured; lower it when needed.
    LOGGER.setLevel(min(logging.INFO, __LOGLEVEL))
    formatter = logging.Formatter('[%(asctime)s] [%(levelname)s] %(message)s')

    fileHandler = logging.FileHandler(LOGFILE)
    fileHandler.setFormatter(formatter)
    fileHandler.setLevel(__LOGLEVEL)
    LOGGER.addHandler(fileHandler)

    # Mirror the log on standard output.
    stdoutHandler = logging.StreamHandler(sys.stdout)
    stdoutHandler.setFormatter(formatter)
    stdoutHandler.setLevel(logging.DEBUG)
    LOGGER.addHandler(stdoutHandler)
def SetLoggerLevel(loglevel=""):
    """Store the log level named by *loglevel* in the module-level ``__LOGLEVEL``.

    The name is matched case-insensitively against "DEBUG", "INFO",
    "WARNING" and "ERROR" (ordered DEBUG < INFO < WARNING < ERROR). Any
    other value leaves the current setting unchanged.
    """
    global __LOGLEVEL
    knownLevels = {
        "INFO": logging.INFO,
        "WARNING": logging.WARNING,
        "ERROR": logging.ERROR,
        "DEBUG": logging.DEBUG,
    }
    requested = loglevel.upper()
    if requested in knownLevels:
        __LOGLEVEL = knownLevels[requested]
def getLastModifyTime(pathname):
    """Return the last modification time of *pathname* as text.

    The result has the form "YYYY-MM-DD hh:mm:ss" in local time. When
    *pathname* is neither an existing directory nor an existing regular
    file, the local-time rendering of timestamp 0 is returned instead
    (for example "1970-01-01 08:00:00" in a UTC+8 zone).
    """
    # Spelled out instead of "%F %X": %F is not available on every platform
    # and %X depends on the locale, whereas stored values must be stable.
    timeFormat = "%Y-%m-%d %H:%M:%S"
    mtime = 0
    if os.path.isdir(pathname) or os.path.isfile(pathname):
        try:
            mtime = os.path.getmtime(pathname)
        except OSError:
            # The entry vanished between the check and the stat call; treat
            # it like a path that does not exist.
            mtime = 0
    return datetime.datetime.fromtimestamp(mtime).strftime(timeFormat)
class UpYunUprSync:
    """Upload the files below a local directory to an UpYun bucket.

    A scanner thread walks ``SRC_DIR`` and queues every regular file whose
    modification time differs from the value recorded in the metadata DB.
    Worker threads take paths from that queue, upload them and record the
    new modification time. Uploads that keep failing are collected and
    written to an error-list file.
    """

    def __init__(self, BUCKETNAME, USERNAME, PASSWORD, SRC_DIR, DST_DIR, DBDIR, THREAD_NUM=10, **kwargs):
        """Prepare the client, the metadata DB and the SIGINT handler.

        BUCKETNAME, USERNAME, PASSWORD -- credentials handed to ``upyun.UpYun``.
        SRC_DIR    -- local directory to upload (made absolute).
        DST_DIR    -- remote directory prefix.
        DBDIR      -- name of the DB directory created below ``$HOME/.UpYunDB``.
        THREAD_NUM -- number of worker threads (at least 1 is used).
        kwargs     -- extra settings; ``TASK_AGAIN`` equal to "yes"
                      (case-insensitive) clears the metadata DB first.
                      Other keys are accepted and ignored.
        """
        SRC_DIR = os.path.abspath(SRC_DIR)
        if SRC_DIR.endswith(os.sep):
            SRC_DIR = SRC_DIR[:-1]
        if DST_DIR.endswith(os.sep):
            DST_DIR = DST_DIR[:-1]
        self.__BUCKETNAME = BUCKETNAME
        self.__USERNAME = USERNAME
        self.__PASSWORD = PASSWORD
        self.__SRC_DIR = SRC_DIR
        self.__DST_DIR = DST_DIR
        # Not referenced anywhere else in this class.
        self.__HEADERS = {"x-gmkerl-rotate": "180"}
        dbRoot = os.sep.join([os.environ['HOME'], ".UpYunDB"])
        if not os.path.isdir(dbRoot):
            os.mkdir(dbRoot)
        self.__DBDIR = os.sep.join([dbRoot, DBDIR])
        self.__WORKER_LIMIT = max(1, THREAD_NUM)
        self.__ErrorFileList = []
        # Start-up hand-shake between runMultiThreadSync and the scanner.
        self.__mylock = thread.allocate_lock()
        # Protects __WORKER_NOW, which is modified by several threads.
        self.__counterLock = thread.allocate_lock()
        # Initialise the metadata DB.
        if "TASK_AGAIN" in kwargs:
            self.__LevDB = UpYunLevelDB(self.__SRC_DIR, self.__DBDIR,
                                        kwargs["TASK_AGAIN"].lower() == "yes")
        else:
            self.__LevDB = UpYunLevelDB(self.__SRC_DIR, self.__DBDIR)
        self.__InitLogIn()
        # Bind the Ctrl-C handler.
        signal.signal(signal.SIGINT, self.__Ctrl_C_EXIT)

    def __InitLogIn(self):
        """Create the UpYun client used for every remote operation."""
        self.__CilentUp = upyun.UpYun(self.__BUCKETNAME, self.__USERNAME, self.__PASSWORD, timeout=30,
                                      endpoint=upyun.ED_AUTO)

    def __CreatePath(self, path):
        """Create the remote counterpart of the local directory *path*.

        *path* must be ``SRC_DIR`` itself or a directory below it. Every
        component not yet recorded in the metadata DB is created remotely
        (up to three attempts) and then recorded. Always returns True.
        """
        srcLen = len(self.__SRC_DIR)
        parts = path[srcLen + 1:].split(os.sep)
        for depth in range(1, len(parts) + 1):
            tpath = self.__SRC_DIR + os.sep + os.sep.join(parts[:depth])
            # ``not`` instead of ``== False``: a disabled DB answers None,
            # and the directory must still be created in that case.
            if not self.__LevDB.getPathFromSQL(tpath):
                remote = "".join([self.__DST_DIR, tpath[srcLen:]])
                created = False
                for attemp in range(1, 4):
                    try:
                        self.__CilentUp.mkdir(remote)
                        created = True
                        break
                    except upyun.UpYunServiceException as ueer:
                        LOGGER.error(
                            "Error code: " + str(ueer.status) + " [Failed attemp %d]Create path: " % attemp
                            + remote + " failed!")
                # Only claim success when an attempt actually succeeded.
                if created:
                    LOGGER.info("".join(["Create path: ", remote, " oked!"]))
                # NOTE(review): the path is recorded even when every attempt
                # failed, as before, so it is not retried for each file --
                # confirm this is the wanted policy.
                self.__LevDB.insertPathToSQL(tpath)
        return True

    def __CountPathLevel(self, path):
        """Return the number of ``os.sep``-separated parts of the remote path of *path*."""
        remote = self.__DST_DIR + path[len(self.__SRC_DIR):]
        return len(remote.split(os.sep))

    def __FileSync(self, fpath):
        """Upload the local file *fpath*, trying up to three times.

        Returns True on success. On failure the file and the last error are
        appended to the error list and False is returned.
        """
        remote = "".join([self.__DST_DIR, fpath[len(self.__SRC_DIR):]])
        # Read in binary mode and close the handle deterministically.
        try:
            with open(fpath, "rb") as source:
                payload = source.read()
        except (IOError, OSError) as ioe:
            # A local read failure will not be cured by retrying the upload.
            LOGGER.error("".join([remote, " failed! <", str(ioe), ">"]))
            self.__ErrorFileList.append((fpath, str(ioe)))
            return False
        result = None
        for attemp in range(1, 4):
            try:
                self.__CilentUp.put(remote, payload)
                LOGGER.info(remote + " oked")
                return True
            except upyun.UpYunClientException as ce:
                result = (fpath, ce.msg)
                LOGGER.error("".join(
                    ["Failed attemp[%d]" % attemp, remote, " failed! <", ce.msg, ">"]))
            except upyun.UpYunServiceException as se:
                # Original note: a directory may have to be created here.
                result = (fpath, str(se.status))
                LOGGER.error("".join(
                    ["Failed attemp[%d]" % attemp, remote, " failed! - ",
                     str(se.status), " <", se.msg, ">"]))
        self.__ErrorFileList.append(result)
        return False

    def __enqueueEntries(self, currentDir, names, dirList, skippedText):
        """Classify the entries *names* of *currentDir*.

        Directories are appended to *dirList*, symbolic links are reported
        and ignored, and files whose recorded modification time differs
        from the current one are put on the job queue.
        """
        for filename in names:
            fullname = os.sep.join([currentDir, filename])
            if os.path.isdir(fullname):
                dirList.append(filename)
            elif os.path.islink(fullname):
                LOGGER.warning(fullname.join(["file: ", " is a symbol link file"]))
            else:
                recorded = self.__LevDB.getFileFromSQL(fullname)
                if recorded is not None and recorded == getLastModifyTime(fullname):
                    LOGGER.info(fullname.join(["file: ", skippedText]))
                    continue
                # Blocks while the queue is full.
                self.__JobFileList.put(fullname, block=True)

    def __getFileList(self, fpath):
        """Walk *fpath* depth-first and queue every file that needs upload.

        Runs in its own thread. ``__mylock`` (held by the caller) is
        released as soon as the job queue exists and the root directory has
        been listed, and ``__DFS_FINISHED`` is set when the walk ends for
        any reason.
        """
        start = datetime.datetime.now()
        if fpath.endswith(os.sep):
            pathStack = [fpath[:-1]]
        else:
            pathStack = [fpath]
        self.__DFS_FINISHED = False
        # Holds the paths of the files waiting to be uploaded.
        self.__JobFileList = Queue.Queue(MAX_QUEUENUM)
        # Pending directory names; "" marks "leave the current directory".
        dirList = []
        try:
            try:
                tmpList = os.listdir(fpath)
            except OSError:
                LOGGER.error(fpath.join(["Permission Denied! ", ""]))
                return
            finally:
                # Always wake runMultiThreadSync; it used to block forever
                # when the root directory could not be listed.
                self.__mylock.release()
            self.__enqueueEntries(os.sep.join(pathStack), tmpList, dirList, " skipped")
            while dirList:
                if dirList[-1] == "":
                    pathStack.pop()
                    dirList.pop()
                    continue
                # Pop before listing so that an unreadable directory is
                # skipped instead of being retried forever.
                dirname = dirList.pop()
                try:
                    tmpList = os.listdir(os.sep.join(pathStack) + os.sep + dirname)
                except (OSError, UnicodeError):
                    LOGGER.warning("".join(["Permission Denied! ", os.sep.join(pathStack), os.sep, dirname]))
                    continue
                pathStack.append(dirname)
                dirList.append("")
                self.__enqueueEntries(os.sep.join(pathStack), tmpList, dirList, " skipped!")
            # The directory walk is complete.
            self.__DFS_FINISHED = True
            LOGGER.info("".join(["Finish the dfs after ", str((datetime.datetime.now() - start).seconds), " s\n"]))
        finally:
            # Workers only exit once this flag is set, so set it on every
            # exit path, including unexpected errors.
            self.__DFS_FINISHED = True

    def __Worker(self):
        """Upload queued files until the queue is empty and the walk is done."""
        try:
            waiting = 1
            while True:
                try:
                    fpath = self.__JobFileList.get(block=True, timeout=waiting)
                except Queue.Empty:
                    if self.__DFS_FINISHED:
                        return
                    # Back off while the scanner has nothing for us.
                    waiting = min(waiting * 2, 20)
                    continue
                waiting = max(waiting // 2, 1)
                if not os.access(fpath, os.R_OK):
                    self.__ErrorFileList.append((fpath, "Need Read Access!"))
                    LOGGER.error(fpath.join([" Permission Denied! ", " Need Read Access!"]))
                    continue
                if self.__CountPathLevel(os.path.dirname(fpath)) > 7:
                    self.__CreatePath(os.path.dirname(fpath))
                # Record the file in the DB only after a successful upload.
                if self.__FileSync(fpath):
                    self.__LevDB.insertFileToSQL(fpath)
        finally:
            with self.__counterLock:
                self.__WORKER_NOW -= 1

    def setThreadNumLimit(self, T_Limit):
        """Set the number of worker threads (at least 1 is used)."""
        self.__WORKER_LIMIT = max(1, T_Limit)

    def __Ctrl_C_EXIT(self, a, b):
        """SIGINT handler: log the interruption and exit the process."""
        LOGGER.warning("SIGINT: Interrupt the process!")
        sys.exit("SIGINT:INTERRUPT!")

    def runMultiThreadSync(self):
        """Run one complete synchronisation and block until it is finished.

        Starts the scanner and the workers, restarts workers while files
        are still queued, and writes failed uploads to
        ``<YYYYmmdd_HHMMSS>_error_file.list`` in the current directory.
        """
        start = datetime.datetime.now()
        self.__WORKER_NOW = self.__WORKER_LIMIT
        self.__InitLogIn()
        # __CreatePath expects a path below SRC_DIR; passing SRC_DIR itself
        # creates the remote root. DST_DIR used to be passed here and was
        # then sliced as if it were a local path.
        self.__CreatePath(self.__SRC_DIR)
        self.__mylock.acquire()
        thread.start_new_thread(self.__getFileList, (self.__SRC_DIR,))
        # Blocks until the scanner has created the job queue.
        self.__mylock.acquire()
        for _ in range(self.__WORKER_LIMIT):
            thread.start_new_thread(self.__Worker, ())
        self.__mylock.release()
        errorFileName = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") + "_error_file.list"
        self.error_file = open(errorFileName, "w")
        try:
            while self.__WORKER_NOW > 0:
                # Replace workers that exited while files are still queued.
                while self.__WORKER_NOW < self.__WORKER_LIMIT and self.__JobFileList.qsize() > 0:
                    with self.__counterLock:
                        self.__WORKER_NOW += 1
                    thread.start_new_thread(self.__Worker, ())
                    LOGGER.debug("Create a new Thread!")
                for _ in range(2):
                    LOGGER.debug("".join([str(self.__JobFileList.qsize()), " files are found and waiting for sync."]))
                    time.sleep(5)
                self.RollBack()
            self.RollBack()
        finally:
            self.error_file.close()
        LOGGER.info("".join(["Finish uprsync after ", str((datetime.datetime.now() - start).seconds), " s"]))

    def RollBack(self):
        """Write the collected upload failures to the error file.

        Each entry has already been retried three times by the uploader
        before it reaches this list. The list is emptied.
        """
        while self.__ErrorFileList:
            fpath, ecode = self.__ErrorFileList.pop()
            try:
                self.error_file.write(fpath + "\t\t" + ecode + "\n")
            except UnicodeEncodeError:
                self.error_file.write(fpath.encode("utf-8") + "\t\t" + ecode + "\n")
#---------------------------- DB ----------------------------------------
class UpYunLevelDB:
    """Metadata store kept in a LevelDB database.

    Keys are local paths with the ``SRC_DIR`` prefix removed. Files map to
    their last modification time as text, directories map to "1". When the
    database could not be opened (``levDB`` is None) every operation is a
    no-op.
    """

    def __init__(self, SRC_DIR, DBDIR, OverWrite = False):
        """Open the database stored in *DBDIR*.

        SRC_DIR   -- local root whose prefix is stripped from every key.
        DBDIR     -- directory holding the LevelDB files.
        OverWrite -- when true, delete the existing DB files first.
        """
        self.SRC_DIR = SRC_DIR
        self.DBDIR = DBDIR
        if OverWrite:
            self.ClearDBFile()
        # NOTE(review): write access is checked on SRC_DIR although the
        # database is opened in DBDIR -- confirm this is intended.
        if os.access(self.SRC_DIR, os.W_OK):
            self.levDB = leveldb.LevelDB(self.DBDIR)
        else:
            LOGGER.error(self.SRC_DIR.join([" Permission Denied! ", " Need Write Access!"]))
            LOGGER.error("LevelDB server run failed!\n")
            self.levDB = None

    def ClearDBFile(self):
        """Delete every file and sub-directory below ``DBDIR``."""
        # Bottom-up so that directories are empty when they are removed.
        for root, dirs, files in os.walk(self.DBDIR, topdown=False):
            for name in files:
                os.remove(os.path.join(root, name))
            for name in dirs:
                os.rmdir(os.path.join(root, name))

    def reLoadLevelDB(self):
        """Open the database in ``DBDIR`` again."""
        self.levDB = leveldb.LevelDB(self.DBDIR)

    def __relKey(self, fpath):
        """Return *fpath* with the ``SRC_DIR`` prefix removed."""
        return fpath[len(self.SRC_DIR):]

    def __lookup(self, key, missing):
        """Return the value stored under *key*, or *missing* when absent."""
        try:
            return self.levDB.Get(key)
        except KeyError:
            LOGGER.debug(key + " not found!")
            return missing
        except UnicodeEncodeError:
            # The key could not be encoded implicitly; retry as UTF-8.
            LOGGER.debug("".join(["Chinese character coding file: ", key]))
            try:
                return self.levDB.Get(key.encode("utf-8"))
            except KeyError:
                return missing

    def __store(self, key, value):
        """Store *value* under *key*, falling back to a UTF-8 encoded key."""
        try:
            self.levDB.Put(key, value)
        except UnicodeEncodeError:
            self.levDB.Put(key.encode("utf-8"), value)

    def getFileFromSQL(self, fapath):
        """Return the recorded modification time of *fapath*, or None."""
        if self.levDB is None:
            return None
        key = self.__relKey(fapath)
        LOGGER.debug("".join(["getFileFromSQL: ", key]))
        return self.__lookup(key, None)

    def insertFileToSQL(self, fpath):
        """Record the current modification time of the file *fpath*."""
        if self.levDB is None:
            return
        key = self.__relKey(fpath)
        # Read the time once so the logged and the stored value agree.
        modified = getLastModifyTime(fpath)
        LOGGER.debug("".join(["File.Put: key: ", key, " value: ", modified]))
        self.__store(key, modified)

    def insertPathToSQL(self, fpath):
        """Record the directory *fpath* as created."""
        if self.levDB is None:
            return
        key = self.__relKey(fpath)
        LOGGER.debug("".join(["Path.Put: key: ", key, " value: ok"]))
        self.__store(key, "1")

    def getPathFromSQL(self, fpath):
        """Return the value recorded for directory *fpath*, or False.

        False is also returned when the database is unavailable, so callers
        testing ``== False`` treat the directory as not yet created. None
        used to be returned in that case, which such a test reads as
        "already created".
        """
        if self.levDB is None:
            return False
        key = self.__relKey(fpath)
        LOGGER.debug("".join(["getPathFromSQL: ", key]))
        return self.__lookup(key, False)
def main(args):
    """Run one synchronisation described by a JSON configuration file.

    args -- command-line arguments without the program name. The first
            one, when present, replaces the default ``JSON_FILE``.

    The file must contain a JSON object. ``SRC_DIR`` and ``DST_DIR`` are
    required; ``LOG_LEVEL`` is optional. The whole object is passed as
    keyword arguments to ``UpYunUprSync``.

    Returns 0 on success, or an error message (a string) when the
    configuration cannot be used.
    """
    global JSON_FILE
    if args:
        JSON_FILE = args[0]
    if not os.path.isfile(JSON_FILE):
        return JSON_FILE + " is not a file!"
    with open(JSON_FILE) as configFile:
        rawConfig = configFile.read()
    try:
        argv = json.loads(rawConfig)  # parse the JSON configuration
    except ValueError:
        return JSON_FILE + " is not a json file!"
    # Report an unusable configuration the same way as the errors above
    # instead of failing with an AttributeError or KeyError later.
    if not isinstance(argv, dict):
        return JSON_FILE + " does not contain a json object!"
    for required in ("SRC_DIR", "DST_DIR"):
        if required not in argv:
            return JSON_FILE + " is missing " + required + "!"
    if "LOG_LEVEL" in argv:
        SetLoggerLevel(argv["LOG_LEVEL"])
    initLogger()  # set up logging
    # The DB directory name is the MD5 of source and destination joined by
    # a fixed separator, so each (source, destination) pair gets its own DB.
    separator = "?\\\\_.///"
    try:
        digest = hashlib.new("md5", separator.join([argv["SRC_DIR"], argv["DST_DIR"]]))
    except UnicodeEncodeError:
        digest = hashlib.new("md5", separator.join(
            [argv["SRC_DIR"].encode("utf-8"), argv["DST_DIR"].encode("utf-8")]))
    argv["DBDIR"] = digest.hexdigest()
    ups = UpYunUprSync(**argv)
    ups.runMultiThreadSync()  # run the task
    return 0
# Script entry point: pass the command-line arguments (without the program
# name) to main() and use its return value as the process exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))
# UpYunrSync-LevelDB
# (Web-page residue: "latest recommended article published on 2021-03-29 20:44:23")