python 作业啊啊啊

学计算机最好笑又好气的一件事,就是老师会让你自己写查重代码,去查全班以前上交作业的重复率……

Python 号称最简单的一门编程语言,我稀里糊涂地就学完了。今天就要上交查重代码的作业了,可能老师看交的人不多,又下发了参考代码压缩包,OK。现在我的任务就是看懂老师发的代码……

骚年,努力吧,冲鸭!!!

记录一下这两份代码:

一、myTools.py

 

from pathlib import Path  #导入path类
import zipfile
import shutil,os
from unrar import rarfile
import re


def _read_text(path):
    """Return the text of *path*, trying the default encoding and then UTF-8.

    Both handles are closed by ``with``. Bytes that are invalid even as UTF-8
    are replaced rather than raising, so one badly encoded file does not
    abort the whole run.
    """
    try:
        with open(path) as f:
            return f.read()
    except UnicodeDecodeError:
        with open(path, 'r', encoding='UTF-8', errors='replace') as f:
            return f.read()


def readCode(zipFolderName,filetypeList):
    """Collect the text of every matching file below ``zipFolderName``.

    Files are grouped by the second component of their path (``strPath[1]``);
    the texts of all files sharing that component are concatenated in
    ``os.walk`` order.
    NOTE(review): this presumably expects ``zipFolderName`` to be a
    single-component relative path so that component 1 is the per-student
    folder - confirm against the caller.

    Args:
        zipFolderName: Root folder to walk.
        filetypeList: Filename suffixes to accept, e.g. ``['.py', '.txt']``.

    Returns:
        dict mapping the second path component to the concatenated text.
    """
    # str.endswith takes a tuple, so each file is read at most once even
    # when several suffixes in filetypeList match it.
    suffixes = tuple(filetypeList)
    codeDict = {}
    for dirpath, _dirnames, filenames in os.walk(zipFolderName):
        for file in filenames:
            fullpath = os.path.join(dirpath, file)
            # Skip anything with 'Lib' or 'Scripts' in its path (past index 0).
            if fullpath.find('Lib')>0 or fullpath.find('Scripts')>0:
                continue
            # Split on either path separator.
            strPath = re.split(r'[\\/]', fullpath)
            # Accept only paths with 3 to 6 components: files sitting directly
            # in the root and files nested too deeply are ignored.
            if len(strPath)>6 or len(strPath)<=2:
                continue
            if not fullpath.endswith(suffixes):
                continue
            No_Name = strPath[1]
            codeDict[No_Name] = codeDict.get(No_Name,'') + _read_text(fullpath)
    return codeDict



def compareCode(codeDict):
    """Write one pairwise-overlap report per submission.

    For every key in ``codeDict`` a file ``<name>.txt`` is created in the
    current working directory. After a header row, it holds one row per other
    submission: that submission's name, ``shared/total`` significant lines,
    the share of this submission's lines found in the other one, and the
    share of the other one's lines found in this one.

    A submission with no significant lines gets a ratio of ``0.0`` instead of
    raising ``ZeroDivisionError``.

    Args:
        codeDict: dict mapping a submission name to its concatenated source.
    """
    def getSet(fileCode):
        # Reduce the source to a set of significant lines: whitespace, braces
        # and '#' comments are removed, and lines left empty are dropped.
        # Multi-line comments are not handled.
        significant = set()
        for line in fileCode.split('\n'):
            stripped = re.sub(r'\s+|[{}]|#.*$', '', line)
            if stripped:
                significant.add(stripped)
        return significant

    def displayName(key):
        # Undo the cp437 decoding of a GBK-encoded name; a key that does not
        # survive the round trip is used unchanged.
        try:
            return key.encode('cp437').decode('gbk')
        except UnicodeError:
            return key

    def percent(shared, total):
        return shared / total * 100 if total else 0.0

    # Normalise every submission once rather than once per pair.
    lineSets = {key: getSet(code) for key, code in codeDict.items()}

    for k1, set1 in lineSets.items():
        with open(displayName(k1)+".txt",'w') as fw:
            fw.write('同学\t\t重复行数/总行数\t重复比1\t\t\t重复比2\n')
            for k2, set2 in lineSets.items():
                if k1==k2:
                    continue
                shared = len(set1 & set2)
                line = displayName(k2)+'\t'+str(shared)+'/'+str(len(set1))+'\t'
                line += str(percent(shared, len(set1)))+'%\t'
                line += str(percent(shared, len(set2)))+'%\n'
                fw.write(line)

def unrarFile(filename):
    """Extract the RAR archive ``filename`` into the directory that holds it.

    Args:
        filename: Path of the archive; it is converted with ``str`` before
            being handed to ``rarfile.RarFile``.
    """
    archive = rarfile.RarFile(str(filename))
    destination = os.path.dirname(filename)
    archive.extractall(destination)

def unzipFile(filename):
    """Extract a ZIP archive next to itself, unpacking nested archives too.

    Every member is extracted into the directory containing ``filename``.
    A member whose name ends in ``.zip`` is extracted again through this
    function, and one ending in ``.rar`` through ``unrarFile``, each into the
    directory where that member landed.

    Args:
        filename: Path of the ZIP archive.

    Returns:
        The name of the archive's first member, or ``''`` for an empty archive.
    """
    with zipfile.ZipFile(filename, 'r') as archive:
        members = archive.namelist()
        if not members:
            return ''
        destination = os.path.dirname(filename)
        for member in members:
            extracted = Path(archive.extract(member, destination))
            if member.endswith('.zip'):
                unzipFile(extracted)
            elif member.endswith('.rar'):
                unrarFile(extracted)
        return members[0]

def delete_dir(dir):
    """Remove ``dir`` and any directories below it that are empty.

    Children are processed first, so a directory that only contained empty
    directories is itself removed. Paths that are not directories are left
    alone.

    Args:
        dir: Path to prune.
    """
    if not os.path.isdir(dir):
        return

    for child in os.listdir(dir):
        # Not deletable on Windows for lack of permission; further directory
        # names to leave alone can be added here.
        if child == 'System Volume Information':
            continue
        delete_dir(os.path.join(dir, child))

    if len(os.listdir(dir)) == 0:
        os.rmdir(dir)

def del_file(path):
    """Delete every file below ``path``, leaving the directories in place.

    Only real directories are descended into. Everything else is removed
    with ``os.remove``, including symbolic links of any kind, so a link
    pointing at a directory never causes files outside ``path`` to be
    deleted, and a dangling link does not abort the walk.

    Args:
        path: Directory whose files are deleted.
    """
    for name in os.listdir(path):
        path_file = os.path.join(path, name)
        if os.path.isdir(path_file) and not os.path.islink(path_file):
            del_file(path_file)
        else:
            os.remove(path_file)

 

二、codeDetection.py

 

import shutil

import myTools

# Extract the submissions archive; the returned name is the archive's first
# member and is used below as the folder to scan.
zipFolderName = myTools.unzipFile('字符串处理.zip')
# To work on an already-extracted folder, assign its name to zipFolderName
# instead, e.g. 'homework/'.

try:
    # Read the source files into a dict.
    codeDict = myTools.readCode(zipFolderName, ['.py', '.txt'])

    # Compare every pair of submissions.
    myTools.compareCode(codeDict)
finally:
    # Remove the extracted folder even when reading or comparing failed.
    myTools.del_file(zipFolderName)
    shutil.rmtree(zipFolderName)
  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值