# 我的项目 (My project)

import pandas as pd
import csv
import numpy as np
import os
import json
from utils import convertchangpai
from utils import convertchexi
from utils import convertxiangmu
from utils import convertguobie
from utils import buchajia
from utils import sfbc
from utils import isornot_fitting_barbarism
from utils import classyfichexi
from utils import get_mean
from utils import all_list
from utils import get_mean_pengqi
from utils import get_mean_caizhaung
from utils import get_mean_weixiu
from utils import get_changpai_price
from utils import get_chexi_price
from utils import get_zhengshu_pengqi
from utils import get_zhengshu_chaizhaung
from utils import get_zhengshu_weixiu
from sklearn.preprocessing import OneHotEncoder
import xgboost as xgb
from sklearn.metrics import r2_score
from math import *

"""
生成训练文件类
传入参数:
task:任务类型,可选参数:(喷漆、拆装、维修)
sheng:机构省份名,如:四川、陕西等

方法名及功能:
get_fac_palce:获取修理厂编号对应的地级市填充到fac_place
all_list1:统计两个关联列表的频次返回一个字典
create_edition_one:生成第一版文件(给各个条目打上(规范或者不规范)标记)
create_edition_two:生成第二版文件(加上跟单,删除不规范的条目,标记车的价格类别)
get_mean1cp:获取厂牌的均值
get_meancx:获取车系的均值
create_edition_three:生成第三版文件(计算均值众数等)
create_five_feature_file:生成F值特征要用到的五个特征的数据表
create_edition_four:生成第四版文件(加入F值)
write_mean_mode:将均值众数等写入json文件
make:生成最终训练文件

例子:
trainfile = TrainFile(task='拆装',sheng='大连')#创建一个训练文件对象
trainfile.make()#调用make方法生成训练文件
trainfile.train(thread=8)#调用train方法使用8个线程开始训练
"""
class TrainFile():
    def __init__(self, task, sheng):
        """Load the raw claim table for one branch and derive per-row feature lists.

        Args:
            task: Task type, one of '喷漆', '拆装' or '维修'.
            sheng: Branch name (e.g. '四川'); only rows whose '关联机构' column
                equals '<sheng>分公司' are kept.

        Side effects:
            Creates the directory trainfile/<task>/<sheng> when it is missing.
            If the output file already exists nothing else is done, so
            ``self.dataset`` and the ``self.L*`` lists are NOT defined in that
            case.  Otherwise the source CSV, the repair-shop workbook (via
            ``get_fac_palce``) and file/查勘责任比例.txt are read.

        Raises:
            SystemExit: after printing 'task error!' when ``task`` is not one
                of the three supported values.
        """
        # os.path.join replaces literals such as 'trainfile\{}': "\{" is an
        # invalid escape sequence and a hard-coded backslash is Windows-only.
        out_dir = os.path.join('trainfile', task, sheng)
        os.makedirs(out_dir, exist_ok=True)
        self.trainfile = '201811_to_202004_all.csv'
        self.outputfile = os.path.join(out_dir, '{}训练数据.csv'.format(sheng + task))
        self.sheng = sheng
        self.task = task
        self.fac_place = {}
        self.fac_city = {}
        if os.path.exists(self.outputfile):
            # The training file was produced by an earlier run; skip the load.
            return

        self.dataset = pd.read_csv(self.trainfile)
        self.get_fac_palce()
        # .copy() makes the filtered frame independent of the full table, so the
        # column assignments below do not trigger SettingWithCopyWarning.
        self.dataset = self.dataset.loc[self.dataset['关联机构'] == '{}分公司'.format(sheng)].copy()
        for name, default in (('修理厂编码', '空'), ('车系', '空'), ('厂牌', '空'),
                              ('定损项目名称', '空'), ('工时折扣率', 0), ('合作类型', '无')):
            self.dataset[name] = self.dataset[name].fillna(default)
        if task == '喷漆':
            self.dataset = self.dataset.loc[self.dataset['折后喷漆费'] > 0].copy()
            self.dataset['喷漆类型'] = self.dataset['喷漆类型'].fillna('空')
        elif task == '拆装':
            self.dataset = self.dataset.loc[self.dataset['折后拆装费'] > 0]
        elif task == '维修':
            self.dataset = self.dataset.loc[self.dataset['折后维修费'] > 0].copy()
            self.dataset['折后拆装费'] = self.dataset['折后拆装费'].fillna(0)
            self.dataset['维修程度'] = self.dataset['维修程度'].fillna('空')
            self.dataset['配件外修费'] = self.dataset['配件外修费'].fillna(0)
        else:
            print('task error!')
            # Same SystemExit as the old exit(), without depending on the
            # interactive helper injected by the site module.
            raise SystemExit

        # ---------------- build the raw feature lists ----------------
        def column(name):
            # Values of one dataset column as a plain list, in row order.
            return list(self.dataset[name])

        def float_column(name):
            # Same as column(), with every value converted to float.
            return [float(value) for value in self.dataset[name]]

        if task == '维修':
            self.Pjwxf = column('配件外修费')
            self.Wxcd = column('维修程度')
        self.Dsdh = column('定损单号')
        self.Dsxmmc = column('定损项目名称')
        self.Dingsunxiangmu = convertxiangmu(self.Dsxmmc)
        self.Buchalist = buchajia(self.Dsdh, self.Dsxmmc)
        self.Sfbc = sfbc(self.Dsdh, self.Buchalist)
        self.Hzlx = column('合作类型')
        self.Xlcbm = column('修理厂编码')
        self.Xlcmc = column('修理厂名称')
        self.Czlx = column('操作类型')
        if task == '喷漆':
            self.Pqlx = column('喷漆类型')
        self.Gb = [convertguobie(value) for value in self.dataset['国别']]
        self.Xlclx = column('修理厂类型')
        self.Gsdjlx = column('工时单价类型')
        self.Sfcxcp = column('是否承修厂牌')
        # Zhpqf is the price the task works on; for '维修' it is the repair
        # fee minus the dismantling fee.
        self.Zhpqf = []
        if task == '喷漆':
            self.Zhpqf = float_column('折后喷漆费')
        elif task == '拆装':
            self.Zhpqf = float_column('折后拆装费')
        elif task == '维修':
            self.Chai = float_column('折后拆装费')
            self.Wei = float_column('折后维修费')
            self.Zhpqf = [wei - chai for wei, chai in zip(self.Wei, self.Chai)]
        self.Cp = column('厂牌')
        self.Cx = column('车系')
        self.Gszkl = float_column('工时折扣率')
        self.Dsygh = column('定损员工号')
        self.Hsygh = column('核损员工号')
        self.Hsyxm = column('核损员姓名')
        self.Dsyxm = column('定损员名称')
        self.Zdy = column('配件来源')

        self.pc_dict = all_list(self.Dingsunxiangmu)  # frequency of each converted item name
        self.Changpai = convertchangpai(self.Cp)  # converted brand
        self.cp_dict = all_list(self.Changpai)  # frequency of each converted brand
        self.Chexi = convertchexi(self.Changpai, self.Cx)  # converted series
        self.cx_dict = self.all_list1(self.Changpai, self.Chexi)  # frequency of each brand+series

        # B: brand + series + labour-price type + item; A: the same key prefixed
        # with the branch name.
        self.B = [brand + series + price_type + item
                  for brand, series, price_type, item
                  in zip(self.Changpai, self.Chexi, self.Gsdjlx, self.Dingsunxiangmu)]
        self.A = [sheng + key for key in self.B]
        # A1 / B1: per-row mean price of the row's A / B group (indexed by row
        # position below); A2 / B2: number of rows per A / B key.
        if task == '喷漆':
            self.A1 = get_mean_pengqi(self.A, self.Zhpqf, self.Pqlx)
            self.B1 = get_mean_pengqi(self.B, self.Zhpqf, self.Pqlx)
        elif task == '拆装':
            self.A1 = get_mean_caizhaung(self.A, self.Zhpqf)
            self.B1 = get_mean_caizhaung(self.B, self.Zhpqf)
        elif task == '维修':
            self.A1 = get_mean_weixiu(self.A, self.Zhpqf)
            self.B1 = get_mean_weixiu(self.B, self.Zhpqf)
        self.A2 = all_list(self.A)
        self.B2 = all_list(self.B)

        # Maps the first '|'-separated field of each line to the last one.
        self.Dict_duty = {}
        with open(os.path.join('file', '查勘责任比例.txt')) as f:
            for line in f:
                fields = line.strip().split('|')
                self.Dict_duty[fields[0]] = fields[-1]
        self.LS1 = ['杠', '杆', '轮', '叶', '翼', '灯', '盖', '门', '钢圈', 'A', 'B', 'C']

        # ---------------- output columns ----------------
        # The names quoted for L27-L37 are the CSV headers that
        # create_edition_one writes each list under.
        self.L1 = []   # claim number ('定损单号')
        self.L2 = []   # original item name
        self.L3 = []   # converted item name
        self.L4 = []   # cooperation type
        self.L5 = []   # repair-shop code (not used as a training feature)
        self.L6 = []   # operation type ('喷漆' only)
        self.L7 = []   # paint type ('喷漆' only)
        self.L9 = []   # converted brand
        self.L10 = []  # converted series
        self.L11 = []  # repair-shop type, normalised to '4S店' / '综合修理厂'
        self.L12 = []  # labour-price type, normalised to '4S店' / '综合修理厂'
        self.L13 = []  # '是否承修厂牌'
        self.L14 = []  # task price (Zhpqf)
        self.L15 = []  # converted country
        self.L16 = []  # repair-shop city looked up in fac_place ('无' if unknown)
        self.L17 = []  # repair-shop name
        self.L18 = []  # labour discount rate
        self.L19 = []  # task price as float (division by the discount rate is disabled)
        self.L20 = []  # repair degree ('维修' only)
        self.L21 = []  # discounted repair fee ('维修' only)
        self.L22 = []  # discounted dismantling fee ('维修' only)
        self.L23 = []  # assessor staff id
        self.L24 = []  # verifier staff id
        self.L25 = []  # verifier name
        self.L26 = []  # assessor name
        self.L27 = []  # '是否含有补差价' (value from sfbc)
        self.L28 = []  # '定损项目不规范'
        self.L29 = []  # '品牌车系录入不规范'
        self.L30 = []  # '训练数据量不足'
        self.L31 = []  # '定损项目金额过低'
        self.L32 = []  # '定损项目金额过高'
        self.L33 = []  # '是否自定义'
        self.L34 = []  # '跟单标记'
        self.L35 = []  # '是否单个项目补差'
        self.L36 = []  # '是否含有配件外修费' ('维修' only)
        self.L37 = []  # '定损价格排序过高过低'

        # ---------------- fill the output columns ----------------
        # Per-task bounds: below the first the price counts as too low, at or
        # above the second as too high.
        low_limit, high_limit = {'喷漆': (10, 999999), '拆装': (5, 1000), '维修': (5, 10000)}[task]
        for i in range(len(self.Dingsunxiangmu)):
            # Only rows with a positive discount rate and a positive price are kept.
            if not (self.Gszkl[i] > 0 and self.Zhpqf[i] > 0):
                continue
            price = self.Zhpqf[i]
            raw_name = self.Dsxmmc[i]
            item = self.Dingsunxiangmu[i]
            brand = self.Changpai[i]
            series = self.Chexi[i]

            if task == '维修':
                self.L36.append(1 if self.Pjwxf[i] > 0 else 0)
            # Name contains '差' without being a '差速器' item.
            self.L35.append(1 if '差' in raw_name and '差速器' not in raw_name else 0)

            claim_parts = self.Dsdh[i].split('-')
            try:
                duty = self.Dict_duty[claim_parts[0]]
                if duty == '同责':
                    follow = 1 if claim_parts[1] == '0202' else 0
                elif duty == '次责':
                    follow = 1
                else:
                    follow = 0
            except (KeyError, IndexError):
                # Unknown claim prefix, or a claim number without a second part.
                follow = 0
            self.L34.append(follow)

            self.L33.append(1 if '自定义' in self.Zdy[i] else 0)
            if brand == '无' or series == '无' or '货车' in brand or '摩托' in brand:
                self.L29.append(1)
            else:
                self.L29.append(0)
            if self.cp_dict[brand] < 10 or self.pc_dict[item] <= 2 or self.cx_dict[brand + series] < 5:
                self.L30.append(1)
            else:
                self.L30.append(0)

            self.L1.append(self.Dsdh[i])
            self.L2.append(raw_name)
            # Flag names that mention two or more LS1 keywords, or that the
            # isornot_fitting_barbarism helper flags.
            keyword_hits = sum(1 for keyword in self.LS1 if keyword in raw_name)
            if keyword_hits >= 2 or isornot_fitting_barbarism(raw_name):
                self.L28.append(1)
            else:
                self.L28.append(0)
            self.L3.append(item)
            self.L4.append(self.Hzlx[i])
            self.L5.append(self.Xlcbm[i])
            if task == '喷漆':
                self.L6.append(self.Czlx[i])
                self.L7.append(self.Pqlx[i])
            self.L15.append(self.Gb[i])
            self.L9.append(brand)
            self.L10.append(series)
            self.L11.append('4S店' if self.Xlclx[i] == '4S店' else '综合修理厂')
            self.L12.append('4S店' if self.Gsdjlx[i] == '4S店' else '综合修理厂')
            self.L13.append(self.Sfcxcp[i])
            self.L14.append(price)
            self.L16.append(self.fac_place.get(self.Xlcbm[i], '无'))
            self.L17.append(self.Xlcmc[i])
            self.L18.append(self.Gszkl[i])
            self.L19.append(float(price))
            if task == '维修':
                self.L20.append(self.Wxcd[i])
                self.L21.append(self.Wei[i])
                self.L22.append(self.Chai[i])
            self.L31.append(1 if price < low_limit else 0)
            self.L32.append(1 if price >= high_limit else 0)
            self.L23.append(self.Dsygh[i])
            self.L24.append(self.Hsygh[i])
            self.L25.append(self.Hsyxm[i])
            self.L26.append(self.Dsyxm[i])
            self.L27.append(self.Sfbc[i])

            # Flag prices far from the group means.  self.A[i] is the same key
            # the original rebuilt by hand for the A2 lookup.
            try:
                if self.A2[self.A[i]] >= 5 and price / self.A1[i] >= 3:
                    outlier = 1
                elif self.A1[i] / price >= 3:
                    outlier = 1
                elif price / self.B1[i] >= 2 or self.B1[i] / price >= 3:
                    outlier = 1
                else:
                    outlier = 0
            except Exception:
                # Best effort: a row whose means cannot be evaluated (missing
                # entry, zero mean, ...) is flagged, as before.
                outlier = 1
            self.L37.append(outlier)


#——————获取修理厂编号对应的地级市——————
    def get_fac_palce(self):
        """Fill ``self.fac_place`` with a repair-shop code -> city mapping.

        Reads the columns '修理厂代码' and '地级市' from the workbook
        file/全量修理厂清单.xlsx.  A code that appears on several rows keeps
        the city of its last row.
        """
        # os.path.join replaces the literal 'file\全...': "\全" is an invalid
        # escape sequence and the backslash separator is Windows-only.
        workbook = pd.read_excel(os.path.join('file', '全量修理厂清单.xlsx'))
        # Pair the two columns positionally instead of indexing them by label.
        self.fac_place.update(zip(workbook['修理厂代码'], workbook['地级市']))

# ——————获取一个列表的个数字典(两个参数)————————
    def all_list1(self,arr1, arr2):
        """Count the occurrences of each concatenated pair ``arr1[i] + arr2[i]``.

        Args:
            arr1: Sequence of strings; its length decides how many pairs are read.
            arr2: Sequence of strings, indexed in parallel with ``arr1``.

        Returns:
            A dict mapping every concatenated pair to the number of positions
            at which it occurs, in order of first appearance.
        """
        counts = {}
        for position, left in enumerate(arr1):
            pair = left + arr2[position]
            counts[pair] = counts.get(pair, 0) + 1
        return counts

#————————生成五个特征的数据表————————
    def create_five_feature_file(self):
        """Write the statistics table 五个特征.csv next to the training file.

        Reads ``self.outputfile`` (GBK) and builds six grouping keys per row
        from city, item, brand, series and repair-shop type.  For every
        grouping the mean price (``get_mean``) and the row count
        (``all_list``) are looked up, and one output row is written per
        retained input row.  For task '喷漆' only rows whose '喷漆类型' is
        '全漆' are retained.
        """
        datas = pd.read_csv(self.outputfile, encoding='gbk')
        # Same directory as the training file; avoids the old literal
        # 'trainfile\{}\{}\...' with its invalid "\{" escape sequences.
        fivefeature = os.path.join(os.path.dirname(self.outputfile), '五个特征.csv')

        city = datas['修理厂地址']
        item = datas['定损项目名称']
        if self.task == '喷漆':
            paint_type = datas['喷漆类型']
        brand = datas['厂牌']
        series = datas['车系']
        shop_type = datas['修理厂类型']
        price = datas['价格']

        x1, x2, x3, x4, x5, x6, y = [], [], [], [], [], [], []
        for i in range(len(item)):
            if self.task == '喷漆' and paint_type[i] != '全漆':
                continue
            x1.append(','.join((city[i], item[i], brand[i], series[i], shop_type[i])))
            x2.append(','.join((city[i], item[i], brand[i], shop_type[i])))
            x3.append(','.join((item[i], brand[i], series[i], shop_type[i])))
            x4.append(','.join((item[i], brand[i], shop_type[i])))
            x5.append(','.join((city[i], item[i], shop_type[i])))
            x6.append(','.join((item[i], shop_type[i])))
            y.append(price[i])

        # (mean table, count table) for each of the six groupings, in output order.
        tables = []
        for keys in (x1, x2, x3, x4, x5, x6):
            means = get_mean(keys, y)
            tables.append((means, all_list(keys)))

        def bucketed_mean(means, group):
            # Mean rounded to the nearest ten; -1 when it cannot be computed.
            try:
                return round(means[group] / 10) * 10
            except Exception:
                return -1

        def scaled_count(counts, group):
            # NOTE(review): "/ 10 * 10" is true division, so this yields the raw
            # count as a float rather than a count bucketed to tens - confirm
            # the intent.  Behaviour kept unchanged; 0 when the lookup fails.
            try:
                return counts[group] / 10 * 10
            except Exception:
                return 0

        # The with-block guarantees the file is flushed and closed.
        with open(fivefeature, 'w', newline='') as outputs:
            csv_write = csv.writer(outputs, dialect='excel')
            csv_write.writerow(
                ['地级市', '定损项目', '厂牌', '车系', '修理厂类型', 'a1', 'a2', 'b1', 'b2', 'c1', 'c2', 'd1', 'd2', 'e1', 'e2', 'f1',
                 'f2'])
            for key in x1:
                k = key.split(',')
                groups = (
                    key,
                    k[0] + ',' + k[1] + ',' + k[2] + ',' + k[4],
                    k[1] + ',' + k[2] + ',' + k[3] + ',' + k[4],
                    k[1] + ',' + k[2] + ',' + k[4],
                    k[0] + ',' + k[1] + ',' + k[4],
                    k[1] + ',' + k[4],
                )
                data = list(k)
                for (means, counts), group in zip(tables, groups):
                    data.append(bucketed_mean(means, group))
                    data.append(scaled_count(counts, group))
                csv_write.writerow(data)

#——————生成第一版文件————————
    def create_edition_one(self):
        """Write the first-edition training file to ``self.outputfile``.

        A task-specific header row is written first (none for an unknown
        task), followed by one row per entry of ``self.L2`` assembled from the
        ``self.L*`` lists built in ``__init__``.  A row that cannot be encoded
        for the output file is printed and skipped.
        """
        is_paint = self.task == '喷漆'
        is_repair = self.task == '维修'

        # Resolve the column lists before opening the file, so a missing
        # attribute fails before the existing output is truncated.
        columns = [self.L1, self.L2, self.L3, self.L4]
        if is_paint:
            columns += [self.L6, self.L7]
        columns += [self.L15, self.L9, self.L10, self.L11, self.L12, self.L13]
        if is_repair:
            columns.append(self.L20)
        columns += [self.L19, self.L14, self.L5, self.L16, self.L17]
        if is_repair:
            columns += [self.L21, self.L22]
        columns += [self.L18, self.L23, self.L26, self.L24, self.L25, self.L27, self.L35,
                    self.L33, self.L28, self.L29, self.L30, self.L31, self.L32, self.L34]
        if is_repair:
            columns.append(self.L36)
        columns.append(self.L37)

        headers = {
            '喷漆': ['定损单号', '原始定损项目名称', '定损项目名称', '合作类型', '操作类型', '喷漆类型', '国别', '厂牌', '车系', '修理厂类型',
                   '工时单价类型', '是否承修厂牌', '除以工时折扣率的喷漆费', '折后喷漆费', '修理厂编码', '修理厂地址', '修理厂名称', '工时折扣率', '定损员工号',
                   '定损员姓名', '核损员工号', '核损员姓名', '是否含有补差价', '是否单个项目补差', '是否自定义', '定损项目不规范', '品牌车系录入不规范',
                   '训练数据量不足', '定损项目金额过低', '定损项目金额过高', '跟单标记', '定损价格排序过高过低'],
            '拆装': ['定损单号', '原始定损项目名称', '定损项目名称', '合作类型', '国别', '厂牌', '车系', '修理厂类型',
                   '工时单价类型', '是否承修厂牌', '折前拆装费', '折后拆装费', '修理厂编码', '修理厂地址', '修理厂名称', '工时折扣率', '定损员工号',
                   '定损员姓名', '核损员工号', '核损员姓名', '是否含有补差价', '是否单个项目补差', '是否自定义', '定损项目不规范', '品牌车系录入不规范',
                   '训练数据量不足', '定损项目金额过低', '定损项目金额过高', '跟单标记', '定损价格排序过高过低'],
            '维修': ['定损单号', '原始定损项目名称', '定损项目名称', '合作类型', '国别', '厂牌', '车系', '修理厂类型',
                   '工时单价类型', '是否承修厂牌', '维修程度', '折扣前的费用', '折后维修费减去折后拆装费', '修理厂编码',
                   '修理厂地址', '修理厂名称', '折后维修费', '折后拆装费', '工时折扣率', '定损员工号', '定损员姓名', '核损员工号', '核损员姓名',
                   '是否含有补差价', '是否单个项目补差', '是否自定义', '定损项目不规范', '品牌车系录入不规范', '训练数据量不足', '定损项目金额过低',
                   '定损项目金额过高', '跟单标记', '是否含有配件外修费', '定损价格排序过高过低'],
        }
        header = headers.get(self.task)

        # The with-block guarantees the file is flushed and closed before any
        # later step reads it back.
        with open(self.outputfile, 'w', newline='') as outputs:
            csv_writer = csv.writer(outputs, dialect='excel')
            if header is not None:
                csv_writer.writerow(header)
            for i in range(len(self.L2)):
                data = [values[i] for values in columns]
                try:
                    csv_writer.writerow(data)
                except UnicodeEncodeError:
                    # Characters the output encoding cannot represent: report
                    # the row and carry on with the next one.
                    print(data)

# ——————生成第二版文件————————
    def create_edition_two(self):
#—————标记跟单案件———————————
        Dict_duty = {}
        filename = self.outputfile
        with open('file\查勘责任比例.txt') as f:
            datas = f.readlines()
            for data in datas:
                data = data.strip()
                data = data.split('|')
                Dict_duty[data[0]] = data[-1]
        Anhui = pd.read_csv(filename,encoding='gbk')
        dsdh = Anhui['定损单号']
        L = []
        for data in dsdh:
            data = data.split('-')
            try:
                if Dict_duty[data[0]] == '同责':
                    if data[1] == '0202':
                        L.append(1)
                    else:
                        L.append(0)
                elif Dict_duty[data[0]] == '次责':
                    L.append(1)
                else:
                    L.append(0)
            except:
                L.append(0)
        Anhui['跟单案件'] = np.c_[L]
        Anhui.to_csv(filename,index=0,encoding='gbk')
#——————根据频次计算价格————————
        i = 0
        Dict = {}
        Dict1 = {}
        filename1 = self.outputfile
        filename2 = 'temp.csv'
        if self.task == '喷漆':
            with open(filename1) as f:
                datas = f.readlines()
                for data in datas:
                    data = data.strip()
                    data = data.split(',')
                    if i > 0:
                        if len(data) == 33:
                            l1 = data[:12] + data[14:]
                            l1 = [x + '#' for x in l1]
                            l2 = ''
                            for x in l1:
                                l2 += x
                            l2 += '#' + str(data[13])
                            l = data[2] + '#' + data[5] + '#' + data[7] + '#' + data[8] + '#' + data[9]
                            if not l in Dict:
                                Dict[l] = 1
                            else:
                                Dict[l] += 1

                            if not l in Dict1:
                                Dict1[l] = [l2]
                            else:
                                Dict1[l].append(l2)
                    i += 1
            outputs = open(filename2, 'w', newline='')
            import csv
            csv_write = csv.writer(outputs, dialect='excel')
            csv_write.writerow(
                ['定损单号', '原始定损项目', '定损项目名称', '合作类型', '操作类型', '喷漆类型', '国别', '厂牌', '车系', '修理厂类型', '工时单价类型', '是否承修厂牌', '修理厂编码',
                 '修理厂地址',
                 '修理厂名称', '工时折扣率', '定损员工号', '定损员姓名', '核损员工号', '核损员姓名', '是否含有补差价', '是否单个项目补差', '是否自定义', '定损项目不规范', '品牌车系录入不规范',
                 '训练数据量不足', '定损项目金额过低', '定损项目金额过高', '跟单标记','定损价格排序过高过低','跟单案件', '不要1', '不要2', '价格', 'n', 'i', 'rank'])
            for key in Dict1:
                L = Dict1[key]
                n = Dict[key]
                D = {}
                for l in L:
                    data = l.split('#')
                    l1 = data[:-1]
                    l1 = [x + '#' for x in l1]
                    k = ''
                    for x in l1:
                        k += x
                    v = float(data[-1])
                    D[k] = v
                D = sorted(D.items(), key=lambda x: x[1])
                i = 1
                for d in D:
                    data = []
                    data.extend(d[0].split('#'))
                    data.append(d[1])
                    data.append(n)
                    data.append(i)
                    data.append(i - n / 2)
                    csv_write.writerow(data)
                    i += 1
        elif self.task == '拆装':
            with open(filename1) as f:
                datas = f.readlines()
                for data in datas:
                    data = data.strip()
                    data = data.split(',')
                    if i > 0:
                        if len(data) == 31:
                            l1 = data[:10] + data[12:]
                            l1 = [x + '#' for x in l1]
                            l2 = ''
                            for x in l1:
                                l2 += x
                            l2 += '#' + str(data[10])
                            # l1 = data[2] + '#' + data[3] + '#' + data[4] + '#' + data[5] + '#' + data[6] + '#' + data[7] + '#' + data[8] + '#' + data[9] + '#' + str(data[12])
                            l = data[2] + '#' + data[5] + '#' + data[6] + '#' + data[7]
                            if not l in Dict:
                                Dict[l] = 1
                            else:
                                Dict[l] += 1

                            if not l in Dict1:
                                Dict1[l] = [l2]
                            else:
                                Dict1[l].append(l2)
                    i += 1
            import csv
            outputs = open(filename2, 'w', newline='')
            csv_write = csv.writer(outputs, dialect='excel')
            csv_write.writerow(
                ['定损单号', '原始定损项目', '定损项目名称', '合作类型', '国别', '厂牌', '车系', '修理厂类型', '工时单价类型', '是否承修厂牌', '修理厂编码', '修理厂地址',
                 '修理厂名称', '工时折扣率', '定损员工号', '定损员姓名', '核损员工号', '核损员姓名', '是否含有补差价', '是否单个项目补差', '是否自定义', '定损项目不规范', '品牌车系录入不规范',
                 '训练数据量不足', '定损项目金额过低', '定损项目金额过高', '跟单标记','定损价格排序过高过低', '跟单案件', '不要1', '不要2', '价格', 'n', 'i', 'rank'])
            for key in Dict1:
                L = Dict1[key]
                n = Dict[key]
                D = {}
                for l in L:
                    data = l.split('#')
                    l1 = data[:-1]
                    l1 = [x + '#' for x in l1]
                    k = ''
                    for x in l1:
                        k += x
                    # k = data[0] + '#'+ data[1] + '#'+ data[2] + '#'+ data[3] + '#'+ data[4] + '#'+ data[5] + '#'+ data[6]+ '#'+ data[7]
                    v = float(data[-1])
                    D[k] = v
                D = sorted(D.items(), key=lambda x: x[1])
                i = 1
                for d in D:
                    data = []
                    data.extend(d[0].split('#'))
                    data.append(d[1])
                    data.append(n)
                    data.append(i)
                    data.append(i - n / 2)
                    csv_write.writerow(data)
                    i += 1
        elif self.task == '维修':
            with open(filename1) as f:
                datas = f.readlines()
                for data in datas:
                    data = data.strip()
                    data = data.split(',')
                    if i > 0:
                        if len(data) == 35:
                            l1 = data[:11] + data[13:]
                            l1 = [x + '#' for x in l1]
                            l2 = ''
                            for x in l1:
                                l2 += x
                            l2 += '#' + str(data[11])
                            l = data[2] + '#' + data[5] + '#' + data[6] + '#' + data[7]
                            if not l in Dict:
                                Dict[l] = 1
                            else:
                                Dict[l] += 1

                            if not l in Dict1:
                                Dict1[l] = [l2]
                            else:
                                Dict1[l].append(l2)
                    i += 1
            import csv
            outputs = open(filename2, 'w', newline='')
            csv_write = csv.writer(outputs, dialect='excel')
            csv_write.writerow(
                ['定损单号', '原始定损项目', '定损项目名称', '合作类型', '国别', '厂牌', '车系', '修理厂类型', '工时单价类型', '是否承修厂牌', '维修程度', '修理厂编码', '修理厂地址',
                 '修理厂名称', '折后维修费','折后拆装费','工时折扣率', '定损员工号', '定损员姓名', '核损员工号', '核损员姓名', '是否含有补差价', '是否单个项目补差', '是否自定义', '定损项目不规范', '品牌车系录入不规范',
                 '训练数据量不足', '定损项目金额过低', '定损项目金额过高', '跟单标记', '是否含有外修费','定损价格排序过高过低', '跟单案件', '不要1', '不要2', '价格', 'n', 'i', 'rank'])
            for key in Dict1:
                L = Dict1[key]
                n = Dict[key]
                D = {}
                for l in L:
                    data = l.split('#')
                    l1 = data[:-1]
                    l1 = [x + '#' for x in l1]
                    k = ''
                    for x in l1:
                        k += x
                    v = float(data[-1])
                    D[k] = v
                D = sorted(D.items(), key=lambda x: x[1])
                i = 1
                for d in D:
                    data = []
                    data.extend(d[0].split('#'))
                    data.append(d[1])
                    data.append(n)
                    data.append(i)
                    data.append(i - n / 2)
                    csv_write.writerow(data)
                    i += 1
        datas = pd.read_csv(filename2, encoding='gbk')
        datas.drop(['不要1', '不要2'], axis=1, inplace=True)
        datas.to_csv(filename2, encoding='gbk', index=0)
#———————得到标记————————
        datas = pd.read_csv(filename2,encoding='gbk',error_bad_lines=False)
        N = datas['n']
        I = datas['i']
        L = []
        for i in range(len(I)):
            if N[i] * 0.1 >= I[i] or N[i] * 0.9 <= I[i]:
                if I[i] <= 3 or I[i] >= N[i] - 3:
                    if N[i] >= 10:
                        L.append(1)
                    else:
                        L.append(0)
                else:
                    L.append(0)
            else:
                L.append(0)
        datas['标记'] = np.c_[L]
        if self.task == '喷漆':
            datas = datas.drop(columns = ['操作类型'])
#——————筛选掉不和规则的数据————————————
            datas = datas.loc[ datas['喷漆类型'] == '全漆' ]
        datas = datas.loc[datas['是否含有补差价'] == 0]
        datas = datas.loc[datas['定损项目不规范'] == 0]
        datas = datas.loc[datas['品牌车系录入不规范'] == 0]
        datas = datas.loc[datas['训练数据量不足'] == 0]
        datas = datas.loc[datas['定损项目金额过低'] == 0]
        datas = datas.loc[datas['定损项目金额过高'] == 0]
        datas = datas.loc[datas['定损价格排序过高过低'] == 0]
        datas = datas.loc[datas['标记'] == 0]
        if self.task == '维修':
            datas = datas.loc[datas['是否含有外修费'] == 0]
#——————获得车系类别——————————————
        class_chexi = classyfichexi(datas)
        datas['车系类别'] = np.c_[class_chexi]
        datas.to_csv(filename1,index=0,encoding='gbk')

# ——————生成第三版文件(加入均值众数)————————
    def get_mean1cp(self,X):
        """Look up every element of ``X`` in ``self.Dict1``.

        ``self.Dict1`` is assigned in ``create_edition_three`` from
        ``get_changpai_price``, and this method is called there with the
        brand column.

        Args:
            X: iterable of lookup keys.

        Returns:
            A list with one entry per key: the value stored in ``self.Dict1``,
            or -1 when the key is absent (or cannot be hashed).
        """
        L = []
        for x in X:
            try:
                L.append(self.Dict1[x])
            # Only a failed lookup maps to the -1 sentinel; anything else
            # (e.g. ``self.Dict1`` never having been set) should surface
            # instead of silently marking every row as missing.
            except (KeyError, TypeError):
                L.append(-1)
        return L

    def get_meancx(self,X, Y):
        """Look up ``X[i] + Y[i]`` in ``self.Dict2``, falling back to ``self.Dict1[X[i]]``.

        ``create_edition_three`` calls this with the brand column as ``X`` and
        the series column as ``Y``.

        Args:
            X: sequence of primary keys (also used alone for the fallback).
            Y: sequence of suffixes concatenated onto the matching ``X`` entry.

        Returns:
            A list with one entry per element of ``X``: the ``self.Dict2``
            value for the combined key, else the ``self.Dict1`` value for
            ``X[i]``, else -1.
        """
        L = []
        for i, x in enumerate(X):
            try:
                L.append(self.Dict2[x + Y[i]])
            # Combined key missing, not concatenable/hashable, or Y too short:
            # fall back to the X-only table.
            except (KeyError, TypeError, IndexError):
                try:
                    L.append(self.Dict1[x])
                except (KeyError, TypeError):
                    L.append(-1)
        return L

    def create_edition_three(self):
        """Build the third edition of the training file (adds mean / mode columns).

        Reads ``self.outputfile`` as a GBK CSV, stores the results of
        ``get_changpai_price`` and ``get_chexi_price`` on ``self.Dict1`` and
        ``self.Dict2``, appends eight statistic columns, removes every row in
        which any of those columns equals -1, and writes the table back to the
        same file.
        """
        path = self.outputfile
        frame = pd.read_csv(path, encoding='gbk')
        self.Dict1 = get_changpai_price(self.sheng)
        self.Dict2 = get_chexi_price(self.sheng)

        item_names = list(frame['定损项目名称'])
        if self.task == '喷漆':
            paint_kinds = list(frame['喷漆类型'])
        prices = list(frame['价格'])
        coop_kinds = list(frame['合作类型'])
        brands = list(frame['厂牌'])
        series = list(frame['车系'])
        shop_kinds = list(frame['修理厂类型'])

        brand_mean = self.get_mean1cp(brands)
        series_mean = self.get_meancx(brands, series)

        # Bind the task-specific helpers once; only the paint task passes the
        # extra paint-type column.
        if self.task == '喷漆':
            def mean_by(keys):
                return get_mean_pengqi(keys, prices, paint_kinds)

            def mode_by(keys):
                return get_zhengshu_pengqi(keys, prices, paint_kinds)
        elif self.task == '拆装':
            def mean_by(keys):
                return get_mean_caizhaung(keys, prices)

            def mode_by(keys):
                return get_zhengshu_chaizhaung(keys, prices)
        elif self.task == '维修':
            def mean_by(keys):
                return get_mean_weixiu(keys, prices)

            def mode_by(keys):
                return get_zhengshu_weixiu(keys, prices)

        coop_mean = mean_by(coop_kinds)
        item_mean = mean_by(item_names)
        shop_mean = mean_by(shop_kinds)
        coop_mode = mode_by(coop_kinds)
        item_mode = mode_by(item_names)
        shop_mode = mode_by(shop_kinds)

        # Column order here is the order in which they are appended to the CSV.
        additions = (
            ('厂牌均值', brand_mean),
            ('车系均值', series_mean),
            ('合作类型均值', coop_mean),
            ('项目均值', item_mean),
            ('修理厂均值', shop_mean),
            ('合作类型众数', coop_mode),
            ('项目众数', item_mode),
            ('修理厂众数', shop_mode),
        )
        for column, values in additions:
            frame[column] = np.c_[values]

        # -1 is the sentinel written when no statistic was available.
        for column, _ in additions:
            frame = frame.loc[frame[column] != -1]

        frame.to_csv(path, encoding='gbk', index=0)

# ——————生成第四版文件(加入F值)————————
    def create_edition_four(self):
        """Build the fourth edition of the training file (adds the F-value columns).

        Reads ``self.outputfile`` and the five-feature CSV for the current
        task / organisation. Each line of the five-feature file supplies, for
        six key combinations of (repair-shop address, item name, brand,
        series, repair-shop type), a value followed by a count. For every
        training row the first combination (most specific first) whose count
        is at least 8 provides ``F0``; ``F1``..``F6`` flag which combination
        matched. Rows with no match (``F0 == -1``) or whose address is '无'
        are dropped, a '机构名' column holding ``self.sheng`` is added, columns
        are reordered for the task, and the file is rewritten in place.
        """
        out_path = self.outputfile
        feature_path = 'trainfile\{}\{}\五个特征.csv'.format(self.task, self.sheng)
        frame = pd.read_csv(out_path, encoding='gbk')

        items = frame['定损项目名称']
        brands = frame['厂牌']
        series = frame['车系']
        shops = frame['修理厂类型']
        addresses = frame['修理厂地址']

        # One value table and one count table per key combination, ordered
        # from the most specific combination to the least specific one.
        level_total = 6
        values = [{} for _ in range(level_total)]
        counts = [{} for _ in range(level_total)]
        with open(feature_path) as handle:
            for raw in handle.readlines():
                cells = raw.strip().split(',')
                keys = (
                    cells[0] + cells[1] + cells[2] + cells[3] + cells[4],
                    cells[0] + cells[1] + cells[2] + cells[4],
                    cells[1] + cells[2] + cells[3] + cells[4],
                    cells[1] + cells[2] + cells[4],
                    cells[0] + cells[1] + cells[4],
                    cells[1] + cells[4],
                )
                try:
                    # Columns 5.. hold (value, count) pairs, one pair per level.
                    # A non-numeric cell (e.g. a header line) aborts the rest
                    # of this line only.
                    for level, key in enumerate(keys):
                        values[level][key] = float(cells[5 + 2 * level])
                        counts[level][key] = float(cells[6 + 2 * level])
                except ValueError:
                    pass

        f0_values = []
        level_flags = [[] for _ in range(level_total)]
        for i in range(len(items)):
            addr = addresses[i]
            item = items[i]
            brand = brands[i]
            line = series[i]
            shop = shops[i]
            candidates = (
                addr + item + brand + line + shop,
                addr + item + brand + shop,
                item + brand + line + shop,
                item + brand + shop,
                addr + item + shop,
                item + shop,
            )
            matched = None
            for level, key in enumerate(candidates):
                seen = counts[level].get(key)
                if seen is not None and seen >= 8:
                    matched = level
                    break
            if matched is None:
                f0_values.append(-1)
            else:
                f0_values.append(values[matched][candidates[matched]])
            for level in range(level_total):
                level_flags[level].append(1 if level == matched else 0)

        frame['F0'] = np.c_[f0_values]
        for column, flags in zip(('F1', 'F2', 'F3', 'F4', 'F5', 'F6'), level_flags):
            frame[column] = np.c_[flags]
        frame = frame.drop(columns=['训练数据量不足'])

        frame = frame.loc[frame['F0'] != -1]
        frame = frame.loc[frame['修理厂地址'] != '无']
        for column in ('F0', 'F1', 'F2', 'F3', 'F4', 'F5', 'F6'):
            frame = frame.loc[frame[column] != -1]
        frame['机构名'] = np.c_[[self.sheng] * len(frame)]

        # The three layouts differ only in one task-specific column that
        # follows '合作类型'.
        if self.task == '喷漆':
            extra = ['喷漆类型']
        elif self.task == '拆装':
            extra = []
        elif self.task == '维修':
            extra = ['维修程度']
        order = (
            ['定损单号', '原始定损项目', '定损项目名称', '合作类型']
            + extra
            + ['国别', '厂牌', '车系类别', '修理厂类型', '工时单价类型',
               '是否承修厂牌', '厂牌均值', '车系均值', '合作类型均值', '项目均值', '修理厂均值',
               '合作类型众数', '项目众数', '修理厂众数',
               'F0', 'F1', 'F2', 'F3', 'F4', 'F5', 'F6', '价格', '修理厂编码', '机构名',
               '修理厂地址', '修理厂名称', '工时折扣率', '定损员工号', '定损员姓名',
               '核损员工号', '核损员姓名', '车系']
        )
        frame = frame[order]
        frame.to_csv(out_path, index=0, encoding='gbk')

    def write_mean_mode(self):
        """Write the per-category mean / mode lookup tables to JSON files.

        Reads ``self.outputfile`` (GBK CSV) and, for each of the item-name,
        repair-shop-type and cooperation-type columns, records the mean and
        mode column values found on the first row where each category appears.
        The six resulting dicts are written to
        ``json/<task>/<sheng>/<column name>.json``.
        """
        # Build the directory from the same forward-slash path that is used
        # for the files below, so both refer to the same location on every
        # platform; makedirs also creates the intermediate levels.
        onehot_path = 'json/{}/{}'.format(self.task,self.sheng)
        os.makedirs(onehot_path, exist_ok=True)

        datas = pd.read_csv(self.outputfile, encoding='gbk')

        def first_seen(key_column, value_column):
            """Map each distinct key to the value on its first row."""
            # tolist() yields native Python scalars, which json can serialise
            # (numpy integer scalars cannot be dumped directly).
            keys = datas[key_column].tolist()
            values = datas[value_column].tolist()
            mapping = {}
            for key, value in zip(keys, values):
                if key not in mapping:
                    mapping[key] = value
            return mapping

        # (statistic column / output file stem, category column)
        targets = (
            ('项目均值', '定损项目名称'),
            ('项目众数', '定损项目名称'),
            ('修理厂均值', '修理厂类型'),
            ('修理厂众数', '修理厂类型'),
            ('合作类型均值', '合作类型'),
            ('合作类型众数', '合作类型'),
        )
        # Resolve every table before writing so a missing column fails
        # before any file is touched.
        tables = [(stat, first_seen(category, stat)) for stat, category in targets]

        for stat, mapping in tables:
            with open('{}/{}.json'.format(onehot_path, stat), 'w') as f:
                json.dump(mapping, f)

    def make(self):
        """Run every generation step in order to produce the final training file."""
        pipeline = (
            'create_edition_one',
            'create_edition_two',
            'create_edition_three',
            'create_five_feature_file',
            'create_edition_four',
            'write_mean_mode',
        )
        # Each step is looked up right before it runs, as in a plain call chain.
        for step_name in pipeline:
            getattr(self, step_name)()

    def train(self,thread):
        enc = OneHotEncoder()
        np.set_printoptions(threshold=5000)
        onehot_path = 'json/{}/{}'.format(self.task,self.sheng)
        train_file = self.outputfile
        train_output = 'trainfile/{}/{}/{}训练结果.xlsx'.format(self.task,self.sheng,self.sheng + self.task)
        thread = thread
        model_name = 'trainfile/{}/{}/{}.model'.format(self.task,self.sheng,self.sheng + self.task)

        x1 = []
        x2 = []
        x3 = []
        x4 = []
        x5 = []
        x6 = []
        x7 = []
        x8 = []
        x9 = []
        x10 = []
        x11 = []
        if self.task == '喷漆':
            with open(train_file, encoding='gbk') as f:
                datas = csv.reader(f)
                i = 0
                for data in datas:
                    if i > 0:
                        x1.append(data[2])
                        x2.append(data[3])
                        x3.append(data[4])
                        x4.append(data[5])
                        x5.append(data[6])
                        x6.append(data[7])
                        x7.append(data[8])
                        x9.append(data[10])
                    i += 1

            X1 = [[data] for data in list(set(x1))]
            X2 = [[data] for data in list(set(x2))]
            X3 = [[data] for data in list(set(x3))]
            X4 = [[data] for data in list(set(x4))]
            X5 = [[data] for data in list(set(x5))]
            X6 = [[data] for data in list(set(x6))]
            X7 = [[data] for data in list(set(x7))]
            X9 = [[data] for data in list(set(x9))]

            enc.fit(X1)
            D = {}
            values = enc.transform(X1).toarray()
            for i in range(len(X1)):
                D[X1[i][0]] = str(values[i]).replace('\n', '')
            with open('{}/定损项目名称.json'.format(onehot_path), 'w') as f:
                json.dump(D, f)

            enc.fit(X2)
            D = {}
            values = enc.transform(X2).toarray()
            for i in range(len(X2)):
                D[X2[i][0]] = str(values[i]).replace('\n', '')
            with open('{}/合作类型.json'.format(onehot_path), 'w') as f:
                json.dump(D, f)

            enc.fit(X3)
            D = {}
            values = enc.transform(X3).toarray()
            for i in range(len(X3)):
                D[X3[i][0]] = str(values[i]).replace('\n', '')
            with open('{}/喷漆类型.json'.format(onehot_path), 'w') as f:
                json.dump(D, f)

            enc.fit(X4)
            D = {}
            values = enc.transform(X4).toarray()
            for i in range(len(X4)):
                D[X4[i][0]] = str(values[i]).replace('\n', '')
            with open('{}/国别.json'.format(onehot_path), 'w') as f:
                json.dump(D, f)

            enc.fit(X5)
            D = {}
            values = enc.transform(X5).toarray()
            for i in range(len(X5)):
                D[X5[i][0]] = str(values[i]).replace('\n', '')
            with open('{}/厂牌.json'.format(onehot_path), 'w') as f:
                json.dump(D, f)

            enc.fit(X6)
            D = {}
            values = enc.transform(X6).toarray()
            for i in range(len(X6)):
                D[X6[i][0]] = str(values[i]).replace('\n', '')
            with open('{}/车系.json'.format(onehot_path), 'w') as f:
                json.dump(D, f)

            enc.fit(X7)
            D = {}
            values = enc.transform(X7).toarray()
            for i in range(len(X7)):
                D[X7[i][0]] = str(values[i]).replace('\n', '')
            with open('{}/修理厂类型.json'.format(onehot_path), 'w') as f:
                json.dump(D, f)

            enc.fit(X9)
            D = {}
            values = enc.transform(X9).toarray()
            for i in range(len(X9)):
                D[X9[i][0]] = str(values[i]).replace('\n', '')
            with open('{}/是否承修厂牌.json'.format(onehot_path), 'w') as f:
                json.dump(D, f)

            dataset = pd.read_csv(train_file, encoding='gbk')  # 注意自己数据路径
            train = dataset.iloc[:, 2:11].values
            train2 = dataset.iloc[:, 11:26].values
            labels = dataset.iloc[:, 26].values

            with open('{}/定损项目名称.json'.format(onehot_path)) as f:
                D = json.load(f)
            intermediary0 = np.array([np.array(
                [int(n) for n in list(D[data].replace('.', '').replace(' ', '').replace('[', '').replace(']', ''))]) for
                                      data in train[:, 0]])

            with open('{}/合作类型.json'.format(onehot_path)) as f:
                D = json.load(f)
            intermediary1 = np.array([np.array(
                [int(n) for n in list(D[data].replace('.', '').replace(' ', '').replace('[', '').replace(']', ''))]) for
                                      data in train[:, 1]])

            with open('{}/喷漆类型.json'.format(onehot_path)) as f:
                D = json.load(f)
            intermediary3 = np.array([np.array(
                [int(n) for n in list(D[data].replace('.', '').replace(' ', '').replace('[', '').replace(']', ''))]) for
                                      data in train[:, 2]])

            with open('{}/国别.json'.format(onehot_path)) as f:
                D = json.load(f)
            intermediary4 = np.array([np.array(
                [int(n) for n in list(D[data].replace('.', '').replace(' ', '').replace('[', '').replace(']', ''))]) for
                                      data in train[:, 3]])

            with open('{}/厂牌.json'.format(onehot_path)) as f:
                D = json.load(f)
            intermediary5 = np.array([np.array(
                [int(n) for n in list(D[data].replace('.', '').replace(' ', '').replace('[', '').replace(']', ''))]) for
                                      data in train[:, 4]])

            with open('{}/车系.json'.format(onehot_path)) as f:
                D = json.load(f)
            intermediary6 = np.array([np.array(
                [int(n) for n in list(D[data].replace('.', '').replace(' ', '').replace('[', '').replace(']', ''))]) for
                                      data in train[:, 5]])

            with open('{}/修理厂类型.json'.format(onehot_path)) as f:
                D = json.load(f)
            intermediary7 = np.array([np.array(
                [int(n) for n in list(D[data].replace('.', '').replace(' ', '').replace('[', '').replace(']', ''))]) for
                                      data in train[:, 6]])

            with open('{}/修理厂类型.json'.format(onehot_path)) as f:
                D = json.load(f)
            intermediary8 = np.array([np.array(
                [int(n) for n in list(D[data].replace('.', '').replace(' ', '').replace('[', '').replace(']', ''))]) for
                                      data in train[:, 7]])

            with open('{}/是否承修厂牌.json'.format(onehot_path)) as f:
                D = json.load(f)
            intermediary9 = np.array([np.array(
                [int(n) for n in list(D[data].replace('.', '').replace(' ', '').replace('[', '').replace(']', ''))]) for
                                      data in train[:, 8]])

            num = []
            num.append(len(intermediary0[0]))
            num.append(len(intermediary1[0]))
            num.append(len(intermediary3[0]))
            num.append(len(intermediary4[0]))
            num.append(len(intermediary5[0]))
            num.append(len(intermediary6[0]))
            num.append(len(intermediary7[0]))
            num.append(len(intermediary8[0]))
            num.append(len(intermediary9[0]))

            trains = np.zeros(shape=(len(intermediary0), sum(num)))
            for i in range(len(intermediary0)):
                trains[i, :num[0]] = intermediary0[i]
                trains[i, num[0]:sum(num[:2])] = intermediary1[i]
                trains[i, sum(num[:2]):sum(num[:3])] = intermediary3[i]
                trains[i, sum(num[:3]):sum(num[:4])] = intermediary4[i]
                trains[i, sum(num[:4]):sum(num[:5])] = intermediary5[i]
                trains[i, sum(num[:5]):sum(num[:6])] = intermediary6[i]
                trains[i, sum(num[:6]):sum(num[:7])] = intermediary7[i]
                trains[i, sum(num[:7]):sum(num[:8])] = intermediary8[i]
                trains[i, sum(num[:8]):sum(num)] = intermediary9[i]
        elif self.task == '拆装':
            with open(train_file, encoding='gbk') as f:
                datas = csv.reader(f)
                i = 0
                for data in datas:
                    if i > 0:
                        x1.append(data[2])
                        x2.append(data[3])
                        x5.append(data[4])
                        x6.append(data[5])
                        x7.append(data[6])
                        x8.append(data[7])
                        x9.append(data[8])
                        x10.append(data[9])
                    i += 1

            X1 = [[data] for data in list(set(x1))]
            X2 = [[data] for data in list(set(x2))]
            X5 = [[data] for data in list(set(x5))]
            X6 = [[data] for data in list(set(x6))]
            X7 = [[data] for data in list(set(x7))]
            X8 = [[data] for data in list(set(x8))]
            X9 = [[data] for data in list(set(x9))]
            X10 = [[data] for data in list(set(x10))]

            enc.fit(X1)
            D = {}
            values = enc.transform(X1).toarray()
            for i in range(len(X1)):
                D[X1[i][0]] = str(values[i]).replace('\n', '')
            with open('{}/定损项目名称.json'.format(onehot_path), 'w') as f:
                json.dump(D, f)

            enc.fit(X2)
            D = {}
            values = enc.transform(X2).toarray()
            for i in range(len(X2)):
                D[X2[i][0]] = str(values[i]).replace('\n', '')
            with open('{}/合作类型.json'.format(onehot_path), 'w') as f:
                json.dump(D, f)

            enc.fit(X5)
            D = {}
            values = enc.transform(X5).toarray()
            for i in range(len(X5)):
                D[X5[i][0]] = str(values[i]).replace('\n', '')
            with open('{}/国别.json'.format(onehot_path), 'w') as f:
                json.dump(D, f)

            enc.fit(X6)
            D = {}
            values = enc.transform(X6).toarray()
            for i in range(len(X6)):
                D[X6[i][0]] = str(values[i]).replace('\n', '')
            with open('{}/厂牌.json'.format(onehot_path), 'w') as f:
                json.dump(D, f)

            enc.fit(X7)
            D = {}
            values = enc.transform(X7).toarray()
            for i in range(len(X7)):
                D[X7[i][0]] = str(values[i]).replace('\n', '')
            with open('{}/车系.json'.format(onehot_path), 'w') as f:
                json.dump(D, f)

            enc.fit(X8)
            D = {}
            values = enc.transform(X8).toarray()
            for i in range(len(X8)):
                D[X8[i][0]] = str(values[i]).replace('\n', '')
            with open('{}/修理厂类型.json'.format(onehot_path), 'w') as f:
                json.dump(D, f)

            enc.fit(X10)
            D = {}
            values = enc.transform(X10).toarray()
            for i in range(len(X10)):
                D[X10[i][0]] = str(values[i]).replace('\n', '')
            with open('{}/是否承修厂牌.json'.format(onehot_path), 'w') as f:
                json.dump(D, f)

            dataset = pd.read_csv(train_file, encoding='gbk')  # 注意自己数据路径
            train = dataset.iloc[:, 2:10].values
            train2 = dataset.iloc[:, 10:25].values
            labels = dataset.iloc[:, 25].values
            with open('{}/定损项目名称.json'.format(onehot_path)) as f:
                D = json.load(f)
            intermediary0 = np.array([np.array(
                [int(n) for n in list(D[data].replace('.', '').replace(' ', '').replace('[', '').replace(']', ''))]) for
                                      data in train[:, 0]])

            with open('{}/合作类型.json'.format(onehot_path)) as f:
                D = json.load(f)
            intermediary1 = np.array([np.array(
                [int(n) for n in list(D[data].replace('.', '').replace(' ', '').replace('[', '').replace(']', ''))]) for
                                      data in train[:, 1]])

            with open('{}/国别.json'.format(onehot_path)) as f:
                D = json.load(f)
            intermediary4 = np.array([np.array(
                [int(n) for n in list(D[data].replace('.', '').replace(' ', '').replace('[', '').replace(']', ''))]) for
                                      data in train[:, 2]])

            with open('{}/厂牌.json'.format(onehot_path)) as f:
                D = json.load(f)
            intermediary5 = np.array([np.array(
                [int(n) for n in list(D[data].replace('.', '').replace(' ', '').replace('[', '').replace(']', ''))]) for
                                      data in train[:, 3]])

            with open('{}/车系.json'.format(onehot_path)) as f:
                D = json.load(f)
            intermediary6 = np.array([np.array(
                [int(n) for n in list(D[data].replace('.', '').replace(' ', '').replace('[', '').replace(']', ''))]) for
                                      data in train[:, 4]])

            with open('{}/修理厂类型.json'.format(onehot_path)) as f:
                D = json.load(f)
            intermediary7 = np.array([np.array(
                [int(n) for n in list(D[data].replace('.', '').replace(' ', '').replace('[', '').replace(']', ''))]) for
                                      data in train[:, 5]])

            with open('{}/修理厂类型.json'.format(onehot_path)) as f:
                D = json.load(f)
            intermediary8 = np.array([np.array(
                [int(n) for n in list(D[data].replace('.', '').replace(' ', '').replace('[', '').replace(']', ''))]) for
                                      data in train[:, 6]])

            with open('{}/是否承修厂牌.json'.format(onehot_path)) as f:
                D = json.load(f)
            intermediary9 = np.array([np.array(
                [int(n) for n in list(D[data].replace('.', '').replace(' ', '').replace('[', '').replace(']', ''))]) for
                                      data in train[:, 7]])

            num = []
            num.append(len(intermediary0[0]))
            num.append(len(intermediary1[0]))
            num.append(len(intermediary4[0]))
            num.append(len(intermediary5[0]))
            num.append(len(intermediary6[0]))
            num.append(len(intermediary7[0]))
            num.append(len(intermediary8[0]))
            num.append(len(intermediary9[0]))

            trains = np.zeros(shape=(len(intermediary0), sum(num)))
            for i in range(len(intermediary0)):
                trains[i, :num[0]] = intermediary0[i]
                trains[i, num[0]:sum(num[:2])] = intermediary1[i]
                trains[i, sum(num[:2]):sum(num[:3])] = intermediary4[i]
                trains[i, sum(num[:3]):sum(num[:4])] = intermediary5[i]
                trains[i, sum(num[:4]):sum(num[:5])] = intermediary6[i]
                trains[i, sum(num[:5]):sum(num[:6])] = intermediary7[i]
                trains[i, sum(num[:6]):sum(num[:7])] = intermediary8[i]
                trains[i, sum(num[:7]):sum(num)] = intermediary9[i]
        elif self.task == '维修':
            with open(train_file, encoding='gbk') as f:
                datas = csv.reader(f)
                i = 0
                for data in datas:
                    if i > 0:
                        x1.append(data[2])
                        x2.append(data[3])
                        x5.append(data[4])
                        x6.append(data[5])
                        x7.append(data[6])
                        x8.append(data[7])
                        x9.append(data[8])
                        x10.append(data[9])
                        x11.append(data[10])
                    i += 1

            X1 = [[data] for data in list(set(x1))]
            X2 = [[data] for data in list(set(x2))]
            X5 = [[data] for data in list(set(x5))]
            X6 = [[data] for data in list(set(x6))]
            X7 = [[data] for data in list(set(x7))]
            X8 = [[data] for data in list(set(x8))]
            X9 = [[data] for data in list(set(x9))]
            X10 = [[data] for data in list(set(x10))]
            X11 = [[data] for data in list(set(x11))]

            enc.fit(X1)
            D = {}
            values = enc.transform(X1).toarray()
            for i in range(len(X1)):
                D[X1[i][0]] = str(values[i]).replace('\n', '')
            with open('{}/定损项目名称.json'.format(onehot_path), 'w') as f:
                json.dump(D, f)

            enc.fit(X2)
            D = {}
            values = enc.transform(X2).toarray()
            for i in range(len(X2)):
                D[X2[i][0]] = str(values[i]).replace('\n', '')
            with open('{}/合作类型.json'.format(onehot_path), 'w') as f:
                json.dump(D, f)

            enc.fit(X5)
            D = {}
            values = enc.transform(X5).toarray()
            for i in range(len(X5)):
                D[X5[i][0]] = str(values[i]).replace('\n', '')
            with open('{}/维修程度.json'.format(onehot_path), 'w') as f:
                json.dump(D, f)

            enc.fit(X6)
            D = {}
            values = enc.transform(X6).toarray()
            for i in range(len(X6)):
                D[X6[i][0]] = str(values[i]).replace('\n', '')
            with open('{}/国别.json'.format(onehot_path), 'w') as f:
                json.dump(D, f)

            enc.fit(X7)
            D = {}
            values = enc.transform(X7).toarray()
            for i in range(len(X7)):
                D[X7[i][0]] = str(values[i]).replace('\n', '')
            with open('{}/厂牌.json'.format(onehot_path), 'w') as f:
                json.dump(D, f)

            enc.fit(X8)
            D = {}
            values = enc.transform(X8).toarray()
            for i in range(len(X8)):
                D[X8[i][0]] = str(values[i]).replace('\n', '')
            with open('{}/车系.json'.format(onehot_path), 'w') as f:
                json.dump(D, f)

            enc.fit(X9)
            D = {}
            values = enc.transform(X9).toarray()
            for i in range(len(X9)):
                D[X9[i][0]] = str(values[i]).replace('\n', '')
            with open('{}/修理厂类型.json'.format(onehot_path), 'w') as f:
                json.dump(D, f)

            enc.fit(X11)
            D = {}
            values = enc.transform(X11).toarray()
            for i in range(len(X11)):
                D[X11[i][0]] = str(values[i]).replace('\n', '')
            with open('{}/是否承修厂牌.json'.format(onehot_path), 'w') as f:
                json.dump(D, f)

            dataset = pd.read_csv(train_file, encoding='gbk')  # 注意自己数据路径
            train = dataset.iloc[:, 2:11].values
            train2 = dataset.iloc[:, 11:26].values
            labels = dataset.iloc[:, 26].values

            with open('{}/定损项目名称.json'.format(onehot_path)) as f:
                D = json.load(f)
            intermediary0 = np.array([np.array(
                [int(n) for n in list(D[data].replace('.', '').replace(' ', '').replace('[', '').replace(']', ''))]) for
                                      data in train[:, 0]])

            with open('{}/合作类型.json'.format(onehot_path)) as f:
                D = json.load(f)
            intermediary1 = np.array([np.array(
                [int(n) for n in list(D[data].replace('.', '').replace(' ', '').replace('[', '').replace(']', ''))]) for
                                      data in train[:, 1]])

            with open('{}/维修程度.json'.format(onehot_path)) as f:
                D = json.load(f)
            intermediary4 = np.array([np.array(
                [int(n) for n in list(D[data].replace('.', '').replace(' ', '').replace('[', '').replace(']', ''))]) for
                                      data in train[:, 2]])

            with open('{}/国别.json'.format(onehot_path)) as f:
                D = json.load(f)
            intermediary5 = np.array([np.array(
                [int(n) for n in list(D[data].replace('.', '').replace(' ', '').replace('[', '').replace(']', ''))]) for
                                      data in train[:, 3]])

            with open('{}/厂牌.json'.format(onehot_path)) as f:
                D = json.load(f)
            intermediary6 = np.array([np.array(
                [int(n) for n in list(D[data].replace('.', '').replace(' ', '').replace('[', '').replace(']', ''))]) for
                                      data in train[:, 4]])

            with open('{}/车系.json'.format(onehot_path)) as f:
                D = json.load(f)
            intermediary7 = np.array([np.array(
                [int(n) for n in list(D[data].replace('.', '').replace(' ', '').replace('[', '').replace(']', ''))]) for
                                      data in train[:, 5]])

            with open('{}/修理厂类型.json'.format(onehot_path)) as f:
                D = json.load(f)
            intermediary8 = np.array([np.array(
                [int(n) for n in list(D[data].replace('.', '').replace(' ', '').replace('[', '').replace(']', ''))]) for
                                      data in train[:, 6]])

            with open('{}/修理厂类型.json'.format(onehot_path)) as f:
                D = json.load(f)
            intermediary9 = np.array([np.array(
                [int(n) for n in list(D[data].replace('.', '').replace(' ', '').replace('[', '').replace(']', ''))]) for
                                      data in train[:, 7]])

            with open('{}/是否承修厂牌.json'.format(onehot_path)) as f:
                D = json.load(f)
            intermediary10 = np.array([np.array(
                [int(n) for n in list(D[data].replace('.', '').replace(' ', '').replace('[', '').replace(']', ''))]) for
                                       data in train[:, 8]])

            num = []
            num.append(len(intermediary0[0]))
            num.append(len(intermediary1[0]))
            num.append(len(intermediary4[0]))
            num.append(len(intermediary5[0]))
            num.append(len(intermediary6[0]))
            num.append(len(intermediary7[0]))
            num.append(len(intermediary8[0]))
            num.append(len(intermediary9[0]))
            num.append(len(intermediary10[0]))

            trains = np.zeros(shape=(len(intermediary0), sum(num)))
            for i in range(len(intermediary0)):
                trains[i, :num[0]] = intermediary0[i]
                trains[i, num[0]:sum(num[:2])] = intermediary1[i]
                trains[i, sum(num[:2]):sum(num[:3])] = intermediary4[i]
                trains[i, sum(num[:3]):sum(num[:4])] = intermediary5[i]
                trains[i, sum(num[:4]):sum(num[:5])] = intermediary6[i]
                trains[i, sum(num[:5]):sum(num[:6])] = intermediary7[i]
                trains[i, sum(num[:6]):sum(num[:7])] = intermediary8[i]
                trains[i, sum(num[:7]):sum(num[:8])] = intermediary9[i]
                trains[i, sum(num[:8]):sum(num)] = intermediary10[i]
        trainss = np.concatenate((trains, train2), axis=1)
        print('开始')
        xgtrain = xgb.DMatrix(trainss, label=labels)
        params = {
            'booster': 'gbtree',
            # 这里手写数字是0-9,是一个多类的问题,因此采用了multisoft多分类器,
            'objective': 'reg:squarederror',
            'gamma': 0.05,  # 在树的叶子节点下一个分区的最小损失,越大算法模型越保守 。[0:]
            'max_depth': 7,  # 构建树的深度 [1:]
            # 'lambda':450,  # L2 正则项权重
            'subsample': 0.5,  # 采样训练数据,设置为0.5,随机选择一般的数据实例 (0:1]
            'colsample_bytree': 1,  # 对特征的采样比例用来控制每棵随机采样的列数的占比(每一列是一个特征)
            # 'min_child_weight':12, # 节点的最少特征数
            'silent': 0,
            'eta': 0.008,  # 如同学习率
            'seed': 710,
            'nthread': thread,  # cpu 线程数,根据自己U的个数适当调整
        }
        plst = list(params.items())
        num_rounds = 800  # 迭代你次数
        model = xgb.train(plst, xgtrain, num_rounds)
        preds = model.predict(xgtrain, ntree_limit=model.best_iteration)
        Acc = []
        for i in range(len(preds)):
            acc = exp(-abs(labels[i] - preds[i]) / labels[i] * 2)
            Acc.append(acc)
        Acc = np.array(Acc)
        print('平均准确率:',r2_score(labels, preds))  # [offset:]
        dataset['预测费用'] = np.c_[preds]
        dataset['准确率'] = np.c_[Acc]
        dataset.to_excel(train_output)
        model.save_model(model_name)
import pandas as pd
import csv
import numpy as np
import os
import json
from utils import convertchangpai
from utils import convertchexi
from utils import convertxiangmu
from utils import convertguobie
from utils import buchajia
from utils import sfbc
from utils import isornot_fitting_barbarism
from utils import classyfichexi
from utils import all_list
from utils import get_mean_pengqi
from utils import get_mean_caizhaung
from utils import get_mean_weixiu
from utils import get_changpai_price
from utils import get_chexi_price
import xgboost as xgb
from math import *

"""
生成测试文件类
传入参数:
task:任务类型,可选参数:(喷漆、拆装、维修)
sheng:机构省份名,如:四川、陕西等
starttime:起始时间(包含),格式如 2019-04、2018-12、2020-02
endtime:截止时间(不包含),格式同 starttime
方法名及功能:
get_fac_palce:获取修理厂编号对应的地级市填充到fac_place
all_list1:统计两个关联列表的频次返回一个字典
create_edition_one:生成第一版文件(给各个条目打上(规范或者不规范)标记)
create_edition_two:生成第二版文件(加上跟单,删除不规范的条目,标记车的价格类别)
get_mean1cp:获取厂牌的均值
get_meancx:获取车系的均值
create_edition_three:生成第三版文件(加载均值众数等)
create_edition_four:生成第四版文件(加入F值)
make:生成最终测试文件

例子:
testfile = TestFile(task='喷漆',sheng='四川',starttime='2020-03',endtime='2020-04')#创建一个测试文件对象
testfile.make()#生成测试文件
testfile.eval()#进行测试
"""
class TestFile():
    def __init__(self,task,sheng,starttime,endtime):
        """Load the raw claims of one branch/time window and derive per-row fields and flags.

        The output directory ``testfile/<task>/<sheng>`` is created when missing.
        If the prediction CSV (``self.outputfile``) already exists, nothing is
        loaded and only the path/name attributes are set.  Otherwise the raw CSV
        is read and filtered, helper statistics are computed, and the ``self.L*``
        column lists later written by ``create_edition_one`` are filled.

        Args:
            task: one of '喷漆', '拆装', '维修'.  Any other value prints
                'task error!' and raises ``SystemExit`` (only reached when the
                output file does not exist yet).
            sheng: branch name; rows are kept when '关联机构' equals '<sheng>分公司'.
            starttime: inclusive lower bound compared with '末核损通过时间'.
            endtime: exclusive upper bound compared with '末核损通过时间'.
        """
        # os.path.join yields the same backslash paths on Windows as the former
        # 'testfile\{}' literals, without relying on unrecognised escape
        # sequences, and produces real sub-directories on other platforms.
        out_dir = os.path.join('testfile', task, sheng)
        os.makedirs(out_dir, exist_ok=True)
        self.trainfile = '201811_to_202004_all.csv'
        self.outputfile = os.path.join(out_dir, '{}预测数据.csv'.format(sheng + task))
        self.sheng = sheng
        self.task = task
        self.fac_place = {}
        self.fac_city = {}
        if os.path.exists(self.outputfile):
            # The prediction file was generated earlier; skip all loading.
            return

        self.dataset = pd.read_csv(self.trainfile)
        self.get_fac_palce()
        in_window = ((self.dataset['关联机构'] == '{}分公司'.format(sheng))
                     & (self.dataset['末核损通过时间'] >= starttime)
                     & (self.dataset['末核损通过时间'] < endtime))
        # .copy() so the column assignments below act on an independent frame.
        self.dataset = self.dataset.loc[in_window].copy()
        self.dataset['修理厂编码'] = self.dataset['修理厂编码'].fillna('空')
        self.dataset['车系'] = self.dataset['车系'].fillna('空')
        self.dataset['厂牌'] = self.dataset['厂牌'].fillna('空')
        self.dataset['定损项目名称'] = self.dataset['定损项目名称'].fillna('空')
        self.dataset['工时折扣率'] = self.dataset['工时折扣率'].fillna(0)
        self.dataset['合作类型'] = self.dataset['合作类型'].fillna('无')
        if task == '喷漆':
            self.dataset = self.dataset.loc[self.dataset['折后喷漆费'] > 0].copy()
            self.dataset['喷漆类型'] = self.dataset['喷漆类型'].fillna('空')
        elif task == '拆装':
            self.dataset = self.dataset.loc[self.dataset['折后拆装费'] > 0]
        elif task == '维修':
            self.dataset = self.dataset.loc[self.dataset['折后维修费'] > 0].copy()
            self.dataset['折后拆装费'] = self.dataset['折后拆装费'].fillna(0)
            self.dataset['维修程度'] = self.dataset['维修程度'].fillna('空')
            self.dataset['配件外修费'] = self.dataset['配件外修费'].fillna(0)
        else:
            print('task error!')
            # Same effect as the former exit() call, without depending on the
            # site-provided interactive helper.
            raise SystemExit

        # ---- Build the per-column feature lists ----
        if task == '维修':
            self.Pjwxf = list(self.dataset['配件外修费'])
            self.Wxcd = list(self.dataset['维修程度'])
        self.Dsdh = list(self.dataset['定损单号'])
        self.Dsxmmc = list(self.dataset['定损项目名称'])
        self.Dingsunxiangmu = convertxiangmu(self.Dsxmmc)
        self.Buchalist = buchajia(self.Dsdh, self.Dsxmmc)
        self.Sfbc = sfbc(self.Dsdh, self.Buchalist)
        self.Hzlx = list(self.dataset['合作类型'])
        self.Xlcbm = list(self.dataset['修理厂编码'])
        self.Xlcmc = list(self.dataset['修理厂名称'])
        self.Czlx = list(self.dataset['操作类型'])
        if task == '喷漆':
            self.Pqlx = list(self.dataset['喷漆类型'])
        self.Gb = [convertguobie(data) for data in self.dataset['国别']]
        self.Xlclx = list(self.dataset['修理厂类型'])
        self.Gsdjlx = list(self.dataset['工时单价类型'])
        self.Sfcxcp = list(self.dataset['是否承修厂牌'])
        # self.Zhpqf holds the fee under evaluation for the chosen task; for
        # '维修' it is the repair fee minus the dismantling fee.
        self.Zhpqf = []
        if task == '喷漆':
            self.Zhpqf = [float(data) for data in self.dataset['折后喷漆费']]
        elif task == '拆装':
            self.Zhpqf = [float(data) for data in self.dataset['折后拆装费']]
        elif task == '维修':
            self.Chai = [float(data) for data in self.dataset['折后拆装费']]
            self.Wei = [float(data) for data in self.dataset['折后维修费']]
            self.Zhpqf = [wei - chai for wei, chai in zip(self.Wei, self.Chai)]
        self.Cp = list(self.dataset['厂牌'])
        self.Cx = list(self.dataset['车系'])
        self.Gszkl = [float(data) for data in self.dataset['工时折扣率']]
        self.Dsygh = list(self.dataset['定损员工号'])
        self.Hsygh = list(self.dataset['核损员工号'])
        self.Hsyxm = list(self.dataset['核损员姓名'])
        self.Dsyxm = list(self.dataset['定损员名称'])
        self.Zdy = list(self.dataset['配件来源'])

        self.pc_dict = all_list(self.Dingsunxiangmu)  # frequency of each converted item name
        self.Changpai = convertchangpai(self.Cp)  # converted brand
        self.cp_dict = all_list(self.Changpai)  # frequency of each converted brand
        self.Chexi = convertchexi(self.Changpai, self.Cx)  # converted car series
        self.cx_dict = self.all_list1(self.Changpai, self.Chexi)  # frequency of each brand+series

        self.A = []  # key: branch + brand + series + labour price type + item
        self.B = []  # key: brand + series + labour price type + item
        for i in range(len(self.Changpai)):
            self.A.append(sheng + self.Changpai[i] + self.Chexi[i] + self.Gsdjlx[i] + self.Dingsunxiangmu[i])
            self.B.append(self.Changpai[i] + self.Chexi[i] + self.Gsdjlx[i] + self.Dingsunxiangmu[i])
        # A1/B1: mean fee per key as computed by the task-specific utils helper
        # (indexed by row position below).
        if task == '喷漆':
            self.A1 = get_mean_pengqi(self.A, self.Zhpqf, self.Pqlx)
            self.B1 = get_mean_pengqi(self.B, self.Zhpqf, self.Pqlx)
        elif task == '拆装':
            self.A1 = get_mean_caizhaung(self.A, self.Zhpqf)
            self.B1 = get_mean_caizhaung(self.B, self.Zhpqf)
        elif task == '维修':
            self.A1 = get_mean_weixiu(self.A, self.Zhpqf)
            self.B1 = get_mean_weixiu(self.B, self.Zhpqf)
        self.A2 = all_list(self.A)  # occurrence count per A key
        self.B2 = all_list(self.B)  # occurrence count per B key

        # Survey duty table: first '|' field -> last '|' field of each line.
        self.Dict_duty = {}
        with open(os.path.join('file', '查勘责任比例.txt')) as f:
            for line in f:
                fields = line.strip().split('|')
                self.Dict_duty[fields[0]] = fields[-1]
        self.LS1 = ['杠', '杆', '轮', '叶', '翼', '灯', '盖', '门', '钢圈', 'A', 'B', 'C']

        # ---- Output column lists ----
        # The labels give the CSV column each list is written under by
        # create_edition_one.
        self.L1 = []   # 定损单号
        self.L2 = []   # 原始定损项目名称
        self.L3 = []   # 定损项目名称
        self.L4 = []   # 合作类型
        self.L5 = []   # 修理厂编码
        self.L6 = []   # 操作类型 ('喷漆' only)
        self.L7 = []   # 喷漆类型 ('喷漆' only)
        self.L9 = []   # 厂牌
        self.L10 = []  # 车系
        self.L11 = []  # 修理厂类型
        self.L12 = []  # 工时单价类型
        self.L13 = []  # 是否承修厂牌
        self.L14 = []  # task fee (折后喷漆费 / 折后拆装费 / 折后维修费减去折后拆装费)
        self.L15 = []  # 国别
        self.L16 = []  # 修理厂地址
        self.L17 = []  # 修理厂名称
        self.L18 = []  # 工时折扣率
        self.L19 = []  # pre-discount fee column; currently the same value as L14
        self.L20 = []  # 维修程度 ('维修' only)
        self.L21 = []  # 折后维修费 ('维修' only)
        self.L22 = []  # 折后拆装费 ('维修' only)
        self.L23 = []  # 定损员工号
        self.L24 = []  # 核损员工号
        self.L25 = []  # 核损员姓名
        self.L26 = []  # 定损员姓名
        self.L27 = []  # 是否含有补差价
        self.L28 = []  # 定损项目不规范
        self.L29 = []  # 品牌车系录入不规范
        self.L30 = []  # 训练数据量不足
        self.L31 = []  # 定损项目金额过低
        self.L32 = []  # 定损项目金额过高
        self.L33 = []  # 是否自定义
        self.L34 = []  # 跟单标记
        self.L35 = []  # 是否单个项目补差
        self.L36 = []  # 是否含有配件外修费 ('维修' only)
        self.L37 = []  # 定损价格排序过高过低

        # Per-task (too-low, too-high) fee thresholds.
        low_limit, high_limit = {'喷漆': (10, 999999), '拆装': (5, 1000), '维修': (5, 10000)}[task]

        # ---- Fill the column lists ----
        for i in range(len(self.Dingsunxiangmu)):
            # Only rows with a positive discount rate and a positive fee are kept.
            if not (self.Gszkl[i] > 0 and self.Zhpqf[i] > 0):
                continue
            raw_name = self.Dsxmmc[i]
            fee = float(self.Zhpqf[i])

            if task == '维修':
                self.L36.append(1 if self.Pjwxf[i] > 0 else 0)
            # '差' in the item name, unless it is part of '差速器'.
            self.L35.append(1 if '差' in raw_name and '差速器' not in raw_name else 0)

            parts = self.Dsdh[i].split('-')
            try:
                duty = self.Dict_duty[parts[0]]
                if duty == '同责':
                    follow = 1 if parts[1] == '0202' else 0
                elif duty == '次责':
                    follow = 1
                else:
                    follow = 0
            except (KeyError, IndexError):
                # Unknown claim prefix, or a claim number without a '-' suffix.
                follow = 0
            self.L34.append(follow)

            self.L33.append(1 if '自定义' in self.Zdy[i] else 0)
            if self.Changpai[i] == '无' or self.Chexi[i] == '无' or '货车' in self.Changpai[i] or '摩托' in self.Changpai[i]:
                self.L29.append(1)
            else:
                self.L29.append(0)
            if self.cp_dict[self.Changpai[i]] < 10 or self.pc_dict[self.Dingsunxiangmu[i]] <= 2 or self.cx_dict[self.Changpai[i] + self.Chexi[i]] < 5:
                self.L30.append(1)
            else:
                self.L30.append(0)

            self.L1.append(self.Dsdh[i])
            self.L2.append(raw_name)
            # Flag names containing two or more LS1 keywords; otherwise defer
            # to isornot_fitting_barbarism.
            keyword_hits = sum(1 for s in self.LS1 if s in raw_name)
            if keyword_hits >= 2 or isornot_fitting_barbarism(raw_name):
                self.L28.append(1)
            else:
                self.L28.append(0)
            self.L3.append(self.Dingsunxiangmu[i])
            self.L4.append(self.Hzlx[i])
            self.L5.append(self.Xlcbm[i])  # not used as a training feature
            if task == '喷漆':
                self.L6.append(self.Czlx[i])
                self.L7.append(self.Pqlx[i])
            self.L15.append(self.Gb[i])
            self.L9.append(self.Changpai[i])
            self.L10.append(self.Chexi[i])
            self.L11.append('4S店' if self.Xlclx[i] == '4S店' else '综合修理厂')
            self.L12.append('4S店' if self.Gsdjlx[i] == '4S店' else '综合修理厂')
            self.L13.append(self.Sfcxcp[i])
            self.L14.append(self.Zhpqf[i])
            self.L16.append(self.fac_place.get(self.Xlcbm[i], '无'))
            self.L17.append(self.Xlcmc[i])
            self.L18.append(self.Gszkl[i])
            self.L19.append(fee)
            if task == '维修':
                self.L20.append(self.Wxcd[i])
                self.L21.append(self.Wei[i])
                self.L22.append(self.Chai[i])
            self.L31.append(1 if fee < low_limit else 0)
            self.L32.append(1 if fee >= high_limit else 0)
            self.L23.append(self.Dsygh[i])
            self.L24.append(self.Hsygh[i])
            self.L25.append(self.Hsyxm[i])
            self.L26.append(self.Dsyxm[i])
            self.L27.append(self.Sfbc[i])
            try:
                # NOTE(review): the A2 >= 5 guard only applies to the first
                # ratio test; confirm that is intended.
                if self.A2[self.A[i]] >= 5 and fee / self.A1[i] >= 3:
                    outlier = 1
                elif self.A1[i] / fee >= 3:
                    outlier = 1
                elif fee / self.B1[i] >= 2 or self.B1[i] / fee >= 3:
                    outlier = 1
                else:
                    outlier = 0
            except Exception:
                # Best effort: any lookup or division failure marks the row.
                outlier = 1
            self.L37.append(outlier)


#——————获取修理厂编号对应的地级市——————
    def get_fac_palce(self):
        """Fill ``self.fac_place`` with a repair-shop code -> prefecture-level city mapping.

        Reads the '修理厂代码' and '地级市' columns of the workbook
        ``file/全量修理厂清单.xlsx``.  When a code occurs more than once the
        last row wins.
        """
        # os.path.join gives the same 'file\...' path on Windows as the former
        # literal, without an unrecognised escape sequence in the string.
        datas = pd.read_excel(os.path.join('file', '全量修理厂清单.xlsx'))
        self.fac_place.update(zip(datas['修理厂代码'], datas['地级市']))

# ——————获取一个列表的个数字典(两个参数)————————
    def all_list1(self,arr1, arr2):
        """Count how often each pairwise concatenation ``arr1[i] + arr2[i]`` occurs.

        Args:
            arr1: sequence providing the left part of every key.
            arr2: sequence providing the right part; must be at least as long
                as ``arr1`` (a shorter one raises ``IndexError``).

        Returns:
            dict mapping each concatenated key to its number of occurrences.
        """
        counts = {}
        for idx, left in enumerate(arr1):
            key = left + arr2[idx]
            counts[key] = counts.get(key, 0) + 1
        return counts

#——————生成第一版文件————————
    def create_edition_one(self):
        outputs = open(self.outputfile, 'w', newline='')
        csv_writer = csv.writer(outputs, dialect='excel')
        if self.task == '喷漆':
            csv_writer.writerow(['定损单号', '原始定损项目名称', '定损项目名称', '合作类型', '操作类型', '喷漆类型', '国别', '厂牌', '车系', '修理厂类型', \
                                 '工时单价类型', '是否承修厂牌', '除以工时折扣率的喷漆费', '折后喷漆费', '修理厂编码', '修理厂地址', '修理厂名称', '工时折扣率', '定损员工号',
                                 '定损员姓名', '核损员工号', '核损员姓名', '是否含有补差价', '是否单个项目补差', '是否自定义', '定损项目不规范', '品牌车系录入不规范',
                                 '训练数据量不足', '定损项目金额过低', '定损项目金额过高', '跟单标记', '定损价格排序过高过低'])
        elif self.task == '拆装':
            csv_writer.writerow(['定损单号', '原始定损项目名称', '定损项目名称', '合作类型', '国别', '厂牌', '车系', '修理厂类型', \
                                 '工时单价类型', '是否承修厂牌', '折前拆装费', '折后拆装费', '修理厂编码', '修理厂地址', '修理厂名称', '工时折扣率', '定损员工号',
                                 '定损员姓名', '核损员工号', '核损员姓名', '是否含有补差价', '是否单个项目补差', '是否自定义', '定损项目不规范', '品牌车系录入不规范',
                                 '训练数据量不足', '定损项目金额过低', '定损项目金额过高', '跟单标记', '定损价格排序过高过低'])
        elif self.task == '维修':
            csv_writer.writerow(['定损单号', '原始定损项目名称', '定损项目名称', '合作类型', '国别', '厂牌', '车系', '修理厂类型', \
                                 '工时单价类型', '是否承修厂牌', '维修程度', '折扣前的费用', '折后维修费减去折后拆装费', '修理厂编码', \
                                 '修理厂地址', '修理厂名称', '折后维修费', '折后拆装费', '工时折扣率', '定损员工号', '定损员姓名', '核损员工号', '核损员姓名',
                                 '是否含有补差价', '是否单个项目补差', '是否自定义', '定损项目不规范', '品牌车系录入不规范', '训练数据量不足', '定损项目金额过低',
                                 '定损项目金额过高', '跟单标记', '是否含有配件外修费','定损价格排序过高过低'])

        for i in range(len(self.L2)):
            try:
                data = []
                data.append(self.L1[i])
                data.append(self.L2[i])
                data.append(self.L3[i])
                data.append(self.L4[i])
                if self.task == '喷漆':
                    data.append(self.L6[i])
                    data.append(self.L7[i])
                data.append(self.L15[i])
                data.append(self.L9[i])
                data.append(self.L10[i])
                data.append(self.L11[i])
                data.append(self.L12[i])
                data.append(self.L13[i])
                if self.task == '维修':
                    data.append(self.L20[i])
                data.append(self.L19[i])
                data.append(self.L14[i])
                data.append(self.L5[i])
                data.append(self.L16[i])
                data.append(self.L17[i])
                if self.task == '维修':
                    data.append(self.L21[i])
                    data.append(self.L22[i])
                data.append(self.L18[i])
                data.append(self.L23[i])
                data.append(self.L26[i])
                data.append(self.L24[i])
                data.append(self.L25[i])
                data.append(self.L27[i])
                data.append(self.L35[i])
                data.append(self.L33[i])
                data.append(self.L28[i])
                data.append(self.L29[i])
                data.append(self.L30[i])
                data.append(self.L31[i])
                data.append(self.L32[i])
                data.append(self.L34[i])
                if self.task == '维修':
                    data.append(self.L36[i])
                data.append(self.L37[i])
                csv_writer.writerow(data)
            except UnicodeEncodeError:
                print(data)

# ——————生成第二版文件————————
    def create_edition_two(self):
#—————标记跟单案件———————————
        Dict_duty = {}
        filename = self.outputfile
        with open('file\查勘责任比例.txt') as f:
            datas = f.readlines()
            for data in datas:
                data = data.strip()
                data = data.split('|')
                Dict_duty[data[0]] = data[-1]
        Anhui = pd.read_csv(filename,encoding='gbk')
        dsdh = Anhui['定损单号']
        L = []
        for data in dsdh:
            data = data.split('-')
            try:
                if Dict_duty[data[0]] == '同责':
                    if data[1] == '0202':
                        L.append(1)
                    else:
                        L.append(0)
                elif Dict_duty[data[0]] == '次责':
                    L.append(1)
                else:
                    L.append(0)
            except:
                L.append(0)
        Anhui['跟单案件'] = np.c_[L]
        Anhui.to_csv(filename,index=0,encoding='gbk')
#——————根据频次计算价格————————
        i = 0
        Dict = {}
        Dict1 = {}
        filename1 = self.outputfile
        filename2 = 'temp.csv'
        if self.task == '喷漆':
            with open(filename1) as f:
                datas = f.readlines()
                for data in datas:
                    data = data.strip()
                    data = data.split(',')
                    if i > 0:
                        if len(data) == 33:
                            l1 = data[:12] + data[14:]
                            l1 = [x + '#' for x in l1]
                            l2 = ''
                            for x in l1:
                                l2 += x
                            l2 += '#' + str(data[13])
                            l = data[2] + '#' + data[5] + '#' + data[7] + '#' + data[8] + '#' + data[9]
                            if not l in Dict:
                                Dict[l] = 1
                            else:
                                Dict[l] += 1

                            if not l in Dict1:
                                Dict1[l] = [l2]
                            else:
                                Dict1[l].append(l2)
                    i += 1
            outputs = open(filename2, 'w', newline='')
            import csv
            csv_write = csv.writer(outputs, dialect='excel')
            csv_write.writerow(
                ['定损单号', '原始定损项目', '定损项目名称', '合作类型', '操作类型', '喷漆类型', '国别', '厂牌', '车系', '修理厂类型', '工时单价类型', '是否承修厂牌', '修理厂编码',
                 '修理厂地址',
                 '修理厂名称', '工时折扣率', '定损员工号', '定损员姓名', '核损员工号', '核损员姓名', '是否含有补差价', '是否单个项目补差', '是否自定义', '定损项目不规范', '品牌车系录入不规范',
                 '训练数据量不足', '定损项目金额过低', '定损项目金额过高', '跟单标记','定损价格排序过高过低','跟单案件', '不要1', '不要2', '价格', 'n', 'i', 'rank'])
            for key in Dict1:
                L = Dict1[key]
                n = Dict[key]
                D = {}
                for l in L:
                    data = l.split('#')
                    l1 = data[:-1]
                    l1 = [x + '#' for x in l1]
                    k = ''
                    for x in l1:
                        k += x
                    v = float(data[-1])
                    D[k] = v
                D = sorted(D.items(), key=lambda x: x[1])
                i = 1
                for d in D:
                    data = []
                    data.extend(d[0].split('#'))
                    data.append(d[1])
                    data.append(n)
                    data.append(i)
                    data.append(i - n / 2)
                    csv_write.writerow(data)
                    i += 1
        elif self.task == '拆装':
            with open(filename1) as f:
                datas = f.readlines()
                for data in datas:
                    data = data.strip()
                    data = data.split(',')
                    if i > 0:
                        if len(data) == 31:
                            l1 = data[:10] + data[12:]
                            l1 = [x + '#' for x in l1]
                            l2 = ''
                            for x in l1:
                                l2 += x
                            l2 += '#' + str(data[10])
                            l = data[2] + '#' + data[5] + '#' + data[6] + '#' + data[7]
                            if not l in Dict:
                                Dict[l] = 1
                            else:
                                Dict[l] += 1

                            if not l in Dict1:
                                Dict1[l] = [l2]
                            else:
                                Dict1[l].append(l2)
                    i += 1
            import csv
            outputs = open(filename2, 'w', newline='')
            csv_write = csv.writer(outputs, dialect='excel')
            csv_write.writerow(
                ['定损单号', '原始定损项目', '定损项目名称', '合作类型', '国别', '厂牌', '车系', '修理厂类型', '工时单价类型', '是否承修厂牌', '修理厂编码', '修理厂地址',
                 '修理厂名称', '工时折扣率', '定损员工号', '定损员姓名', '核损员工号', '核损员姓名', '是否含有补差价', '是否单个项目补差', '是否自定义', '定损项目不规范', '品牌车系录入不规范',
                 '训练数据量不足', '定损项目金额过低', '定损项目金额过高', '跟单标记','定损价格排序过高过低', '跟单案件', '不要1', '不要2', '价格', 'n', 'i', 'rank'])
            for key in Dict1:
                L = Dict1[key]
                n = Dict[key]
                D = {}
                for l in L:
                    data = l.split('#')
                    l1 = data[:-1]
                    l1 = [x + '#' for x in l1]
                    k = ''
                    for x in l1:
                        k += x
                    v = float(data[-1])
                    D[k] = v
                D = sorted(D.items(), key=lambda x: x[1])
                i = 1
                for d in D:
                    data = []
                    data.extend(d[0].split('#'))
                    data.append(d[1])
                    data.append(n)
                    data.append(i)
                    data.append(i - n / 2)
                    csv_write.writerow(data)
                    i += 1
        elif self.task == '维修':
            with open(filename1) as f:
                datas = f.readlines()
                for data in datas:
                    data = data.strip()
                    data = data.split(',')
                    if i > 0:
                        if len(data) == 35:
                            l1 = data[:11] + data[13:]
                            l1 = [x + '#' for x in l1]
                            l2 = ''
                            for x in l1:
                                l2 += x
                            l2 += '#' + str(data[11])
                            l = data[2] + '#' + data[5] + '#' + data[6] + '#' + data[7]
                            if not l in Dict:
                                Dict[l] = 1
                            else:
                                Dict[l] += 1

                            if not l in Dict1:
                                Dict1[l] = [l2]
                            else:
                                Dict1[l].append(l2)
                    i += 1
            import csv
            outputs = open(filename2, 'w', newline='')
            csv_write = csv.writer(outputs, dialect='excel')
            csv_write.writerow(
                ['定损单号', '原始定损项目', '定损项目名称', '合作类型', '国别', '厂牌', '车系', '修理厂类型', '工时单价类型', '是否承修厂牌', '维修程度', '修理厂编码', '修理厂地址',
                 '修理厂名称', '折后维修费','折后拆装费','工时折扣率', '定损员工号', '定损员姓名', '核损员工号', '核损员姓名', '是否含有补差价', '是否单个项目补差', '是否自定义', '定损项目不规范', '品牌车系录入不规范',
                 '训练数据量不足', '定损项目金额过低', '定损项目金额过高', '跟单标记', '是否含有外修费','定损价格排序过高过低', '跟单案件', '不要1', '不要2', '价格', 'n', 'i', 'rank'])
            for key in Dict1:
                L = Dict1[key]
                n = Dict[key]
                D = {}
                for l in L:
                    data = l.split('#')
                    l1 = data[:-1]
                    l1 = [x + '#' for x in l1]
                    k = ''
                    for x in l1:
                        k += x
                    v = float(data[-1])
                    D[k] = v
                D = sorted(D.items(), key=lambda x: x[1])
                i = 1
                for d in D:
                    data = []
                    data.extend(d[0].split('#'))
                    data.append(d[1])
                    data.append(n)
                    data.append(i)
                    data.append(i - n / 2)
                    csv_write.writerow(data)
                    i += 1
        datas = pd.read_csv(filename2, encoding='gbk')
        datas.drop(['不要1', '不要2'], axis=1, inplace=True)
        datas.to_csv(filename2, encoding='gbk', index=0)
#———————得到标记————————
        datas = pd.read_csv(filename2,encoding='gbk',error_bad_lines=False)
        N = datas['n']
        I = datas['i']
        L = []
        for i in range(len(I)):
            if N[i] * 0.1 >= I[i] or N[i] * 0.9 <= I[i]:
                if I[i] <= 3 or I[i] >= N[i] - 3:
                    if N[i] >= 10:
                        L.append(1)
                    else:
                        L.append(0)
                else:
                    L.append(0)
            else:
                L.append(0)
        datas['标记'] = np.c_[L]
        if self.task == '喷漆':
            datas = datas.drop(columns = ['操作类型'])
#——————获得车系类别——————————————
        class_chexi = classyfichexi(datas)
        datas['车系类别'] = np.c_[class_chexi]
        datas.to_csv(filename1,index=0,encoding='gbk')

# ——————生成第三版文件(加入均值众数)————————
    def get_mean1cp(self,X):
        L = []
        for x in X:
            try:
                L.append(self.Dict1[x])
            except:
                L.append(-1)
        return L

    def get_meancx(self,X, Y):
        L = []
        for i in range(len(X)):
            try:
                L.append(self.Dict2[X[i] + Y[i]])
            except:
                try:
                    L.append(self.Dict1[X[i]])
                except:
                    L.append(-1)
        return L

    def create_edition_three(self):
        """Add the mean / mode feature columns to ``self.outputfile`` in place.

        Reads the CSV (GBK), fills ``self.Dict1`` / ``self.Dict2`` from
        ``get_changpai_price`` / ``get_chexi_price``, loads the six statistic
        tables stored under ``json/<task>/<sheng>/`` and appends eight columns:
        厂牌均值, 车系均值, 合作类型均值, 项目均值, 修理厂均值, 合作类型众数,
        项目众数, 修理厂众数.  The file is then written back over itself.

        Brand / series values without an entry become ``-1`` (see
        ``get_mean1cp`` / ``get_meancx``); the JSON-backed statistics default
        to ``0`` for keys that are missing from the table.
        """
        filename = self.outputfile
        datas = pd.read_csv(filename, encoding='gbk')
        self.Dict1 = get_changpai_price(self.sheng)
        self.Dict2 = get_chexi_price(self.sheng)

        Dingsunxm = list(datas['定损项目名称'])
        Hezuoleix = list(datas['合作类型'])
        Changpai = list(datas['厂牌'])
        Chexi = list(datas['车系'])
        Xiulichanglx = list(datas['修理厂类型'])
        mean_changpai = self.get_mean1cp(Changpai)
        mean_chexi = self.get_meancx(Changpai, Chexi)
        onehot_path = 'json/{}/{}'.format(self.task,self.sheng)

        def load_table(name):
            # One statistic table per JSON file in the task/province folder.
            with open('{}/{}.json'.format(onehot_path, name)) as f:
                return json.load(f)

        def lookup_all(table, keys):
            # Per-row lookup; keys that are absent from the table (or cannot
            # be used as a key at all) get 0.
            values = []
            for key in keys:
                try:
                    values.append(table[key])
                except (LookupError, TypeError):
                    values.append(0)
            return values

        M_xm = load_table('项目均值')
        Z_xm = load_table('项目众数')
        M_xlc = load_table('修理厂均值')
        Z_xlc = load_table('修理厂众数')
        M_hzlx = load_table('合作类型均值')
        Z_hzlx = load_table('合作类型众数')

        # Assignment order fixes the column order of the written CSV.
        datas['厂牌均值'] = np.c_[mean_changpai]
        datas['车系均值'] = np.c_[mean_chexi]
        datas['合作类型均值'] = np.c_[lookup_all(M_hzlx, Hezuoleix)]
        datas['项目均值'] = np.c_[lookup_all(M_xm, Dingsunxm)]
        datas['修理厂均值'] = np.c_[lookup_all(M_xlc, Xiulichanglx)]
        datas['合作类型众数'] = np.c_[lookup_all(Z_hzlx, Hezuoleix)]
        datas['项目众数'] = np.c_[lookup_all(Z_xm, Dingsunxm)]
        datas['修理厂众数'] = np.c_[lookup_all(Z_xlc, Xiulichanglx)]
        datas.to_csv(filename, encoding='gbk', index=0)

# ——————生成第四版文件(加入F值)————————
    def create_edition_four(self):
        """Add the F0..F6 columns to ``self.outputfile`` and fix the column order.

        The five-feature CSV of the current task / province is read line by
        line.  Fields 0-4 of a line (matched below against 修理厂地址,
        定损项目名称, 厂牌, 车系, 修理厂类型) are concatenated into six keys,
        from the most specific (all five fields) to the least specific
        (项目 + 修理厂类型).  Fields 5-16 hold one (value, threshold number)
        pair per key level.  Lines whose numbers cannot be parsed (e.g. a
        header) are ignored from the failing field on.

        For every row of the training file the first level whose threshold
        number is at least 8 supplies ``F0``; ``F1``..``F6`` are the one-hot
        indicator of that level.  When no level qualifies ``F0`` is ``-1`` and
        all indicators are ``0``.
        NOTE(review): the threshold number is presumably a sample count -
        confirm against the code that writes the five-feature file.

        Afterwards the column 训练数据量不足 is dropped, 机构名 is set to
        ``self.sheng``, the columns are reordered for the task and the file is
        written back (GBK).

        Raises:
            ValueError: if ``self.task`` is not one of 喷漆 / 拆装 / 维修.
        """
        # Columns that only exist for a particular task; they sit right after
        # 合作类型 in the final layout.
        task_columns = {'喷漆': ['喷漆类型'], '拆装': [], '维修': ['维修程度']}
        if self.task not in task_columns:
            # Previously this surfaced as a NameError on `order` only after
            # all the work below had been done.
            raise ValueError('unsupported task: {!r}'.format(self.task))
        min_support = 8

        filename = self.outputfile
        # Same runtime value as before, written without invalid escape sequences.
        fivefeature = 'trainfile\\{}\\{}\\五个特征.csv'.format(self.task, self.sheng)
        datas = pd.read_csv(filename, encoding='gbk')

        def level_keys(addr, item, brand, series, shop_type):
            # The six lookup keys, most specific first.
            return (
                addr + item + brand + series + shop_type,
                addr + item + brand + shop_type,
                item + brand + series + shop_type,
                item + brand + shop_type,
                addr + item + shop_type,
                item + shop_type,
            )

        # One (values, thresholds) pair of dicts per key level.
        levels = [({}, {}) for _ in range(6)]
        with open(fivefeature) as f:
            for line in f:
                fields = line.strip().split(',')
                if fields == ['']:
                    continue  # blank line: nothing to parse
                keys = level_keys(fields[0], fields[1], fields[2], fields[3], fields[4])
                try:
                    for level, key in enumerate(keys):
                        values, supports = levels[level]
                        values[key] = float(fields[5 + 2 * level])
                        supports[key] = float(fields[6 + 2 * level])
                except ValueError:
                    # Non-numeric field: keep what was stored so far for this
                    # line and move on.
                    pass

        F0 = []
        flag_columns = [[] for _ in range(6)]
        rows = zip(datas['修理厂地址'], datas['定损项目名称'], datas['厂牌'],
                   datas['车系'], datas['修理厂类型'])
        for addr, item, brand, series, shop_type in rows:
            f_value = -1
            flags = [0] * 6
            for level, key in enumerate(level_keys(addr, item, brand, series, shop_type)):
                values, supports = levels[level]
                support = supports.get(key)
                if support is not None and support >= min_support:
                    f_value = values[key]
                    flags[level] = 1
                    break
            F0.append(f_value)
            for column, flag in zip(flag_columns, flags):
                column.append(flag)

        datas['F0'] = np.c_[F0]
        for number, column in enumerate(flag_columns, start=1):
            datas['F{}'.format(number)] = np.c_[column]
        datas = datas.drop(columns=['训练数据量不足'])

        datas['机构名'] = np.c_[[self.sheng for i in range(len(datas))]]
        order = (
            ['定损单号', '原始定损项目', '定损项目名称', '合作类型']
            + task_columns[self.task]
            + ['国别', '厂牌', '车系类别', '修理厂类型', '工时单价类型',
               '是否承修厂牌', '厂牌均值', '车系均值', '合作类型均值', '项目均值', '修理厂均值', '合作类型众数', '项目众数',
               '修理厂众数', 'F0', 'F1', 'F2', 'F3', 'F4', 'F5', 'F6', '价格', '修理厂编码', '机构名', '修理厂地址', '修理厂名称', '工时折扣率',
               '定损员工号', '定损员姓名', '核损员工号', '核损员姓名', '车系']
        )
        datas = datas[order]
        datas.to_csv(filename, index=0, encoding='gbk')

    def make(self):
        self.create_edition_one()
        self.create_edition_two()
        self.create_edition_three()
        self.create_edition_four()

    def eval(self):
        """Score every row of ``self.outputfile`` with the saved XGBoost model.

        For each row the categorical columns (starting at column index 2) are
        expanded through the one-hot tables stored as JSON under
        ``json/<task>/<sheng>/``, concatenated with the 15 feature columns
        that follow them, and passed to the booster stored at
        ``trainfile/<task>/<sheng>/<sheng><task>.model``.  The column after
        those features is used as the label.

        Two columns are appended to the table - the prediction and
        ``exp(-2 * |label - prediction| / label)`` - and the result is written
        to ``testfile/<task>/<sheng>/<sheng><task>预测数据.xlsx``.

        A row that fails for any reason (unknown category, missing table,
        missing model, ...) gets '错误' as prediction and 0 as score.  The
        JSON tables and the model are loaded once and reused for all rows.
        """
        np.set_printoptions(threshold=5000)
        onehot_path = 'json/{}/{}'.format(self.task, self.sheng)
        test_file = self.outputfile
        model_name = 'trainfile/{}/{}/{}.model'.format(self.task, self.sheng, self.sheng + self.task)
        test_output = 'testfile/{}/{}/{}预测数据.xlsx'.format(self.task,self.sheng,self.sheng + self.task)
        dataset = pd.read_csv(test_file, encoding='gbk')  # adjust to your own data path
        results1 = []
        Acc = []

        # Task-specific categorical column that sits right after 合作类型.
        # Each entry is (table name, fixed lookup key or None for "use the
        # value found in the row").
        # NOTE(review): 喷漆 always looks up '全漆' instead of the row's own
        # 喷漆类型 value - kept as found, confirm this is intended.
        task_encoders = {
            '喷漆': [('喷漆类型', '全漆')],
            '拆装': [],
            '维修': [('维修程度', None)],
        }
        extra = task_encoders.get(self.task)
        if extra is not None:
            # NOTE(review): 修理厂类型.json is used for two consecutive
            # columns; the second one may have been meant to use a table of
            # its own. Kept as found because the model was trained elsewhere.
            encoders = (
                [('定损项目名称', None), ('合作类型', None)]
                + extra
                + [('国别', None), ('厂牌', None), ('车系', None),
                   ('修理厂类型', None), ('修理厂类型', None), ('是否承修厂牌', None)]
            )
            cat_start = 2
            cat_end = cat_start + len(encoders)
            label_col = cat_end + 15
            tables = {}   # table name -> parsed JSON, filled on first use
            model = None  # loaded on first use, inside the per-row guard

            for i in range(len(dataset)):
                try:
                    test = dataset.iloc[i, cat_start:cat_end].values
                    test2 = dataset.iloc[i, cat_end:label_col].values
                    labels = dataset.iloc[i, label_col]

                    bits = []
                    for pos, (stem, fixed_key) in enumerate(encoders):
                        table = tables.get(stem)
                        if table is None:
                            with open('{}/{}.json'.format(onehot_path, stem)) as f:
                                table = json.load(f)
                            tables[stem] = table
                        key = test[pos] if fixed_key is None else fixed_key
                        # Keep only the items that convert to int; anything
                        # else in the stored encoding is skipped.
                        for a in table[key]:
                            try:
                                bits.append(int(a))
                            except (TypeError, ValueError, OverflowError):
                                pass

                    trains = np.array(bits, dtype=float)
                    trainss = np.concatenate((trains, test2))
                    labels = np.expand_dims(np.array(labels), axis=0)
                    xgtest = xgb.DMatrix(np.expand_dims(trainss, axis=0), label=labels)
                    if model is None:
                        model = xgb.Booster(model_file=model_name)
                    preds = model.predict(xgtest)
                    acc = exp(-abs(labels[0] - preds[0]) / labels[0] * 2)
                    results1.append(preds[0])
                    Acc.append(acc)
                    print(labels[0], preds[0], acc)
                except Exception:
                    # Best effort per row: mark it and continue with the next.
                    results1.append('错误')
                    Acc.append(0)

        # NOTE(review): the prediction column is named after 喷漆 for every task.
        dataset['预测折后喷漆费'] = np.c_[results1]
        dataset['准确率'] = np.c_[Acc]
        # Nothing visible here creates the testfile/ tree, so make sure it exists.
        os.makedirs(os.path.dirname(test_output), exist_ok=True)
        dataset.to_excel(test_output)
import pandas as pd
import numpy as np

#去除特殊符号
def qx(data):
    """Remove separator characters from ``data``.

    The characters dropped are: ``,``  ``。``  ``-``  ``*``  space  ``.``
    and the double quote.

    Args:
        data (str): text to clean.

    Returns:
        str: ``data`` without any of the characters listed above.

    Raises:
        TypeError: if ``data`` is not a string (e.g. a NaN cell).  Callers in
            this file rely on that to skip unusable rows.
    """
    if not isinstance(data, str):
        # Made explicit: the old try/except only covered the first check, so
        # non-string input failed on the following line anyway.
        raise TypeError('qx() expects a str, got {}'.format(type(data).__name__))
    return data.translate(str.maketrans('', '', ',。-*. "'))

def is_contain_chinese(check_str):
    """Tell whether a string contains at least one Chinese character.

    A character counts as Chinese when it lies in the range U+4E00..U+9FFF.

    :param check_str: {str} string to inspect
    :return: {bool} True if such a character is present, otherwise False
    """
    return any('\u4e00' <= char <= '\u9fff' for char in check_str)

"""
功能:
厂牌转换函数
输入:
原始厂牌列表
输出:
转换后的厂牌列表
"""
def convertchangpai(Datas):
    """Normalise a list of raw brand names.

    The brand correction CSV under ``file`` supplies a mapping from cleaned
    raw name (first field) to corrected name (second field); the first
    occurrence of a raw name wins.  Every element of ``Datas`` is upper-cased
    IN PLACE, cleaned with ``qx`` and translated through that mapping (the
    cleaned name itself is used when there is no entry).  Afterwards:

    * if the upper-cased name contains one of a fixed set of brand keywords,
      the canonical brand for the first matching keyword is used;
    * otherwise, names without any Chinese character, or containing
      '4轮电动车', '自定义' or '标准', become '无'.

    Args:
        Datas: mutable, index-addressable sequence of brand strings.

    Returns:
        list: the normalised brand for each element of ``Datas``.
    """
    corrections = {}
    # Same runtime path as before, written without an invalid escape sequence.
    with open('file\\厂牌修正表20200312.csv') as f:
        for line in f:
            fields = line.strip().split(',')
            if len(fields) < 2:
                # Blank or single-field line: it cannot define a mapping.
                continue
            raw_name = qx(fields[0])
            if raw_name not in corrections:
                corrections[raw_name] = qx(fields[1])

    # (keyword, canonical brand) pairs, checked in this order.
    keyword_brands = (
        ('宝马', '宝马'),
        ('马自达', '马自达'),
        ('奔驰', '奔驰'),
        ('奥迪', '奥迪'),
        ('保时捷', '保时捷'),
        ('标致', '标致'),
        ('哈弗', '哈弗'),
        ('宝骏', '宝骏汽车'),
        ('大众', '大众'),
    )

    result = []
    for i in range(len(Datas)):
        Datas[i] = Datas[i].upper()
        name = Datas[i]
        cleaned = qx(name)
        brand = corrections.get(cleaned, cleaned)
        for keyword, canonical in keyword_brands:
            if keyword in name:
                brand = canonical
                break
        else:
            # No brand keyword matched: blank out names that are not brands.
            if (not is_contain_chinese(name) or '4轮电动车' in name
                    or '自定义' in name or '标准' in name):
                brand = '无'
        result.append(brand)
    return result

#车系去括号
def qxcx(data):
    """Cut a series name at its first opening bracket.

    Everything from the earliest occurrence of ``【``, ``[``, ``(`` or the
    full-width ``(`` (U+FF08) onwards is dropped.  A string without any of
    these characters is returned unchanged.

    Args:
        data (str): raw series name.

    Returns:
        str: the part of ``data`` before the first opening bracket.
    """
    # NOTE(review): the previous version tested '(' twice, so its last branch
    # could never fire; the full-width form is handled here on the assumption
    # that it was the intended second variant (as with the 【 / [ pair).
    cut = len(data)
    for bracket in ('【', '[', '(', '\uff08'):
        position = data.find(bracket)
        if position != -1 and position < cut:
            cut = position
    return data[:cut]

"""
功能:
车系转换函数
输入:
Datas1:转换后的厂牌列表
Datas2:转换前的车系列表
输出:
转换后的车系列表
"""
def convertchexi(Datas1,Datas2):#D1,D2 are the brand list and the series list
    """Normalise a list of raw series names, given their (converted) brands.

    The series correction CSV under ``file`` (GBK) supplies a mapping from
    ``qx(brand) + qx(auto_series_chinaname)`` to
    ``qx(auto_series_chinaname0)``; rows with a non-string cell are skipped.

    Both input sequences are upper-cased IN PLACE.  For each position the key
    ``qx(brand) + qx(qxcx(series))`` is looked up:

    * on a hit the mapped series is returned upper-cased;
    * otherwise '无' is returned when the series contains '自定义' or '标准';
    * otherwise the cleaned, bracket-free series itself is returned
      upper-cased.

    Args:
        Datas1: mutable sequence of brand strings.
        Datas2: mutable sequence of series strings, aligned with ``Datas1``.

    Returns:
        list: the normalised series for each position.
    """
    # Same runtime path as before, written without an invalid escape sequence.
    datas = pd.read_csv('file\\车系修正表20200312.csv',encoding='gbk', engine='python')
    fit_name = datas['brand']
    fit_name0 = datas['auto_series_chinaname']
    chexi = datas['auto_series_chinaname0']

    corrections = {}
    for i in range(len(fit_name)):
        try:
            corrections[qx(fit_name[i]) + qx(fit_name0[i])] = qx(chexi[i])
        except (TypeError, AttributeError):
            # Non-string cell (e.g. NaN) in the correction table: skip the row.
            pass

    result = []
    for i in range(len(Datas1)):
        Datas1[i] = Datas1[i].upper()
        Datas2[i] = Datas2[i].upper()
        series = Datas2[i]
        stripped = qx(qxcx(series))
        mapped = corrections.get(qx(Datas1[i]) + stripped)
        if mapped is not None:
            result.append(mapped.upper())
        elif '自定义' in series or '标准' in series:
            result.append('无')
        else:
            result.append(stripped.upper())
    return result

# Substring tables used by qxxm. Every table is applied in the listed order;
# the order matters because removing one token can make a later one appear.

# Removed first. '\uff08' / '\uff09' are the full-width parentheses.
_QXXM_STRIP_HEAD = (
    '(', ')', '\uff08', '\uff09', '"',
    '喷漆', '喷塑', '修复', '含拆装', '拆装', '油漆', '打包', '钣金', '塑喷',
    '更换', '校修',
)
# Removed second. '\uff0c' is the full-width comma.
_QXXM_STRIP_MAIN = (
    '切割', '焊接', '做漆', '翻新', '打包', '处理', '半漆', '烤漆', '塑修',
    '矫正', '钣喷', '喷底漆', '工时', '维修', '无法点选', '镀晶', '整形',
    '更换', '调校', '补漆', '半喷',
    '+', ':', '*', '。', '.', '-', ',', '\uff0c',
)
# Removed after the door / fender rewrites.
_QXXM_STRIP_DETAIL = ('拆', '做漆', '本体', '补损', '碰花', '半', '现场')
# Removed after the '护杠' rewrite.
_QXXM_STRIP_EXTRA = ('校正', '补充', '镀络', '喷绘')
# Removed after '侧侧' has been collapsed to '侧'.
_QXXM_STRIP_PAINT = (
    '图喷', '喷字', '防锈漆', '外修', '喷素', '所有', '漆', '你', '金额', '段',
)
# Any of these (checked before spaces and digits are removed) turns the name into '无'.
_QXXM_REJECT_EARLY = ('费', '工时', '自定义', '标准')
# Any of these (checked after spaces and digits are removed) turns the name into '无'.
_QXXM_REJECT_LATE = (
    '及', '抛光', ',', '追加', '、', '工时', '其', '和', '事故', '材料', '三者',
    '定损', '缺额', '含', '补', '跟单', '增加', '整案',
)
# Second removal pass, run after the rejection checks.
_QXXM_STRIP_FINAL = (
    '喷漆', '喷塑', '修复', '拆装', '油漆', '打包', '钣金', '塑喷', '更换',
    '校修', '半喷', '切割', '焊接', '做漆', '翻新', '打包', '半漆', '烤漆',
    '塑修', '矫正', '钣喷', '喷底漆', '维修', '镀晶', '整形', '调校', '补漆',
    '半喷',
)
# Names that are replaced by '无' at the very end (compared after strip()).
_QXXM_JUNK = frozenset([
    '门门',
    '三者车',
    '侧侧',
    '喷绘',
    '镀络',
    '材料',
    '事故',
    '全车漆车顶是全景天窗',
    '总成',
    '跟单定损',
    '三者出租车叶喷字',
    '缺额',
    '市场监管徽标',
    '配件录入',
    '门饰条叶饰条',
    '门饰板   轮眉',
    '特殊理赔政策',
    '位',
])


def qxxm(data):
    """Clean a loss-assessment item name down to a part name.

    The name is upper-cased, operation words and punctuation are removed in
    a fixed order, a few part names are rewritten to a canonical form
    (fenders to '叶子板(前)' / '叶子板(后)', bumper variants to '保险杠...',
    skirts to '底大边'), and names that look like fees, surcharges, compound
    items or other non-part entries are replaced by '无'.

    Position words such as '前', '后', '左', '右' are not removed.

    Compared with the earlier version, the full-width parentheses and the
    full-width comma are now stripped as well: the second parenthesis/comma
    checks used to repeat the ASCII characters and could never match.

    Args:
        data: the raw item name as a string.

    Returns:
        The cleaned name, or '无' when the item is rejected.
    """
    def strip_all(text, tokens):
        # Remove each token in turn; later tokens see the result of earlier ones.
        for token in tokens:
            text = text.replace(token, '')
        return text

    data = strip_all(data.upper(), _QXXM_STRIP_HEAD)
    # NOTE(review): kept exactly as it was. When '半喷' is present this step
    # removes '打包', not '半喷' ('半喷' itself is removed by the next table).
    # It looks like a copy-paste slip, but changing it alters the result for
    # names such as '半喷底漆', so confirm the intent before touching it.
    if '半喷' in data:
        data = data.replace('打包', '')
    data = strip_all(data, _QXXM_STRIP_MAIN)

    # Whole-name rewrites.
    if data.strip() == '车门':
        data = data.replace('车', '')
    if '翼子板' in data:
        # The space in the front-fender literal is removed further down,
        # together with every other space.
        data = '叶子板(后)' if '后' in data else '叶子板(前 )'

    data = strip_all(data, _QXXM_STRIP_DETAIL)
    # '部' is only dropped when no position word (前 / 后 / 中) is present.
    if '部' in data and not any(mark in data for mark in ('前', '后', '中')):
        data = data.replace('部', '')
    data = data.replace('银色', '')
    if '护杠' in data:
        # Becomes '保险杠' in the bumper section below.
        data = '护杠\n'
    data = strip_all(data, _QXXM_STRIP_EXTRA)
    data = data.replace('侧侧', '侧')
    data = strip_all(data, _QXXM_STRIP_PAINT)

    # Rejections: price differences (but not '差速器'), fees, custom items.
    if '差' in data and '差速器' not in data:
        data = '无'
    if any(word in data for word in _QXXM_REJECT_EARLY):
        data = '无'
    data = data.replace(' ', '')
    for digit in '0123456789':
        data = data.replace(digit, '')
    if not data:
        data = '无'
    if any(word in data for word in _QXXM_REJECT_LATE):
        data = '无'

    # Second removal pass: dropping spaces and digits can join characters
    # into tokens that were not present during the first pass.
    data = strip_all(data, _QXXM_STRIP_FINAL)

    if data.strip() == '总成':
        data = '无'

    # Bumper family. Names containing '眉' are left alone unless a more
    # specific rule matched first.
    if '杠' in data:
        if '保险杠' in data:
            if '皮' in data:
                data = '保险杠外皮'
            elif '保险杠骨架' in data:
                data = '保险杠骨架'
            elif '饰板' in data:
                data = '保险杠饰板'
            elif '眉' not in data:
                data = '保险杠'
        elif '杠包角' in data:
            data = '保险杠包角'
        elif '饰板' in data:
            data = '保险杠饰板'
        elif '眉' not in data:
            data = '保险杠'
    if '裙' in data and '杠' not in data:
        data = '底大边'
    if data.strip() in _QXXM_JUNK:
        data = '无'
    return data

def convertxiangmu(Datas):
    """Convert raw loss-assessment item names to standard part names.

    Names found in the replacement spreadsheet (column 'fit_name' mapped to
    column 'fit_name3') take the spreadsheet's name; everything else is
    cleaned with ``qxxm``. In both cases a bare '叶子板' becomes '叶子板(前)',
    and a spreadsheet name containing '裙' but not '杠' becomes '底大边'.

    Args:
        Datas: list of raw item names. Every element is upper-cased in place.

    Returns:
        The list of converted item names, upper-cased.

    Note:
        ``qx`` and ``qxxm`` are helpers defined elsewhere in this module.
    """
    # Raw string: the same characters as before, without the invalid "\要"
    # escape sequence that newer Python versions warn about.
    datas = pd.read_excel(r'file\要替换的配件名称3.31.xlsx')
    raw_names = datas['fit_name']
    std_names = datas['fit_name3']

    lookup = {}
    for i in range(len(std_names)):
        try:
            lookup[qx(raw_names[i])] = std_names[i]
        except Exception:
            # Best effort, as before: a spreadsheet row that cannot be
            # cleaned is skipped. Unlike a bare ``except`` this no longer
            # swallows KeyboardInterrupt / SystemExit.
            continue

    converted = []
    for i in range(len(Datas)):
        Datas[i] = Datas[i].upper()
        try:
            name = lookup[qx(Datas[i])]
            if '裙' in name and '杠' not in name:
                name = '底大边'
            if name == '叶子板':
                name = '叶子板(前)'
            converted.append(name.upper())
        except Exception:
            # No usable spreadsheet entry: fall back to rule-based cleaning.
            name = qxxm(Datas[i])
            if name == '叶子板':
                name = '叶子板(前)'
            converted.append(name.upper())
    return converted



def convertguobie(data):
    """Convert a country-of-origin label.

    A label containing '中国' is returned unchanged; any other label is
    replaced by '进口'.
    """
    return data if '中国' in data else '进口'

def buchajia(Dsdh, Dsxmmc):
    """Collect the claim numbers that contain a price-difference item.

    An item counts when its name contains '差', '增加', '增补' or '补偿' and
    does not contain '差速'.

    Args:
        Dsdh: list of loss-assessment claim numbers, one per item.
        Dsxmmc: list of item names before conversion, aligned with Dsdh.

    Returns:
        A list of the distinct claim numbers that have at least one such
        item. The order of the list is not defined.
    """
    markers = ('差', '增加', '增补', '补偿')
    hits = [
        Dsdh[idx]
        for idx in range(len(Dsdh))
        if any(marker in Dsxmmc[idx] for marker in markers)
        and '差速' not in Dsxmmc[idx]
    ]
    return list(set(hits))

def sfbc(Dsdh, Buchalist):
    """Flag every item whose claim is a price-difference claim.

    Args:
        Dsdh: list of loss-assessment claim numbers, one per item.
        Buchalist: list of claim numbers that contain a price-difference
            item.

    Returns:
        A list with one entry per element of ``Dsdh``: 1 when that claim
        number is in ``Buchalist``, otherwise 0.
    """
    # Build the lookup once: testing against the list rescanned it for
    # every row.
    try:
        flagged = set(Buchalist)
    except TypeError:
        # Unhashable ids: keep the plain list membership test.
        flagged = Buchalist
    return [1 if claim in flagged else 0 for claim in Dsdh]

def isornot_fitting_barbarism(data):
    """Tell whether an item name would be rejected as '无'.

    Returns True when the name contains '差' without '差速器', contains one
    of the fee / custom-item words, is empty once spaces and digits are
    removed, contains one of the compound-item / surcharge words after that
    removal, or is exactly '无' after that removal. Returns False otherwise.
    """
    before_cleanup = ('费', '工时', '自定义', '标准')
    after_cleanup = (
        '及', '抛光', ',', '追加', '、', '工时', '其', '和', '事故', '材料',
        '三者', '定损', '缺额', '含', '补', '跟单', '增加', '整案',
    )

    # Checks made on the name as given.
    if '差' in data and '差速器' not in data:
        return True
    if any(word in data for word in before_cleanup):
        return True

    # Checks made once spaces and digits are gone.
    cleaned = data.replace(' ', '')
    for digit in '0123456789':
        cleaned = cleaned.replace(digit, '')
    if not cleaned:
        return True
    if any(word in cleaned for word in after_cleanup):
        return True
    return cleaned == '无'

def is_brand_invalid(Changpai, Chexi):
    """Flag rows whose brand / series entry is not usable.

    Args:
        Changpai: list of converted brand names.
        Chexi: list of converted series names, aligned with Changpai.

    Returns:
        A list with one entry per element of ``Chexi``: 1 when the brand or
        the series is '无', or the brand contains '货车' or '摩托'; else 0.
    """
    def unusable(idx):
        brand = Changpai[idx]
        return (
            brand == '无'
            or Chexi[idx] == '无'
            or '货车' in brand
            or '摩托' in brand
        )

    return [1 if unusable(idx) else 0 for idx in range(len(Chexi))]

def classyfichexi(datas1):
    """Label every row of ``datas1`` with its price class and series name.

    The class comes from the classification spreadsheet: the series-level
    class ('车系调整分类') when the row's brand and series are both listed,
    otherwise the brand-level class ('品牌分类') when the brand is listed.

    Args:
        datas1: table with a '厂牌' (brand) and a '车系' (series) column.

    Returns:
        A list with one label per row: ``str(class) + series name``, or '空'
        when neither the series nor the brand can be classified.
    """
    # Raw string: the same characters as before, without the invalid "\分"
    # escape sequence that newer Python versions warn about.
    # NOTE(review): recent pandas releases reportedly no longer accept
    # ``encoding`` in read_excel - confirm against the pandas version in use.
    datas2 = pd.read_excel(r'file\分类结果v2.xlsx', encoding='gbk')
    pp1 = datas1['厂牌']
    cx1 = datas1['车系']
    pp2 = datas2['brand']
    cx2 = datas2['auto_series_chinaname']
    x1 = datas2['品牌分类']
    x2 = datas2['车系调整分类']

    brand_class = {}   # brand -> brand-level class
    series_class = {}  # 'brand#series' -> series-level class
    for i in range(len(pp2)):
        brand_class[pp2[i]] = x1[i]
        series_class[pp2[i] + '#' + cx2[i]] = x2[i]

    labels = []
    for i in range(len(pp1)):
        try:
            # Fixed: the suffix used to be cx1[1], i.e. the series name of
            # row 1 for every row, instead of this row's own series name.
            labels.append(str(series_class[pp1[i] + '#' + cx1[i]]) + cx1[i])
        except Exception:
            try:
                labels.append(str(brand_class[pp1[i]]) + cx1[i])
            except Exception:
                labels.append('空')
    return labels

def get_mean(changpai, values):
    """Average the positive values of every brand.

    Args:
        changpai: sequence of brand names, one per row.
        values: sequence of numbers aligned with ``changpai``.

    Returns:
        A dict mapping every brand to the mean of its values that are
        greater than 0, or to 0 when the brand has no such value.
    """
    # The earlier version reset a brand's running sum to 0 whenever it met a
    # non-positive value and mis-counted brands whose first value was
    # non-positive; sums and counts are now tracked over positive values only.
    totals = {}
    counts = {}
    for brand, value in zip(changpai, values):
        if brand not in totals:
            totals[brand] = 0
            counts[brand] = 0
        if value > 0:
            totals[brand] += value
            counts[brand] += 1
    return {
        brand: (totals[brand] / counts[brand] if counts[brand] else 0)
        for brand in totals
    }

def all_list(arr):
    """Count how often each element occurs.

    Returns:
        A dict mapping every distinct element of ``arr`` to its number of
        occurrences, in order of first appearance.
    """
    frequency = {}
    for item in arr:
        frequency[item] = frequency.get(item, 0) + 1
    return frequency

def get_mean_pengqi(changpai, values, pqlx):
    """Per-row mean paint price of the row's brand.

    Only rows whose paint type contains '全漆' and whose value is below
    999999 take part in the averages.

    Args:
        changpai: brand name of every row.
        values: price of every row.
        pqlx: paint type of every row.

    Returns:
        A list aligned with ``changpai``: the brand's mean rounded to the
        nearest hundred, or -1 when the brand has no qualifying row.
    """
    stats = {}  # brand -> [running sum, number of rows]
    for idx in range(len(changpai)):
        if '全漆' in pqlx[idx] and values[idx] < 999999:
            brand = changpai[idx]
            if brand in stats:
                stats[brand][0] += values[idx]
                stats[brand][1] += 1
            else:
                stats[brand] = [values[idx], 1]

    means = {
        brand: total / np.float64(count)
        for brand, (total, count) in stats.items()
    }
    return [
        round(means[brand] / 100) * 100 if brand in means else -1
        for brand in changpai
    ]

def get_zhengshu_pengqi(changpai, values, pqlx):
    """Per-row mode of the paint price of the row's brand.

    Only rows whose paint type contains '全漆' and whose value is below
    999999 are counted. When several prices tie for most frequent, their
    mean is used.

    Args:
        changpai: brand name of every row.
        values: price of every row.
        pqlx: paint type of every row.

    Returns:
        A list aligned with ``changpai``: the brand's mode rounded to the
        nearest hundred, or -1 when the brand has no qualifying row.
    """
    # One pass over the rows; the earlier version rescanned every row once
    # per brand and counted occurrences with a quadratic list.count().
    tallies = {}  # brand -> {price: occurrences}, prices in first-seen order
    for brand, value, paint in zip(changpai, values, pqlx):
        if '全漆' in paint and value < 999999:
            price = float(value)
            per_brand = tallies.setdefault(brand, {})
            per_brand[price] = per_brand.get(price, 0) + 1

    modes = {}
    for brand, tally in tallies.items():
        top = max(tally.values())
        modes[brand] = np.mean(np.array([p for p, n in tally.items() if n == top]))

    result = []
    for brand in changpai:
        try:
            result.append(round(modes[brand] / 100) * 100)
        except (KeyError, ValueError, OverflowError):
            # No qualifying row for this brand, or a mode that cannot be
            # rounded (non-finite): same -1 fallback as before.
            result.append(-1)
    return result

def get_mean_caizhaung(changpai, values):
    """Per-row mean removal/installation fee of the row's brand.

    Only values below 1000 take part in the averages.

    Args:
        changpai: brand name of every row.
        values: fee of every row.

    Returns:
        A list aligned with ``changpai``: the brand's mean rounded to the
        nearest ten, or -1 when the brand has no value below 1000.
    """
    sums = {}
    rows = {}
    for idx in range(len(changpai)):
        fee = values[idx]
        if not fee < 1000:
            continue
        brand = changpai[idx]
        if brand in sums:
            sums[brand] += fee
            rows[brand] += 1
        else:
            sums[brand] = fee
            rows[brand] = 1

    averages = {brand: sums[brand] / np.float64(rows[brand]) for brand in sums}

    result = []
    for brand in changpai:
        if brand in averages:
            result.append(round(averages[brand] / 10) * 10)
        else:
            result.append(-1)
    return result

def get_zhengshu_chaizhaung(changpai, values):
    """Per-row mode of the removal/installation fee of the row's brand.

    Only values below 1000 are counted. When several fees tie for most
    frequent, their mean is used.

    Args:
        changpai: brand name of every row.
        values: fee of every row.

    Returns:
        A list aligned with ``changpai``: the brand's mode rounded to the
        nearest ten, or -1 when the brand has no value below 1000.
    """
    # One pass over the rows; the earlier version rescanned every row once
    # per brand and counted occurrences with a quadratic list.count().
    tallies = {}  # brand -> {fee: occurrences}, fees in first-seen order
    for brand, fee in zip(changpai, values):
        if fee < 1000:
            per_brand = tallies.setdefault(brand, {})
            per_brand[fee] = per_brand.get(fee, 0) + 1

    modes = {}
    for brand, tally in tallies.items():
        top = max(tally.values())
        modes[brand] = np.mean(np.array([f for f, n in tally.items() if n == top]))

    return [
        round(modes[brand] / 10) * 10 if brand in modes else -1
        for brand in changpai
    ]

def get_mean_weixiu(changpai, values):
    """Per-row mean repair fee of the row's brand.

    Only values below 10000 take part in the averages.

    Args:
        changpai: brand name of every row.
        values: fee of every row.

    Returns:
        A list aligned with ``changpai``: the brand's mean rounded to the
        nearest ten, or -1 when the brand has no value below 10000.
    """
    accum = {}  # brand -> (running sum, number of rows)
    for idx in range(len(changpai)):
        if values[idx] < 10000:
            brand = changpai[idx]
            if brand not in accum:
                accum[brand] = (values[idx], 1)
            else:
                subtotal, seen = accum[brand]
                accum[brand] = (subtotal + values[idx], seen + 1)

    def brand_mean(brand):
        subtotal, seen = accum[brand]
        return subtotal / np.float64(seen)

    return [
        round(brand_mean(brand) / 10) * 10 if brand in accum else -1
        for brand in changpai
    ]

def get_zhengshu_weixiu(changpai, values):
    """Per-row mode of the repair fee of the row's brand.

    Only values below 10000 are counted. When several fees tie for most
    frequent, their mean is used.

    Args:
        changpai: brand name of every row.
        values: fee of every row.

    Returns:
        A list aligned with ``changpai``: the brand's mode rounded to the
        nearest ten, or -1 when the brand has no value below 10000.
    """
    # One pass over the rows; the earlier version rescanned every row once
    # per brand and counted occurrences with a quadratic list.count().
    tallies = {}  # brand -> {fee: occurrences}, fees in first-seen order
    for brand, value in zip(changpai, values):
        if value < 10000:
            fee = float(value)
            per_brand = tallies.setdefault(brand, {})
            per_brand[fee] = per_brand.get(fee, 0) + 1

    modes = {}
    for brand, tally in tallies.items():
        top = max(tally.values())
        modes[brand] = np.mean(np.array([f for f, n in tally.items() if n == top]))

    result = []
    for brand in changpai:
        try:
            result.append(round(modes[brand] / 10) * 10)
        except (KeyError, ValueError, OverflowError):
            # No qualifying row for this brand, or a mode that cannot be
            # rounded (non-finite): same -1 fallback as before.
            result.append(-1)
    return result

def get_changpai_price(sheng):
    """Build the brand price dictionary of one organisation.

    Reads the per-organisation brand price spreadsheet and keeps the rows
    whose 'dptname' contains ``sheng``.

    Args:
        sheng: organisation (province) name to look for in 'dptname'.

    Returns:
        A dict mapping 'brand_name' to 'avg_total_bz'. When a brand appears
        in several matching rows, the last row wins.
    """
    # Raw string: the same characters as before, without the invalid "\各"
    # escape sequence that newer Python versions warn about.
    table = pd.read_excel(r'file\各机构品牌标准价格.xlsx')
    dept = table['dptname']
    brand = table['brand_name']
    price = table['avg_total_bz']
    return {
        brand[i]: price[i]
        for i in range(len(brand))
        if sheng in dept[i]
    }

def get_chexi_price(sheng):
    """Build the series price dictionary of one organisation.

    Reads the per-organisation series price spreadsheet and keeps the rows
    whose 'dptname' contains ``sheng``.

    Args:
        sheng: organisation (province) name to look for in 'dptname'.

    Returns:
        A dict mapping 'brand_name' + 'auto_series_chinaname' (plain
        concatenation) to 'avg_total_dpt_bz'. When a key appears in several
        matching rows, the last row wins.
    """
    # Raw string: the same characters as before, without the invalid "\各"
    # escape sequence that newer Python versions warn about.
    table = pd.read_excel(r'file\各机构车系标准价格.xlsx')
    dept = table['dptname']
    brand = table['brand_name']
    series = table['auto_series_chinaname']
    price = table['avg_total_dpt_bz']
    return {
        brand[i] + series[i]: price[i]
        for i in range(len(brand))
        if sheng in dept[i]
    }
from TrainFile import TrainFile
from TestFile import TestFile

if __name__ == '__main__':
    # Script entry point: build the training file and train on it, then
    # build the test file for 2020-03 .. 2020-04 and evaluate.
    task = '喷漆'
    sheng = '大连'

    trainfile = TrainFile(task=task, sheng=sheng)
    trainfile.make()           # generate the training file
    trainfile.train(thread=8)  # train with 8 threads

    testfile = TestFile(task=task, sheng=sheng, starttime='2020-03', endtime='2020-04')
    testfile.make()
    testfile.eval()

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值