基于我目前开发了几个影刀RPA的感受及 python性能提升的研究,及对比同样功能java实现
感觉提升RPA途径:
1、可以使用Excel的公式,这个效率还行,能充分利用计算机的CPU的线程、进程。
2、java或c#开发的应用程序, java稍微麻烦一点,要安装java JDK 并且要配置好路径,设定一个批处理文件即可。
3、影刀的for 或 python的其他循环的方法,效果也不好,我测试是要 5个小时的,用java实现 90秒即可实现。速度是python常规方法的200倍,是使用python加速方法(apply())的128倍,我测试时数据是这样子的,网上大咖说python某些场景下,比其他开发语言慢200-300倍,是一致的,毕竟它是解释型语言,不是编译的,每一次都要解释之后才能运行。你有多少次循环,就有多少次解释,然后再执行。
说说我 python 提速的途径:
a、我试过taichi,不支持字符串,仅支持纯数值
b、pypy3安装有问题,没安装上,
c、其他的加速工具,要么重新定义新的数据类型,要么改写之前写好的python代码。
附上我python 测试的几段代码,写得比较乱,但是主要方法都有的:
# import pandas as pd
# import numpy as np
import pandas as pd # modin.
import time # 引入time模块
# import taichi as ti
# ti.init()
# from distributed import Client
# client = Client()
# df = pd.DataFrame({'month': [1, 4, 7, 10],
# 'year': [2012, 2014, 2013, 2014],
# 'sale': [55, 40, 84, 31]})
# print(df)
# df.set_index('year') #如果索引列有数据重复也没关系
# print(df)
# aa = np.random.randn(6)
# print('aa:{0}',aa)
# --------------------------------------
# df = pd.DataFrame({'a': np.random.randn(6),
# 'b': ['foo', 'bar'] * 3,
# 'c': np.random.randn(6)})
# print('----------------')
# def my_test(a, b):
# return a + b
#
# print(df)
# df['Value'] = df.apply(lambda row: my_test(row['a'], row['c']), axis=1,result_type=None) # 方法1
# print(df)
#
# df['Value2'] = df['a'] + df['c'] # 方法2
# print(df)
# ------------------------------------
# Load the lookup table. The path is a raw string so the Windows backslash stays
# literal: '\l' is not a valid escape sequence and triggers a SyntaxWarning on
# recent Python versions. Only the first four columns are read and the file is
# treated as having no header row.
# Optional arguments tried earlier: names=['OrderId','BelongTime','ShopCode','ShopCname'], index_col=0
dfListFromDb = pd.read_csv(r'D:\listFromDb.csv', header=None, usecols=[0, 1, 2, 3])
# print('dfListFromDb:'+str(dfListFromDb))
# dfListFromDb.reindex(level=None, drop=False, inpalce=False, col_level=0, col_fill=' ')
# dfListFromDb.reindex(range(len(dfListFromDb)))
# dfListFromDb.reset_index(inplace=True,drop=True)
# print('dfListFromDb:' + str(dfListFromDb.index.names))
# print(dfListFromDb.to_string())
# Load the data to be matched: first two columns only, skipping the first line
# of the file. Raw string for the same reason as above ('\e' is not a valid escape).
# Optional argument tried earlier: index_col=0 / index_col=1
dfexcelData2Abstract = pd.read_csv(r'D:\excelData2Abstract.csv', header=None, skiprows=[0], usecols=[0, 1])
# dfexcelData2Abstract.reset_index(level=None, drop=False, inpalce=False, col_level=0, col_fill=' ')
# dfexcelData2Abstract.reset_index(inplace=True,drop=True)
# dfexcelData2Abstract.reindex(range(len(dfexcelData2Abstract)))
# print('dfListFromDb:' + str(dfexcelData2Abstract.index.names)) #
# print('dfexcelData2Abstract.to_string():'+str(dfexcelData2Abstract.to_string()))
# print(dfexcelData2Abstract.to_string())
# print('dfexcelData2Abstract:'+str(dfexcelData2Abstract))
print('xxxxxxxxxxx')
# Work on a deep copy so the benchmarks never touch the frame that was loaded.
dfexcelData2Abstract2 = dfexcelData2Abstract.copy(deep=True)
# @ti.func
def ForloopDoubleLIstRunTime(dfListFromDb, dfexcelData2Abstract2):
    """Time a nested, index-based substring search across two DataFrames.

    For every row of ``dfexcelData2Abstract2`` the rows of ``dfListFromDb``
    are scanned in order, stopping at the first one whose first-column text
    is contained in the first-column text of the outer row. No match is
    stored; only the elapsed wall-clock time is printed.

    Args:
        dfListFromDb: DataFrame whose first column holds the keys to look for.
        dfexcelData2Abstract2: DataFrame whose first column holds the text
            that is searched.

    Returns:
        None. The elapsed time in seconds is written to standard output.
    """
    start = time.perf_counter()  # monotonic clock, suited to measuring intervals
    outer_rows = len(dfexcelData2Abstract2)
    inner_rows = len(dfListFromDb)
    # The range()/iloc access pattern is kept on purpose: this function exists
    # to time exactly that kind of loop. It now reads the frame passed in as
    # an argument instead of a module-level global.
    for x in range(outer_rows):
        haystack = str(dfexcelData2Abstract2.iloc[x, 0])  # invariant for the inner loop
        for y in range(inner_rows):
            if str(dfListFromDb.iloc[y, 0]) in haystack:
                break
    print('ForloopDoubleLIstRunTime 耗时:{}'.format(time.perf_counter() - start))
# Empty list created here; no active code visible in this file reads it.
list24result = list()
# AA='准备补充单号为空的数据:str(list24result[11]).find("基金代发任务"):'+str(list24result[11].find("基金代发任务"))+'#str(list24result[11]).find("订单编号"):'+str(str(list24result[11]).find("订单编号"))+'#str(list24result[21]):'+str(list24result[21])
# Run the index-based nested-loop benchmark on the two frames loaded above.
ForloopDoubleLIstRunTime(dfListFromDb, dfexcelData2Abstract2)
# Print the CPU time consumed by this process so far.
# Readings noted beside this line originally: 120second 263, 208 132 116 99 98
print(time.process_time())
# @ti.func
def DoubleLIstRunTime_iterrows(dfListFromDb, dfexcelData2Abstract2):
    """Time a nested substring search that walks both frames with iterrows().

    For every row of ``dfexcelData2Abstract2`` the rows of ``dfListFromDb``
    are scanned in order. On the first row whose first-column text is
    contained in the outer row's first-column text, a line describing the
    match is printed and the inner scan stops.

    Args:
        dfListFromDb: DataFrame whose first column holds the keys to look for.
        dfexcelData2Abstract2: DataFrame whose first column holds the text
            that is searched.

    Returns:
        None. One line per match and the elapsed time in seconds are written
        to standard output.
    """
    start = time.perf_counter()  # monotonic clock, suited to measuring intervals
    for index, dfexcelData2Abstract2_row in dfexcelData2Abstract2.iterrows():
        # Positional access keeps "first column" semantics whatever the
        # column labels are; the text is invariant for the inner loop.
        haystack = str(dfexcelData2Abstract2_row.iloc[0])
        for _, dfListFromDb_rowdb in dfListFromDb.iterrows():
            needle = str(dfListFromDb_rowdb.iloc[0])
            if needle in haystack:
                print('index:' + str(index) + 'str(dfexcelData2Abstract2_row[0]):' + haystack
                      + '#' + 'str(dfListFromDb_rowdb[0]):' + needle)
                break
    print('DoubleLIstRunTime_iterrows 耗时:{}'.format(time.perf_counter() - start))
# DoubleLIstRunTime_iterrows(dfListFromDb, dfexcelData2Abstract2) #123secend
# print(str(time.process_time())) #377 95 117 168 160
# @ti.kernel
def DoubleLIstRunTime_itertuples(dfListFromDb, dfexcelData2Abstract2):
    """Time a nested substring search whose inner scan uses itertuples().

    The outer loop still walks ``dfexcelData2Abstract2`` with iterrows() and
    prints every outer row; only the inner scan over ``dfListFromDb`` uses
    itertuples(). The inner scan stops at the first row whose first-column
    text is contained in the outer row's first-column text.

    Args:
        dfListFromDb: DataFrame whose first column holds the keys to look for.
        dfexcelData2Abstract2: DataFrame whose first column holds the text
            that is searched.

    Returns:
        None. One line per outer row and the elapsed time in seconds are
        written to standard output.
    """
    start = time.perf_counter()  # monotonic clock, suited to measuring intervals
    for index, dfexcelData2Abstract2_row in dfexcelData2Abstract2.iterrows():
        print('外层循环dfexcelData2Abstract2Index:' + str(index)
              + '#dfexcelData2Abstract2_row:' + str(dfexcelData2Abstract2_row))
        haystack = str(dfexcelData2Abstract2_row.iloc[0])  # invariant for the inner loop
        for rowdb in dfListFromDb.itertuples():
            # Element 0 of the tuple is the index, element 1 the first column.
            # Positional access avoids depending on the auto-generated field
            # name '_1', which only exists when the column label is not a
            # valid Python identifier.
            if str(rowdb[1]) in haystack:
                break
    # The label now names this function; it previously repeated the iterrows label.
    print('DoubleLIstRunTime_itertuples 耗时:{}'.format(time.perf_counter() - start))
# DoubleLIstRunTime_itertuples(dfListFromDb, dfexcelData2Abstract2) #123secend
# print(str(time.process_time())) #377 95 117 168 160
def testfindInApply(tempRow):
    """Look for a key from the module-level ``dfListFromDb`` inside one row.

    The rows of ``dfListFromDb`` are scanned in order. At the first row whose
    ``_1`` field, as text, is contained in the text of ``tempRow[0]``, that
    key text is stored in ``tempRow[2]`` and True is returned.

    Args:
        tempRow: Row-like object indexable by 0 and assignable at 2.

    Returns:
        True on the first match, otherwise None.

    NOTE(review): the assignment changes only the object passed in; whether
    it reaches the originating DataFrame when this is used with
    DataFrame.apply should be verified.
    """
    for db_record in dfListFromDb.itertuples():
        key_text = str(db_record._1)
        if key_text not in str(tempRow[0]):
            continue
        tempRow[2] = key_text
        return True
    return None
# print(dfexcelData2Abstract2)
# dfexcelData2Abstract2[2]='-----------------------------------'
# print(dfexcelData2Abstract2)
# ticks = time.time()
# dfexcelData2Abstract2.apply(testfindInApply,axis=1)
# ticks2 = (time.time() - ticks) / 60
# print('dfexcelData2Abstract2.apply 耗时:{}'.format(time.time() - ticks)) # +str(ticks2))
# print(dfexcelData2Abstract2)
# dfexcelData2Abstract2.to_csv("d:\dfexcelData2Abstract2Output.csv")
java代码:
package com.company;
import javax.swing.*;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import static sun.misc.Version.print;
import static sun.misc.Version.println;
/**
 * Benchmark program: for each row of {@code d:\excelData2Abstract.csv} it looks for the first row
 * of {@code d:\listFromDb.csv} whose first field is contained in the row's first field, copies that
 * row's second field into a third column, and writes the result to {@code D:\DbListModify.csv}.
 * Progress timestamps are written to a time-stamped log file on drive D:.
 */
public class DbListModify {
    // Rows of excelData2Abstract.csv: [0] and [1] come from the file, [2] receives the looked-up value.
    static String[][] arrexcelData2Abstract;
    // Rows of listFromDb.csv: [0] is the key that is searched for, [1] the value that is copied.
    static String[][] arrlistFromDb;
    // Log file receiving the progress timestamps.
    static File file;
    // Output CSV receiving the processed rows.
    static File fileCsv;
    // Timestamp format used for the log file name and the log entries.
    static SimpleDateFormat df;

    /**
     * Runs the whole benchmark: count rows, load both files, match, write the output CSV.
     * Any exception is printed to standard error and ends the run.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        int IntCsvexcelData2AbstractLength = 0;
        int IntCsvlistFromDbLength = 0;
        df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss:SSS");
        try {
            file = new File("D:\\test_appendfile31" + now().replace(":", "").replace("/", "") + ".txt");
            fileCsv = new File("D:\\DbListModify.csv");
            if (!file.exists()) {
                file.createNewFile();
            }
            if (!fileCsv.exists()) {
                // Create the CSV itself; the log file was created here a second time before.
                fileCsv.createNewFile();
            }
            // try-with-resources flushes and closes both writers even when a later step fails.
            try (FileWriter fileWritter = new FileWriter(file.getAbsoluteFile(), false);
                 FileWriter CsvfileWritter = new FileWriter(fileCsv.getAbsoluteFile(), false)) {

                IntCsvexcelData2AbstractLength = countLines("d:\\excelData2Abstract.csv", "gb2312");
                fileWritter.write("IntCsvexcelData2AbstractLength:" + IntCsvexcelData2AbstractLength + "\r\n");
                readexcelData2AbstractCSV("d:\\excelData2Abstract.csv", IntCsvexcelData2AbstractLength, 3);
                System.out.println("itemexcelData2Abstract" + now());
                fileWritter.write("itemexcelData2Abstract" + now() + "\r\n");

                IntCsvlistFromDbLength = countLines("d:\\listFromDb.csv", "utf-8");
                fileWritter.write("IntCsvlistFromDbLength:" + IntCsvlistFromDbLength + "\r\n");
                // Size the lookup array with its own row count; the row count of the other
                // file was passed here before.
                readlistFromDbCSV("d:\\listFromDb.csv", IntCsvlistFromDbLength, 3);
                System.out.println("arrlistFromDb" + now());
                fileWritter.write("CsvexcelData2Abstract赋值开始:" + now() + "\r\n");

                // Only the first (row count - 2) rows are processed, matching the early return
                // in readexcelData2AbstractCSV.
                // NOTE(review): presumably the last two lines of the file are not data - confirm.
                int rowsToProcess = IntCsvexcelData2AbstractLength - 2;
                for (int inta = 0; inta < rowsToProcess; inta++) {
                    String text = arrexcelData2Abstract[inta][0].trim(); // invariant for the inner loop
                    for (int intb = 0; intb < IntCsvlistFromDbLength; intb++) {
                        if (text.contains(arrlistFromDb[intb][0].trim())) {
                            arrexcelData2Abstract[inta][2] = arrlistFromDb[intb][1].trim();
                            break; // the first match wins
                        }
                    }
                }
                fileWritter.write("CsvexcelData2Abstract赋值结束:写入CSV开始" + now() + "\r\n");

                // Build the whole CSV in memory and write it with a single call.
                StringBuilder stringBuilder = new StringBuilder();
                for (int inta = 0; inta < rowsToProcess; inta++) {
                    stringBuilder.append(arrexcelData2Abstract[inta][0]).append(",")
                            .append(arrexcelData2Abstract[inta][1]).append(",")
                            .append(arrexcelData2Abstract[inta][2]).append(",")
                            .append("\r\n");
                    System.out.println(inta);
                }
                CsvfileWritter.write(stringBuilder.toString());
                fileWritter.write("写入CSV完成" + now() + "\r\n");
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Returns the current time formatted with {@link #df}. */
    private static String now() {
        return df.format(System.currentTimeMillis());
    }

    /**
     * Counts the lines of a text file.
     *
     * @param pFilename   path of the file to read
     * @param charsetName name of the charset used to decode the file
     * @return number of lines in the file
     * @throws IOException if the file cannot be opened or read, or the charset is unsupported
     */
    private static int countLines(String pFilename, String charsetName) throws IOException {
        int count = 0;
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(pFilename), charsetName))) {
            // Only the number of lines matters. The fields are no longer split here, which
            // used to throw on a line consisting solely of commas.
            while (reader.readLine() != null) {
                count++;
            }
        }
        return count;
    }

    /**
     * Loads the file into {@link #arrexcelData2Abstract}: the first two comma-separated fields of
     * each line are stored and the third column is set to an empty string. Reading stops once
     * (row count - 2) rows have been stored. Any exception is printed and ends the load early.
     *
     * @param pFilename                      path of the CSV file
     * @param IntCsvexcelData2AbstractLength number of rows to allocate
     * @param intcollen                      number of columns to allocate (at least 3 is required)
     */
    private static void readexcelData2AbstractCSV(String pFilename, int IntCsvexcelData2AbstractLength, int intcollen) {
        int i = 0;
        // NOTE(review): FileReader decodes with the platform default charset, whereas main counts
        // this file's lines as gb2312 - confirm which encoding the file really uses.
        try (BufferedReader br = new BufferedReader(new FileReader(pFilename))) {
            arrexcelData2Abstract = new String[IntCsvexcelData2AbstractLength][intcollen];
            String line;
            while ((line = br.readLine()) != null) {
                String[] temp = line.split(",");
                arrexcelData2Abstract[i][0] = temp[0];
                arrexcelData2Abstract[i][1] = temp[1];
                arrexcelData2Abstract[i][2] = ""; // filled in later by the matching loop in main
                System.out.println("i:" + i);
                i = i + 1;
                if (i == IntCsvexcelData2AbstractLength - 2) {
                    return;
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Loads the file into {@link #arrlistFromDb}: the first two comma-separated fields of each
     * line are stored and echoed to standard output. Any exception is printed and ends the load
     * early.
     *
     * @param pFilename              path of the CSV file
     * @param IntCsvlistFromDbLength number of rows to allocate
     * @param intcollen              number of columns to allocate (at least 2 is required)
     */
    private static void readlistFromDbCSV(String pFilename, int IntCsvlistFromDbLength, int intcollen) {
        int i = 0;
        // NOTE(review): FileReader decodes with the platform default charset, whereas main counts
        // this file's lines as utf-8 - confirm which encoding the file really uses.
        try (BufferedReader br = new BufferedReader(new FileReader(pFilename))) {
            arrlistFromDb = new String[IntCsvlistFromDbLength][intcollen];
            String line;
            while ((line = br.readLine()) != null) {
                String[] temp = line.split(",");
                for (int j = 0; j < 2; j++) {
                    arrlistFromDb[i][j] = temp[j];
                    System.out.println("readlistFromDbCSVtemp[" + j + "]:" + temp[j]);
                }
                System.out.println("i:" + i);
                i = i + 1;
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}