前言
之前是做python开发的,今年开始做java开发了,之前一直在用pandas,现在突然没有这个工具感觉很不适应,于是想到基于jfinal实现一些通用功能
实现
首先是数据对象,就是一个简单的List<Record>
// Backing rows of the frame: one Record per row, with values stored under column names.
private final List<Record> recordList;
然后是构造方法,提供了三个,分别接收List<Record>、一个Collection(加上列名)和单个Record
/**
 * Creates a frame backed by the given rows.
 *
 * <p>The list is used as-is (it is not copied), so later changes to it are
 * visible through this frame. A {@code null} list is treated as an empty
 * frame so that subsequent calls do not fail with a NullPointerException.
 *
 * @param recordList the rows of the frame, one Record per row; may be null
 */
public DataFrame(List<Record> recordList) {
    this.recordList = recordList == null ? new ArrayList<>() : recordList;
}
/**
 * Creates a single-column frame: one row per element of {@code columnValues},
 * in the collection's iteration order, each holding that element under
 * {@code columnName}.
 *
 * @param columnValues the values of the column
 * @param columnName   the name of the column to create
 * @param <T>          the element type of the column
 */
public <T> DataFrame(Collection<T> columnValues,String columnName){
    List<Record> rows = new ArrayList<>();
    columnValues.forEach(cell -> {
        Record row = new Record();
        row.set(columnName, cell);
        rows.add(row);
    });
    this.recordList = rows;
}
/**
 * Creates a frame containing exactly one row.
 *
 * @param record the single row; it is stored as-is, not copied
 */
public DataFrame(Record record){
    List<Record> singleRow = new ArrayList<>();
    singleRow.add(record);
    this.recordList = singleRow;
}
setColumn方法(设置一列的值,可以传入列表或者单一值)
/**
 * Sets {@code columnName} to the same value in every row.
 *
 * @param columnName the column to write
 * @param value      the value stored in each row
 * @param <T>        the value type
 */
public <T> void setColumn(String columnName,T value){
    recordList.forEach(row -> row.set(columnName, value));
}
/**
 * Sets {@code columnName} row by row from a list of values: the i-th row
 * receives the i-th element.
 *
 * <p>If the list size differs from the number of rows, nothing is written
 * and no error is reported.
 *
 * @param columnName the column to write
 * @param value      one value per row, in row order
 * @param <T>        the element type
 */
public <T> void setColumn(String columnName,List<T> value){
    if (recordList.size() != value.size()) {
        // Size mismatch: leave the frame untouched.
        return;
    }
    int rowIndex = 0;
    for (Record row : recordList) {
        row.set(columnName, value.get(rowIndex));
        rowIndex++;
    }
}
得到行/列/具体某个值的方法
/**
 * Returns the value stored under {@code columnName} in the row at {@code index}.
 *
 * @param index      zero-based row index
 * @param columnName the column to read
 * @param <T>        the type the caller expects the value to have
 * @return the cell value, or {@code null} when {@code index} is null,
 *         negative, or not smaller than the number of rows
 */
public <T> T getValue(Integer index,String columnName){
    // Treat every out-of-range index the same way; previously only the upper
    // bound was checked, so a negative or null index threw instead.
    if (index == null || index < 0 || index >= recordList.size()){
        return null;
    }
    return recordList.get(index).get(columnName);
}
/**
 * Collects the values of one column, in row order.
 *
 * @param columnName the column to read
 * @param <T>        the type the caller expects the values to have
 * @return a new list with one entry per row
 */
public <T> List<T> getColumn(String columnName){
    List<T> cells = new ArrayList<>();
    recordList.forEach(row -> cells.add(row.get(columnName)));
    return cells;
}
/**
 * Returns the row at {@code index}.
 *
 * @param index zero-based row index
 * @return the stored Record itself (not a copy), or {@code null} when
 *         {@code index} is null, negative, or not smaller than the number of rows
 */
public Record getRow(Integer index){
    // Treat every out-of-range index the same way; previously only the upper
    // bound was checked, so a negative or null index threw instead.
    if (index == null || index < 0 || index >= recordList.size()){
        return null;
    }
    return recordList.get(index);
}
locEqual方法,得到某一列值=某一确定值的所有行
/**
 * Selects the rows whose value in {@code columnName} equals {@code equalValue}.
 *
 * <p>The comparison is null-safe: a {@code null} {@code equalValue} selects the
 * rows whose cell is {@code null}. Otherwise {@code equalValue.equals(cell)} decides.
 *
 * @param columnName the column to compare
 * @param equalValue the value to look for; may be null
 * @param <T>        the type of the value
 * @return a new frame holding the matching rows (the same Record objects,
 *         in their original order)
 */
public <T> DataFrame locEqual(String columnName,T equalValue){
    List<Record> matches = new ArrayList<>();
    for (Record record : recordList){
        Object cell = record.get(columnName);
        boolean same = equalValue == null ? cell == null : equalValue.equals(cell);
        if (same){
            matches.add(record);
        }
    }
    return new DataFrame(matches);
}
appendColumns方法,横向拼接另一个DataFrame(两个DataFrame的长度必须相同)
/**
 * Appends the columns of {@code another} to this frame, row by row: the i-th
 * row of this frame receives the columns of the i-th row of {@code another}
 * via {@code Record.setColumns}.
 *
 * @param another the frame whose columns are added; must have the same length
 * @throws Exception if the two frames differ in length
 */
public void appendColumns(DataFrame another) throws Exception {
    boolean sameLength = this.length().equals(another.length());
    if (!sameLength){
        throw new Exception("两个DataFrame长度不同");
    }
    int rowIndex = 0;
    for (Record row : recordList){
        row.setColumns(another.recordList.get(rowIndex));
        rowIndex++;
    }
}
unique方法,得到某一列去重之后的值
/**
 * Returns the distinct values of one column.
 *
 * @param columnName the column to read
 * @param <T>        the type the caller expects the values to have
 * @return a new HashSet of the column's values (iteration order is unspecified)
 */
public <T> Set<T> unique(String columnName){
    List<T> cells = this.getColumn(columnName);
    Set<T> distinct = new HashSet<>(cells);
    return distinct;
}
sumIntegerColumn方法(横向sum:对每一行的所有Integer列求和,需要指定存放求和结果的列名)
/**
 * Row-wise sum: for every row, adds up all values that are Integers and
 * stores the total under {@code sumColumnName}.
 *
 * <p>Non-Integer values are ignored. If the row already holds an Integer under
 * {@code sumColumnName}, that value is part of the total as well.
 *
 * @param sumColumnName the column that receives each row's total
 * @param <T>           unused; kept for signature compatibility
 */
public <T> void sumIntegerColumn(String sumColumnName){
    recordList.forEach(row -> {
        int rowTotal = 0;
        for (String name : row.getColumnNames()){
            Object cell = row.get(name);
            if (cell instanceof Integer){
                rowTotal += (Integer) cell;
            }
        }
        row.set(sumColumnName, rowTotal);
    });
}
纵向sum方法
/**
 * Adds two values of the same supported type.
 *
 * <p>Supported pairs: Integer, Long, Double, Float and BigDecimal (arithmetic
 * sum) and String (concatenation). The result has the same type as the
 * operands, so it can be fed back into this method to accumulate over many
 * values. Previously numeric results were returned as a String cast to
 * {@code T}, which broke both typed callers and chained accumulation.
 *
 * @param t1  the left operand
 * @param t2  the right operand
 * @param <T> the common operand type
 * @return the sum (or concatenation) of the two operands
 * @throws Exception if the operands are null, of different types, or of an
 *                   unsupported type
 */
@SuppressWarnings("unchecked") // each branch yields a value of the operands' own runtime type
public static <T> T sum(T t1, T t2) throws Exception {
    Object result;
    if (t1 instanceof Integer && t2 instanceof Integer) {
        result = (Integer) t1 + (Integer) t2;
    } else if (t1 instanceof Long && t2 instanceof Long) {
        result = (Long) t1 + (Long) t2;
    } else if (t1 instanceof Double && t2 instanceof Double) {
        result = (Double) t1 + (Double) t2;
    } else if (t1 instanceof Float && t2 instanceof Float) {
        result = (Float) t1 + (Float) t2;
    } else if (t1 instanceof BigDecimal && t2 instanceof BigDecimal) {
        result = ((BigDecimal) t1).add((BigDecimal) t2);
    } else if (t1 instanceof String && t2 instanceof String) {
        result = (String) t1 + (String) t2;
    } else {
        throw new Exception("Cant use sum for the type!");
    }
    return (T) result;
}
/**
 * Column-wise sum: folds every column over all rows with the static
 * {@code sum(T, T)} helper and returns the totals as a single Record.
 *
 * <p>Columns listed in {@code excludeColumns} are not summed; the result
 * carries the first row's value for them. An empty frame yields an empty Record.
 *
 * @param excludeColumns names of the columns to copy from the first row instead of summing
 * @param <T>            unused; kept for signature compatibility
 * @return a Record with one entry per column
 * @throws Exception if a summed column holds values the static {@code sum} helper rejects
 */
public <T> Record sum(List<String> excludeColumns) throws Exception {
    Record totals = new Record();
    if (length()==0){
        return totals;
    }
    Record firstRow = recordList.get(0);
    for (String column : getColumns()){
        // Start from the first row's value; excluded columns keep it unchanged.
        Object accumulated = firstRow.get(column);
        if (!excludeColumns.contains(column)){
            for (Record row : recordList.subList(1, recordList.size())){
                accumulated = sum(accumulated, row.get(column));
            }
        }
        totals.set(column, accumulated);
    }
    return totals;
}
apply方法(用法有点类似pandas.series的apply,可以直接替换当前列或者命名成新列)
/**
 * Transforms a column in place: every value of {@code columnName} is replaced
 * by the result of applying {@code operator} to it.
 *
 * @param columnName the column to read and overwrite
 * @param operator   the transformation applied to each value
 * @param <T>        the value type of the column
 */
public <T> void apply(String columnName, UnaryOperator<T> operator){
    // Same as the three-argument overload with source and target being the same column.
    this.apply(columnName, columnName, operator);
}
/**
 * Applies {@code operator} to every value of {@code columnName} and stores the
 * results under {@code targetColumnName}, row by row.
 *
 * <p>All values are transformed first and written afterwards.
 *
 * @param columnName       the column to read
 * @param targetColumnName the column that receives the transformed values
 * @param operator         the transformation applied to each value
 * @param <T>              the value type of the column
 */
public <T> void apply(String columnName, String targetColumnName,UnaryOperator<T> operator){
    List<T> cells = this.getColumn(columnName);
    for (int i = 0; i < cells.size(); i++){
        cells.set(i, operator.apply(cells.get(i)));
    }
    this.setColumn(targetColumnName, cells);
}
orderBy方法(目前仅支持针对Integer,LocalDateTime,Timestamp和BigDecimal的排序)
/**
 * Sorts the rows by one column, choosing the type-specific sorter from the
 * value found in the first row.
 *
 * <p>Supported value types are Integer, LocalDateTime, Timestamp and
 * BigDecimal. For an empty frame, or when the first row's value is of any
 * other type (including null), this frame is returned unchanged.
 *
 * @param columnName the column to sort by
 * @param order      the sort direction, passed through to the type-specific sorter
 * @param <T>        unused; kept for signature compatibility
 * @return the sorted frame, or this frame when no sorting was done
 */
public <T> DataFrame orderBy(String columnName,String order){
    if (this.length()<=0){
        return this;
    }
    Object sample = this.recordList.get(0).get(columnName);
    if (sample instanceof Integer){
        return orderByInteger(columnName,order);
    }
    if (sample instanceof LocalDateTime){
        return orderByLocalDateTime(columnName,order);
    }
    if (sample instanceof Timestamp){
        return orderByTimeStamp(columnName,order);
    }
    if (sample instanceof BigDecimal){
        return orderByBigDecimal(columnName,order);
    }
    return this;
}
/**
 * Sorts the rows by a Timestamp column.
 *
 * <p>The column's values are sorted, then for each distinct value (in sorted
 * order) the rows holding it are collected via {@code locEqual}, so rows with
 * equal values keep their relative order. {@code "asc"} (case-insensitive)
 * sorts ascending; any other {@code order} sorts descending.
 *
 * @param columnName the column to sort by
 * @param order      the sort direction
 * @return a new sorted frame, or this frame if anything throws while sorting
 */
private DataFrame orderByTimeStamp(String columnName,String order){
    try {
        List<Timestamp> sortedValues = this.getColumn(columnName);
        sortedValues.sort(Timestamp::compareTo);
        if (!"asc".equalsIgnoreCase(order)) {
            Collections.reverse(sortedValues);
        }
        List<Record> ordered = new ArrayList<>();
        Set<Timestamp> seen = new HashSet<>();
        for (Timestamp value : sortedValues) {
            // Set.add is true only the first time a value is met.
            if (seen.add(value)) {
                ordered.addAll(this.locEqual(columnName, value).recordList);
            }
        }
        return new DataFrame(ordered);
    }
    catch (Exception e){
        // Best effort: fall back to the unsorted frame.
        return this;
    }
}
/**
 * Sorts the rows by a LocalDateTime column.
 *
 * <p>The column's values are sorted, then for each distinct value (in sorted
 * order) the rows holding it are collected via {@code locEqual}, so rows with
 * equal values keep their relative order. {@code "asc"} (case-insensitive)
 * sorts ascending; any other {@code order} sorts descending.
 *
 * @param columnName the column to sort by
 * @param order      the sort direction
 * @return a new sorted frame, or this frame if anything throws while sorting
 */
private DataFrame orderByLocalDateTime(String columnName,String order){
    try {
        List<LocalDateTime> sortedValues = this.getColumn(columnName);
        sortedValues.sort(LocalDateTime::compareTo);
        if (!"asc".equalsIgnoreCase(order)) {
            Collections.reverse(sortedValues);
        }
        List<Record> ordered = new ArrayList<>();
        Set<LocalDateTime> seen = new HashSet<>();
        for (LocalDateTime value : sortedValues) {
            // Set.add is true only the first time a value is met.
            if (seen.add(value)) {
                ordered.addAll(this.locEqual(columnName, value).recordList);
            }
        }
        return new DataFrame(ordered);
    }
    catch (Exception e){
        // Best effort: fall back to the unsorted frame.
        return this;
    }
}
/**
 * Sorts the rows by an Integer column.
 *
 * <p>The column's values are sorted, then for each distinct value (in sorted
 * order) the rows holding it are collected via {@code locEqual}, so rows with
 * equal values keep their relative order. {@code "asc"} (case-insensitive)
 * sorts ascending; any other {@code order} sorts descending.
 *
 * @param columnName the column to sort by
 * @param order      the sort direction
 * @return a new sorted frame, or this frame if anything throws while sorting
 */
private DataFrame orderByInteger(String columnName,String order){
    try {
        List<Integer> sortedValues = this.getColumn(columnName);
        sortedValues.sort(Integer::compareTo);
        if (!"asc".equalsIgnoreCase(order)) {
            Collections.reverse(sortedValues);
        }
        List<Record> ordered = new ArrayList<>();
        Set<Integer> seen = new HashSet<>();
        for (Integer value : sortedValues) {
            // Set.add is true only the first time a value is met.
            if (seen.add(value)) {
                ordered.addAll(this.locEqual(columnName, value).recordList);
            }
        }
        return new DataFrame(ordered);
    }
    catch (Exception e){
        // Best effort: fall back to the unsorted frame.
        return this;
    }
}
/**
 * Sorts the rows by a BigDecimal column.
 *
 * <p>The column's values are sorted with {@code compareTo}, then for each
 * distinct value (distinct by {@code equals}, in sorted order) the rows
 * holding it are collected via {@code locEqual}, so rows with equal values
 * keep their relative order. {@code "asc"} (case-insensitive) sorts
 * ascending; any other {@code order} sorts descending.
 *
 * @param columnName the column to sort by
 * @param order      the sort direction
 * @return a new sorted frame, or this frame if anything throws while sorting
 */
private DataFrame orderByBigDecimal(String columnName,String order){
    try {
        List<BigDecimal> sortedValues = this.getColumn(columnName);
        sortedValues.sort(BigDecimal::compareTo);
        if (!"asc".equalsIgnoreCase(order)) {
            Collections.reverse(sortedValues);
        }
        List<Record> ordered = new ArrayList<>();
        Set<BigDecimal> seen = new HashSet<>();
        for (BigDecimal value : sortedValues) {
            // Set.add is true only the first time a value is met.
            if (seen.add(value)) {
                ordered.addAll(this.locEqual(columnName, value).recordList);
            }
        }
        return new DataFrame(ordered);
    }
    catch (Exception e){
        // Best effort: fall back to the unsorted frame.
        return this;
    }
}
左连接方法(目前只做了左连接,还没做其他的连接)
/**
 * Left join of this frame with {@code another} on the column {@code columnName}.
 *
 * <p>Every row of this frame appears in the result, in this frame's row order:
 * <ul>
 *   <li>for each row of {@code another} with an equal key, one merged row is
 *       produced; on a column-name clash the value from this (left) frame wins;</li>
 *   <li>if no row of {@code another} matches, one row is produced carrying the
 *       left row's values plus {@code null} for the columns of {@code another}
 *       (taken from its first row) that the left row does not supply.</li>
 * </ul>
 * Keys are compared null-safely, so a {@code null} key matches a {@code null} key.
 *
 * <p>Result rows are fresh Record objects; neither this frame's rows nor
 * {@code another}'s rows are modified. (Previously matched rows of
 * {@code another} were overwritten and reused, so left rows sharing a key
 * clobbered each other, and unmatched left rows were mutated in place.)
 *
 * @param another    the right-hand frame
 * @param columnName the key column present in both frames
 * @return the joined frame, or this frame itself when {@code another} is empty
 */
public DataFrame mergeLeft(DataFrame another,String columnName){
    if (another.length()==0){
        return this;
    }
    String[] otherColumns=another.recordList.get(0).getColumnNames();
    List<Record> finalRecordList=new ArrayList<>();
    for (Record leftRecord : this.recordList){
        Object key = leftRecord.get(columnName);
        boolean matched = false;
        for (Record otherRecord : another.recordList){
            Object otherKey = otherRecord.get(columnName);
            boolean sameKey = key == null ? otherKey == null : key.equals(otherKey);
            if (!sameKey){
                continue;
            }
            matched = true;
            // Right values first, then left values on top so the left side wins clashes.
            Record merged = new Record();
            merged.setColumns(otherRecord);
            merged.setColumns(leftRecord);
            finalRecordList.add(merged);
        }
        if (!matched){
            // Null placeholders first, then the left values on top, so a left
            // column that shares a name with a right column is not wiped out.
            Record merged = new Record();
            for (String otherColumn : otherColumns){
                if (!otherColumn.equals(columnName)){
                    merged.set(otherColumn, null);
                }
            }
            merged.setColumns(leftRecord);
            finalRecordList.add(merged);
        }
    }
    return new DataFrame(finalRecordList);
}
总结
这只是为了满足某些需求自制的一个小工具,还在不断迭代中,也希望大家可以多提提建议,过段时间应该会传到GitHub上去