第一步 配置R环境
我调用R执行所需要的包 包括了RServe,DBI
安装包后需library(包名)一下
然后本地启动RServe()
启动过程可能报错缺少其他包,根据提示下载对应的包即可
第二步 Java代码
需要导入的包
org.rosuda.REngine.REXPDouble;
org.rosuda.REngine.REXPGenericVector;
org.rosuda.REngine.REXPMismatchException;
org.rosuda.REngine.REXPString;
org.rosuda.REngine.RList;
org.rosuda.REngine.Rserve.RConnection;
org.rosuda.REngine.Rserve.RserveException;
实现部分全部源码
public class Rservice {
public static void main(String[] args) throws RserveException, REXPMismatchException {
//参数:相似度 sim
String sim="0.4";
//进行本地R连接
RConnection rc=new RConnection("localhost");
//queryCMD为R的执行语句
StringBuffer queryCMD = new StringBuffer();
//拼接R执行语句
queryCMD.append(
"source(\"D:/baoxin/rjava/xingtaipipei/code/curvematch_serve.R\",encoding = \"UTF-8\")\n"
+ "dbaddress<-c(\"jdbc:db2://10.70.50.3:50000/DCDVLP\",\"e6admin\",\"e6admin\")\n"
+ "sqlstrings<-\"select * from E6ADMIN.PL_TCM_HSM_TENSION_TRD_V where DATETIME > '20170525090000' \"\n"
+ "pattern<-c(520.0,520.0,520.0,520.0,510.0,520.0,510.0,520.0,520.0,510.0,520.0,510.0,520.0,520.0,520.0,520.0,530.0,520.0,530.0,520.0,520.0,520.0,530.0,520.0,530.0,520.0,520.0,520.0,520.0,520.0,520.0,520.0,520.0,520.0,520.0,520.0,520.0,520.0,520.0,530.0,520.0,520.0,520.0,520.0,520.0,520.0,520.0,520.0,520.0,520.0,520.0,520.0,520.0)\n"
+ "idpart<-\"PCOIL_NO,SAMP_NO\"\n"
+ "normalize.fn<-'normalize.range01'\n"
+ "matchpart<-\"TENS5\"\n"
+ "similarity<-" + sim + "\n"
+ "result<-cur_rjava(pattern, dbaddress, sqlstrings, similarity, idpart,matchpart,normalize.fn =normalize.fn)"
);
System.out.println("R执行语句:\n" + queryCMD.toString() + "\n");
//得到R执行成功返回的结果集result
RList result = rc.eval(queryCMD.toString()).asList();
//获取形态匹配返回结果
REXPGenericVector rv1 = (REXPGenericVector) result.get(1);
RList rList = rv1.asList();
//获取result结果集中的列:sim
REXPString rs2=(REXPString) rList.get(2);
String[] simList= rs2.asStrings();
//输出simList
for (String item : simList) {
System.out.println(item);
}
//关闭R连接
rc.close();
}
}
curvematch_serve.R文件
dist.DTW <- function(x) { dtw(x[1,], x[2,])$distance }
normalize.mean <- function(x) { x - mean(x) }
normalize.mean.sd <- function(x) { (x - mean(x)) / sd(x) }
normalize.range01 <- function(x) {if(max(x)==min(x)){x - min(x) }else{(x - min(x)) / (max(x)-min(x)) } }
normalize.first <- function(x) { x/as.double(x[1]) }
dist.euclidean <- function(x) { stats:::dist(x) }
normalize.dist<-function(x){return(1/(1+x))}
# **********************
#
# **********************
linear_tun<-function(To_inv,From_data){
a = (To_inv[2] - To_inv[1]) /(max(From_data) - min(From_data))
b = To_inv[1] - a * min(From_data)
tun_data<-a*From_data+b
return(tun_data)
}
#******************
#
#******************
plota.legend <- function
(
labels,
fill = NULL,
lastobs = NULL,
x = 'topleft',
merge = F,
bty = 'n',
yformat = plota.format,
...
)
{
if( !is.null(fill) ) fill = spl( as.character(fill) )
labels = spl( as.character(labels) )
if( !is.null(lastobs) ) {
if( is.list(lastobs) ) {
labels1 = sapply(lastobs, function(x) unclass(last(x))[1])
} else {
labels1 = unclass(last(lastobs))[1];
}
labels = paste(labels, match.fun(yformat)( labels1 ))
}
legend(x, legend = labels, fill = fill, merge = merge, bty = bty, ...)
}
# **********************
#
# **********************
spl <- function
(
s,
delim = ','
)
{
return(unlist(strsplit(s,delim)));
}
join <- function
(
v,
delim = ''
)
{
return(paste(v,collapse=delim));
}
#*********************************#
# main
#*********************************#
cur_match_single<- function(Tdata,pattern,n.query=length(pattern), n.reference=length(Tdata), n.match=1, similarity=NULL, normalize.fn = 'normalize.mean', dist.fn = 'dist.euclidean', Index_temp){
query <- pattern[1:n.query]
reference = Tdata[1:n.reference,]
dist.fn.name = ''
if(is.character(dist.fn)) {
dist.fn.name = paste('with',dist.fn)
dist.fn = get(dist.fn)
}
if(is.null(similarity)){#
dist = rep(NA, n.reference)
query.normalized = match.fun(normalize.fn)(query)
normalize.fn1<-'normalize.mean'
query.normalized = match.fun(normalize.fn1)(query.normalized)
for( i in n.query : n.reference ) {
if(Index_temp[i - n.query + 1]==Index_temp[i]){
window = reference[ (i - n.query + 1) : i]
window.normalized = match.fun(normalize.fn)(window)
#**************************************#
normalize.fn1<-'normalize.mean'
window.normalized = match.fun(normalize.fn1)(window.normalized)
# **************************************#
dist[i] = match.fun(dist.fn)(rbind(query.normalized, window.normalized)) }else{
dist[i] =-1
}
}
min.index = c()
temp = dist
temp[temp<0]<-NA
for(i in 1:n.match) {
if(any(!is.na(temp))) {
index = which.min(temp)
min.index[i] = index
temp[max(0,index - n.query+1) : min(n.reference,(index + n.query))] = NA
}
}
n.matched = length(min.index)
return(list(min.index=min.index, dist=dist[min.index],sim=normalize.dist(dist[min.index]), n.matched=n.matched))
}else{#
dist = rep(NA, n.reference)
query.normalized = match.fun(normalize.fn)(query)
normalize.fn1<-'normalize.mean'
query.normalized = match.fun(normalize.fn1)(query.normalized)
sim <- rep(NA, n.reference)
for( i in n.query : n.reference ) {
if(Index_temp[i - n.query + 1]==Index_temp[i]){
window = reference[ (i - n.query + 1) : i]
window.normalized = match.fun(normalize.fn)(window)
#**************************************#
normalize.fn1<-'normalize.mean'
window.normalized = match.fun(normalize.fn1)(window.normalized)
#**************************************#
dist[i] = match.fun(dist.fn)(rbind(query.normalized, window.normalized))
sim[i] <- normalize.dist(dist[i])
# if(sim[i]>=similarity){
# print( paste0(as.character(sim[i]),":",as.character(i - n.query + 1),"to",as.character(i)))
# }
}else{
dist[i] <--1
sim[i] <- 0
}
}
if( max(sim,na.rm = T)<similarity){
n.matched<-0
return(list(n.matched=n.matched))
}else{
min.index = c()
temp = sim
k<-1
while(any(!is.na(temp)) & (max(temp,na.rm = T)>=similarity)){
index = which.max(temp)
min.index[k] = index
temp[max(0,index - n.query+1) : min(n.reference,(index + n.query))] = NA
k<-k+1
}
n.matched = length(min.index)
return(list(min.index=min.index, dist=dist[min.index],sim=sim[min.index], n.matched=n.matched))
}
}
}
#******************#
#
#******************#
cur_match<-function(Mdata,pattern,n.query=length(pattern),n.reference=NULL,n.match=1,similarity=NULL, normalize.fn = 'normalize.mean', dist.fn = 'dist.euclidean',Index_temp){
Tempdata<-as.matrix(Mdata)
# print(head(Tempdata))
Tnum<-dim(Tempdata)[2]
if(is.null(colnames(Tempdata))){
Tnames<-paste0("Col",c(1:Tnum))
}else{
Tnames<-colnames(Tempdata)
}
if(dist.fn=='dist.DTW'){
library('dtw')
}
if(is.null(n.reference)){
n.reference<-dim(Tempdata)[1]
}
if((n.reference<n.query)&(dist.fn!='dist.DTW')){
return(list(n.matched=0))
} else{
dist_al<-c()
index_al<-c()
name_al<-c()
simil_al<-c()
j<-1
n.matched<-0
for(i in Tnames){
Tdata<-Mdata[i]
match_result<-cur_match_single(Tdata=Tdata,pattern=pattern, n.query=n.query,n.reference=n.reference, n.match=n.match, similarity=similarity, normalize.fn = normalize.fn, dist.fn = dist.fn,Index_temp=Index_temp)
# names(match_result)
# print(head(match_result))
if(match_result$n.matched !=0){
index_temp<-match_result$n.matched
dist_al[j:(j+index_temp-1)]<-match_result$dist
index_al[j:(j+index_temp-1)]<-match_result$min.index
simil_al[j:(j+index_temp-1)]<-match_result$sim
name_al[j:(j+index_temp-1)]<-i
j<-j+index_temp
# n.matched
n.matched<-n.matched+match_result$n.matched
}
}
if(n.matched==0){
return(list(n.matched=n.matched))
}else{
use_result<-data.frame(name_al,dist_al,simil_al,findex_al=index_al,index_al)
use_resultorder<-use_result[order(use_result$dist_al),]
#
if(is.null(similarity)){
if(dim(use_result)[1]>=n.match){
use_resultorder<-use_resultorder[1:n.match,]
}
}
use_resultorder$findex_al<- use_resultorder$index_al-n.query+1
colnames(use_resultorder)<-c("name","dist","sim","begin","end")
row.names(use_resultorder)<-NULL
return(list(match_result= use_resultorder,n.matched=n.matched))
}
}
}
cur_rjava <- function (pattern, dbaddress,sqlstrings,similarity=NULL, idpart,matchpart,n.match=1, normalize.fn = 'normalize.mean', dist.fn = 'dist.euclidean'){
if((!is.null(similarity))& ((similarity<0)|(similarity>1))){
return(list(n.matched=0))
}else{
if(class(pattern)=="character"){
pattern<-as.numeric(pattern)
}
options(java.parameters = "-Xss2560k")
library('RJDBC')
drv<-JDBC("com.ibm.db2.jcc.DB2Driver","D:/baoxin/rjava/xingtaipipei/file/db2jcc.jar",identifier.quote="\"")
print("link db2")
conn <- dbConnect(drv, dbaddress[1],dbaddress[2],dbaddress[3])
#print(sqlstrings)
Hdata <- dbGetQuery(conn,sqlstrings)
# print(Hdata)
#
dbDisconnect(conn)
# Hdata<-read.csv('D:/code/R/dtw/rewrite/code/version_5/pic/test/al_data.csv',stringsAsFactors = F)
if(grepl(',',idpart)){
idpart<-unlist(strsplit(unlist(idpart),','))
}
if(grepl(',',matchpart)){
matchpart<-unlist(strsplit(unlist(matchpart),','))
}
Idata <- Hdata[idpart]
Index_temp<-as.numeric(as.factor(Idata[,1]))
Tdata <- Hdata[matchpart]
if(any(is.na(Tdata))){
return(list(n.matched=0))
}else{
print("com start")
cur_result<-cur_match(Mdata=Tdata,pattern=pattern,n.query=length(pattern),n.reference=NULL,n.match=n.match, similarity = similarity, normalize.fn = normalize.fn, dist.fn = dist.fn,Index_temp=Index_temp)
print("com end")
# print()
if(cur_result$n.matched==0){
return(list(n.matched=0))
}else{
match_result<-cur_result$match_result[c("name","dist","sim")]
match_result[,paste(idpart,"begin",sep="_")]<- Idata[cur_result$match_result[,'begin'],]
match_result[,paste(idpart,"end",sep="_")]<- Idata[cur_result$match_result[,'end'],]
# match_result[,paste(idpart[1],"end",sep="_")]<- NULL
match_result<-match_result[order(-match_result[,'sim']),]
match_result[,c("dist","sim")]<-round(match_result[,c("dist","sim")],4)*100
for(i in names(match_result)){
match_result[,i]<-as.character(match_result[,i])
}
# match_result<-as.data.frame( t(match_result))
for(i in names(match_result)){
match_result[,i]<-as.character(match_result[,i])
}
print(cur_result$n.matched)
print("result")
return(list(n.matched=cur_result$n.matched,match_result=match_result) )
}
}
}
}
cur_rjavato01 <- function (dbaddress, sqlstrings){
library('RJDBC')
options(java.parameters = "-Xss2560k")
drv<-JDBC("com.ibm.db2.jcc.DB2Driver","D:/baoxin/rjava/xingtaipipei/file/db2jcc.jar",identifier.quote="\"")
print("link db2")
# conn <- dbConnect(drv, "jdbc:db2://10.70.50.3:50000/DCDVLP","e6admin","e6admin")
conn <- dbConnect(drv, dbaddress[1],dbaddress[2],dbaddress[3])
Hdata <- dbGetQuery(conn,sqlstrings)
print(Hdata)
dbDisconnect(conn)
# select CMP_TEMP as value, SEQ_NO as samp_no from sa_gx_H032_2001 where strip_coil = '126415254100' order by cast( SEQ_NO as int) asc
Hdata[,'VALUE']<-as.character(normalize.range01(Hdata[,'VALUE']))
Hdata<-as.data.frame( t(Hdata))
return(list(Hdata=Hdata))
}
cur_rjavato01df <- function (pattern){
Hdata<-as.data.frame( t(as.character(normalize.range01(pattern))))
return(list(Hdata=Hdata))
}