企业要求数据表的数据更新后能够实时的被搜索引擎搜索到,查找solr的DataImport的文档提到了一个定时器实现这种实时要求的解决方案
实现方法:
1 配置监听器
web.xml
<listener>
  <listener-class>
    org.apache.solr.handler.dataimport.scheduler.ApplicationListener
  </listener-class>
</listener>
2 引入jar文件
注:如果用的是jre6, 官方下载的jar文件要重新编译,貌似是版本不兼容
jar文件包括三个类
(1) 监听器ApplicationListener
package org.apache.solr.handler.dataimport.scheduler;
import java.util.Calendar;
import java.util.Date;
import java.util.Timer;
import javax.servlet.ServletContext;
import javax.servlet.ServletContextEvent;
import javax.servlet.ServletContextListener;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class ApplicationListener implements ServletContextListener {

    private static final Logger logger = LoggerFactory.getLogger(ApplicationListener.class);

    /**
     * Cancels the scheduled data-import timer when the web application shuts
     * down, so no background HTTP-post task outlives the servlet context.
     */
    @Override
    public void contextDestroyed(ServletContextEvent servletContextEvent) {
        ServletContext servletContext = servletContextEvent.getServletContext();
        // Get the timer stored by contextInitialized() (may be null if
        // initialization failed or scheduling was disabled).
        Timer timer = (Timer) servletContext.getAttribute("timer");
        // Cancel all active tasks in the timer's queue.
        if (timer != null)
            timer.cancel();
        // Remove the timer from the context.
        servletContext.removeAttribute("timer");
    }

    /**
     * On application start-up, creates a {@link Timer} that periodically runs
     * an HTTPPostScheduler task (interval read from dataimport.properties) and
     * stores the timer in the servlet context so contextDestroyed() can cancel it.
     */
    @Override
    public void contextInitialized(ServletContextEvent servletContextEvent) {
        ServletContext servletContext = servletContextEvent.getServletContext();
        try {
            // Create the timer and timer task objects.
            Timer timer = new Timer();
            HTTPPostScheduler task =
                    new HTTPPostScheduler(servletContext.getServletContextName(), timer);
            // Interval in minutes, taken from dataimport.properties.
            int interval = task.getIntervalInt();
            // Delay the first run by one interval to avoid firing while the
            // app/server is still starting.
            Calendar calendar = Calendar.getInstance();
            calendar.add(Calendar.MINUTE, interval);
            Date startTime = calendar.getTime();
            // Schedule the task; use long arithmetic for the period so the
            // minutes-to-millis conversion cannot overflow int.
            timer.scheduleAtFixedRate(task, startTime, 1000L * 60 * interval);
            // Save the timer in the context for later cancellation.
            servletContext.setAttribute("timer", timer);
        } catch (Exception e) {
            // HTTPPostScheduler signals "scheduling turned off" by throwing an
            // Exception whose message ends with "disabled". getMessage() can be
            // null for other exception types — guard against a secondary NPE
            // that would hide the real initialization error.
            String message = e.getMessage();
            if (message != null && message.endsWith("disabled")) {
                logger.info("Schedule disabled");
            } else {
                logger.error("Problem initializing the scheduled task: ", e);
            }
        }
    }
}
(2)定时任务HttpPostScheduler
package org.apache.solr.handler.dataimport.scheduler;
import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Timer;
import java.util.TimerTask;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class HTTPPostScheduler extends TimerTask {
private String syncEnabled;
private String[] syncCores;
private String server;
private String port;
private String webapp;
private String params;
private String interval;
private String cores;
private SolrDataImportProperties p;
private boolean singleCore;
private static final Logger logger = LoggerFactory.getLogger(HTTPPostScheduler. class);
public HTTPPostScheduler(String webAppName, Timer t) throws Exception{
// load properties from global dataimport.properties
p = new SolrDataImportProperties();
reloadParams();
fixParams(webAppName);
if(!syncEnabled.equals("1")) throw new Exception("Schedule disabled");
if(syncCores == null || (syncCores.length == 1 && syncCores[0].isEmpty())){
singleCore = true;
logger.info("<index update process> Single core identified in dataimport.properties");
} else{
singleCore = false;
logger.info("<index update process> Multiple cores identified in dataimport.properties. Sync active for: " + cores);
}
}
private void reloadParams(){
p.loadProperties( true);
syncEnabled = p.getProperty(SolrDataImportProperties.SYNC_ENABLED);
cores = p.getProperty(SolrDataImportProperties.SYNC_CORES);
server = p.getProperty(SolrDataImportProperties.SERVER);
port = p.getProperty(SolrDataImportProperties.PORT);
webapp = p.getProperty(SolrDataImportProperties.WEBAPP);
params = p.getProperty(SolrDataImportProperties.PARAMS);
interval = p.getProperty(SolrDataImportProperties.INTERVAL);
syncCores = cores != null ? cores.split(",") : null;
}
private void fixParams(String webAppName){
if(server == null || server.isEmpty()) server = "localhost";
if(port == null || port.isEmpty()) port = "8080";
if(webapp == null || webapp.isEmpty()) webapp = webAppName;
if(interval == null || interval.isEmpty() || getIntervalInt() <= 0) interval = "30";
}
public void run() {
try{
// check mandatory params
if(server.isEmpty() || webapp.isEmpty() || params == null || params.isEmpty()){
logger.warn("<index update process> Insuficient info provided for data import");
logger.info("<index update process> Reloading global dataimport.properties");
reloadParams();
// single-core
} else if(singleCore){
prepUrlSendHttpPost();
// multi-core
} else if(syncCores.length == 0 || (syncCores.length == 1 && syncCores[0].isEmpty())){
logger.warn("<index update process> No cores scheduled for data import");
logger.info("<index update process> Reloading global dataimport.properties");
reloadParams();
} else{
for(String core : syncCores){
prepUrlSendHttpPost(core);
}
}
} catch(Exception e){
logger.error("Failed to prepare for sendHttpPost", e);
reloadParams();
}
}
private void prepUrlSendHttpPost(){
String coreUrl = "http://" + server + ":" + port + "/" + webapp + params;
sendHttpPost(coreUrl, null);
}
private void prepUrlSendHttpPost(String coreName){
String coreUrl = "http://" + server + ":" + port + "/" + webapp + "/" + coreName + params;
sendHttpPost(coreUrl, coreName);
}
private void sendHttpPost(String completeUrl, String coreName){
DateFormat df = new SimpleDateFormat("dd.MM.yyyy HH:mm:ss SSS");
Date startTime = new Date();
// prepare the core var
String core = coreName == null ? "" : "[" + coreName + "] ";
logger.info(core + "<index update process> Process started at .............. " + df.format(startTime));
try{
URL url = new URL(completeUrl);
HttpURLConnection conn = (HttpURLConnection)url.openConnection();
conn.setRequestMethod("POST");
conn.setRequestProperty("type", "submit");
conn.setDoOutput( true);
// Send HTTP POST
conn.connect();
logger.info(core + "<index update process> Request method\t\t\t" + conn.getRequestMethod());
logger.info(core + "<index update process> Succesfully connected to server\t" + server);
logger.info(core + "<index update process> Using port\t\t\t" + port);
logger.info(core + "<index update process> Application name\t\t\t" + webapp);
logger.info(core + "<index update process> URL params\t\t\t" + params);
logger.info(core + "<index update process> Full URL\t\t\t\t" + conn.getURL());
logger.info(core + "<index update process> Response message\t\t\t" + conn.getResponseMessage());
logger.info(core + "<index update process> Response code\t\t\t" + conn.getResponseCode());
// listen for change in properties file if an error occurs
if(conn.getResponseCode() != 200){
reloadParams();
}
conn.disconnect();
logger.info(core + "<index update process> Disconnected from server\t\t" + server);
Date endTime = new Date();
logger.info(core + "<index update process> Process ended at ................ " + df.format(endTime));
} catch(MalformedURLException mue){
logger.error("Failed to assemble URL for HTTP POST", mue);
} catch(IOException ioe){
logger.error("Failed to connect to the specified URL while trying to send HTTP POST", ioe);
} catch(Exception e){
logger.error("Failed to send HTTP POST", e);
}
}
public int getIntervalInt() {
try{
return Integer.parseInt(interval);
} catch(NumberFormatException e){
logger.warn("Unable to convert 'interval' to number. Using default value (30) instead", e);
return 30; // return default in case of error
}
}
import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Timer;
import java.util.TimerTask;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class HTTPPostScheduler extends TimerTask {
private String syncEnabled;
private String[] syncCores;
private String server;
private String port;
private String webapp;
private String params;
private String interval;
private String cores;
private SolrDataImportProperties p;
private boolean singleCore;
private static final Logger logger = LoggerFactory.getLogger(HTTPPostScheduler. class);
public HTTPPostScheduler(String webAppName, Timer t) throws Exception{
// load properties from global dataimport.properties
p = new SolrDataImportProperties();
reloadParams();
fixParams(webAppName);
if(!syncEnabled.equals("1")) throw new Exception("Schedule disabled");
if(syncCores == null || (syncCores.length == 1 && syncCores[0].isEmpty())){
singleCore = true;
logger.info("<index update process> Single core identified in dataimport.properties");
} else{
singleCore = false;
logger.info("<index update process> Multiple cores identified in dataimport.properties. Sync active for: " + cores);
}
}
private void reloadParams(){
p.loadProperties( true);
syncEnabled = p.getProperty(SolrDataImportProperties.SYNC_ENABLED);
cores = p.getProperty(SolrDataImportProperties.SYNC_CORES);
server = p.getProperty(SolrDataImportProperties.SERVER);
port = p.getProperty(SolrDataImportProperties.PORT);
webapp = p.getProperty(SolrDataImportProperties.WEBAPP);
params = p.getProperty(SolrDataImportProperties.PARAMS);
interval = p.getProperty(SolrDataImportProperties.INTERVAL);
syncCores = cores != null ? cores.split(",") : null;
}
private void fixParams(String webAppName){
if(server == null || server.isEmpty()) server = "localhost";
if(port == null || port.isEmpty()) port = "8080";
if(webapp == null || webapp.isEmpty()) webapp = webAppName;
if(interval == null || interval.isEmpty() || getIntervalInt() <= 0) interval = "30";
}
public void run() {
try{
// check mandatory params
if(server.isEmpty() || webapp.isEmpty() || params == null || params.isEmpty()){
logger.warn("<index update process> Insuficient info provided for data import");
logger.info("<index update process> Reloading global dataimport.properties");
reloadParams();
// single-core
} else if(singleCore){
prepUrlSendHttpPost();
// multi-core
} else if(syncCores.length == 0 || (syncCores.length == 1 && syncCores[0].isEmpty())){
logger.warn("<index update process> No cores scheduled for data import");
logger.info("<index update process> Reloading global dataimport.properties");
reloadParams();
} else{
for(String core : syncCores){
prepUrlSendHttpPost(core);
}
}
} catch(Exception e){
logger.error("Failed to prepare for sendHttpPost", e);
reloadParams();
}
}
private void prepUrlSendHttpPost(){
String coreUrl = "http://" + server + ":" + port + "/" + webapp + params;
sendHttpPost(coreUrl, null);
}
private void prepUrlSendHttpPost(String coreName){
String coreUrl = "http://" + server + ":" + port + "/" + webapp + "/" + coreName + params;
sendHttpPost(coreUrl, coreName);
}
private void sendHttpPost(String completeUrl, String coreName){
DateFormat df = new SimpleDateFormat("dd.MM.yyyy HH:mm:ss SSS");
Date startTime = new Date();
// prepare the core var
String core = coreName == null ? "" : "[" + coreName + "] ";
logger.info(core + "<index update process> Process started at .............. " + df.format(startTime));
try{
URL url = new URL(completeUrl);
HttpURLConnection conn = (HttpURLConnection)url.openConnection();
conn.setRequestMethod("POST");
conn.setRequestProperty("type", "submit");
conn.setDoOutput( true);
// Send HTTP POST
conn.connect();
logger.info(core + "<index update process> Request method\t\t\t" + conn.getRequestMethod());
logger.info(core + "<index update process> Succesfully connected to server\t" + server);
logger.info(core + "<index update process> Using port\t\t\t" + port);
logger.info(core + "<index update process> Application name\t\t\t" + webapp);
logger.info(core + "<index update process> URL params\t\t\t" + params);
logger.info(core + "<index update process> Full URL\t\t\t\t" + conn.getURL());
logger.info(core + "<index update process> Response message\t\t\t" + conn.getResponseMessage());
logger.info(core + "<index update process> Response code\t\t\t" + conn.getResponseCode());
// listen for change in properties file if an error occurs
if(conn.getResponseCode() != 200){
reloadParams();
}
conn.disconnect();
logger.info(core + "<index update process> Disconnected from server\t\t" + server);
Date endTime = new Date();
logger.info(core + "<index update process> Process ended at ................ " + df.format(endTime));
} catch(MalformedURLException mue){
logger.error("Failed to assemble URL for HTTP POST", mue);
} catch(IOException ioe){
logger.error("Failed to connect to the specified URL while trying to send HTTP POST", ioe);
} catch(Exception e){
logger.error("Failed to send HTTP POST", e);
}
}
public int getIntervalInt() {
try{
return Integer.parseInt(interval);
} catch(NumberFormatException e){
logger.warn("Unable to convert 'interval' to number. Using default value (30) instead", e);
return 30; // return default in case of error
}
}
}
(3) 属性文件类SolrDataImportProperties
package org.apache.solr.handler.dataimport.scheduler;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Properties;
import org.apache.solr.core.SolrResourceLoader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class SolrDataImportProperties {
private Properties properties;
public static final String SYNC_ENABLED = "syncEnabled";
public static final String SYNC_CORES = "syncCores";
public static final String SERVER = "server";
public static final String PORT = "port";
public static final String WEBAPP = "webapp";
public static final String PARAMS = "params";
public static final String INTERVAL = "interval";
private static final Logger logger = LoggerFactory.getLogger(SolrDataImportProperties. class);
public SolrDataImportProperties(){
// loadProperties(true);
}
public void loadProperties( boolean force){
try{
SolrResourceLoader loader = new SolrResourceLoader( null);
logger.info("Instance dir = " + loader.getInstanceDir());
String configDir = loader.getConfigDir();
configDir = SolrResourceLoader.normalizeDir(configDir);
if(force || properties == null){
properties = new Properties();
String dataImportPropertiesPath = configDir + "\\dataimport.properties";
FileInputStream fis = new FileInputStream(dataImportPropertiesPath);
properties.load(fis);
}
} catch(FileNotFoundException fnfe){
logger.error("Error locating DataImportScheduler dataimport.properties file", fnfe);
} catch(IOException ioe){
logger.error("Error reading DataImportScheduler dataimport.properties file", ioe);
} catch(Exception e){
logger.error("Error loading DataImportScheduler properties", e);
}
}
public String getProperty(String key){
return properties.getProperty(key);
}
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Properties;
import org.apache.solr.core.SolrResourceLoader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class SolrDataImportProperties {
private Properties properties;
public static final String SYNC_ENABLED = "syncEnabled";
public static final String SYNC_CORES = "syncCores";
public static final String SERVER = "server";
public static final String PORT = "port";
public static final String WEBAPP = "webapp";
public static final String PARAMS = "params";
public static final String INTERVAL = "interval";
private static final Logger logger = LoggerFactory.getLogger(SolrDataImportProperties. class);
public SolrDataImportProperties(){
// loadProperties(true);
}
public void loadProperties( boolean force){
try{
SolrResourceLoader loader = new SolrResourceLoader( null);
logger.info("Instance dir = " + loader.getInstanceDir());
String configDir = loader.getConfigDir();
configDir = SolrResourceLoader.normalizeDir(configDir);
if(force || properties == null){
properties = new Properties();
String dataImportPropertiesPath = configDir + "\\dataimport.properties";
FileInputStream fis = new FileInputStream(dataImportPropertiesPath);
properties.load(fis);
}
} catch(FileNotFoundException fnfe){
logger.error("Error locating DataImportScheduler dataimport.properties file", fnfe);
} catch(IOException ioe){
logger.error("Error reading DataImportScheduler dataimport.properties file", ioe);
} catch(Exception e){
logger.error("Error loading DataImportScheduler properties", e);
}
}
public String getProperty(String key){
return properties.getProperty(key);
}
}
3 在solr.home的文件夹下建立conf 文件夹
属性文件dataimport.properties放在该文件夹下
我的属性文件内容配置如下
#################################################
# #
# dataimport scheduler properties #
# #
#################################################
# to sync or not to sync
# 1 - active ; anything else - inactive
syncEnabled=1
# which cores to schedule
# in a multi-core environment you can decide which cores you want syncronized
# leave empty or comment it out if using single-core deployment
syncCores=core0
# solr server name or IP address
# [ defaults to localhost if empty ]
server=localhost
# solr server port
# [ defaults to 8080 if empty ]
port=8080
# application name/context
# [ defaults to current ServletContextListener's context (app) name ]
webapp=solr
# URL params [ mandatory ]
# remainder of URL
params=/dataimport?command=delta-import&clean=false&commit=true
# params=/dataimport?command=delta-import&clean=false&commit=true
# schedule interval
# number of minutes between two runs
# [ defaults to 30 if empty ]
interval=10