完整代码架子下载地址(只需写逻辑即可运行)链接: 点这里.
1.创建一个程序入口类(启动类)。
在启动类的main方法里调用下面的私有静态方法extract即可(extract是private的,需要与main方法定义在同一个类中)。
2.多线程创建
/**
 * Splits {@code files} into contiguous batches, runs one {@link Txt} task per batch on a
 * fixed thread pool, and concatenates the lines returned by every task.
 *
 * <p>At most 20 batches are created. Batch sizes differ by at most one element, and every
 * element of {@code files} belongs to exactly one batch. Results are appended in batch
 * (submission) order.
 *
 * @param files   paths of the documents to process; an empty list yields an empty result
 * @param caseIds case ids handed unchanged to every {@link Txt} task
 * @return the lines produced by all tasks that completed; a task that fails is reported on
 *         stderr and contributes nothing
 */
private static List<String> extract(List<String> files, List<String> caseIds) {
    // Note: keep the thread count modest so the server is not overloaded.
    // Only the rules inside Txt need to change to adapt the extraction logic.
    final int maxThreads = 20;
    List<String> reslt = new ArrayList<String>();
    int total = files.size();
    if (total == 0) {
        return reslt;
    }

    int batchCount = Math.min(maxThreads, total);
    int baseSize = total / batchCount;
    int remainder = total % batchCount;

    List<Future<List<String>>> tasks = new ArrayList<Future<List<String>>>(batchCount);
    ExecutorService fixedThreadPool = Executors.newFixedThreadPool(maxThreads);
    try {
        int from = 0;
        for (int i = 0; i < batchCount; i++) {
            // The first `remainder` batches take one extra element so nothing is left over.
            int to = from + baseSize + (i < remainder ? 1 : 0);
            List<String> filePaths = files.subList(from, to);
            tasks.add(fixedThreadPool.submit(new Txt(filePaths, caseIds)));
            from = to;
        }

        for (Future<List<String>> task : tasks) {
            try {
                // Blocks until this task is finished instead of spinning on isDone().
                reslt.addAll(task.get());
            } catch (InterruptedException ex) {
                // Restore the interrupt flag, cancel outstanding work and stop waiting.
                Thread.currentThread().interrupt();
                ex.printStackTrace();
                fixedThreadPool.shutdownNow();
                break;
            } catch (Exception ex) {
                // One failed batch must not discard the results of the other batches.
                ex.printStackTrace();
            }
        }
    } finally {
        fixedThreadPool.shutdown();
    }
    return reslt;
}
3.多线程内处理业务的逻辑为初始化Txt类
//关键点是这个类实现了Callable接口,并重写了call方法,数据就是在call方法内处理的
/**
 * Task that processes one batch of document paths and returns one tab-separated line
 * ({@code caseNumber \t unit \t fact}) for every document whose extracted case number is
 * contained in {@link #caseIds}.
 *
 * <p>NOTE(review): a single {@code RabbitApi} instance is shared by all tasks; this assumes
 * {@code RabbitApi} may be used from several threads at once — confirm.
 */
public class Txt implements Callable<List<String>> {
    /** Paths of the documents this task reads. */
    public List<String> filePaths;
    /** Case numbers to keep; documents with any other case number are skipped. */
    public List<String> caseIds;

    private static final RabbitApi rabbitApi = new RabbitApi();

    /**
     * @param filePaths paths of the documents to process
     * @param caseIds   case numbers that should appear in the output
     */
    public Txt(List<String> filePaths, List<String> caseIds) {
        this.filePaths = filePaths;
        this.caseIds = caseIds;
    }

    /**
     * Reads each file, runs it through {@code rabbitApi.RabbitInterpret(content, "base")} and
     * collects a line for every document whose case number is listed in {@link #caseIds}.
     * A failure on one file is printed to stderr and does not stop the remaining files.
     *
     * @return the collected lines, possibly empty
     */
    @Override
    public List<String> call() {
        List<String> list = new ArrayList<String>();
        for (String file : filePaths) {
            try {
                String content = FileUtils.readDoc(file);
                if ("".equals(content)) {
                    continue;
                }
                WebApiResult<RabbitInfo> war = rabbitApi.RabbitInterpret(content, "base");
                if (war == null || war.getData() == null) {
                    continue;
                }
                Map<String, Object> extractInfo = war.getData().getExtractInfo();
                if (extractInfo == null) {
                    continue;
                }
                String anhao = textOrEmpty(extractInfo, "meta_案号");
                if ("".equals(anhao) || !caseIds.contains(anhao)) {
                    continue;
                }
                // Missing fields become empty columns rather than the literal text "null".
                String unit = textOrEmpty(extractInfo, "info_被告人所在单位");
                String fact = textOrEmpty(extractInfo, "info_检察院指控事实");
                StringBuilder sb = new StringBuilder();
                sb.append(anhao).append("\t").append(unit).append("\t").append(fact);
                list.add(sb.toString());
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
        return list;
    }

    /** Returns the string stored under {@code key}, or {@code ""} when the key has no value. */
    private static String textOrEmpty(Map<String, Object> info, String key) {
        Object value = info.get(key);
        return value == null ? "" : (String) value;
    }
}
到此多线程原理完毕
接下来是启动上述逻辑的sh脚本代码
# Launch script: builds the classpath from ./lib and starts com.jingzhong.Extract in the
# background, appending stdout and stderr to ./log. It relies on $PWD and on the "."
# classpath entry, so it must be started from the directory that contains lib/.
export JRE_HOME="$JAVA_HOME/jre"
export PATH="$JAVA_HOME/bin:$JRE_HOME/bin:$PATH"
# JVM heap options. Expanded unquoted on the java command line on purpose so that each
# option becomes its own argument.
JAVA_OPTS='-Xms1024m -Xmx9192m'
CLASSPATH=".:$JAVA_HOME/lib:$JRE_HOME/lib:$CLASSPATH"
# Append every entry found under ./lib; quoting keeps paths containing spaces intact.
for file in "$PWD"/lib/*; do
    CLASSPATH="$CLASSPATH:$file"
done
export CLASSPATH
# The three trailing paths are passed to main(String[] args) as args[0..2].
# NOTE(review): the original comment described only two arguments (a json file path and an
# index name), which does not match the three paths passed here — confirm against main.
java $JAVA_OPTS -classpath "$CLASSPATH" com.jingzhong.Extract /home/task/baisi_ws/source/ /home/task/fd/zs.txt /home/task/fd/caseId.txt >> "$PWD/log" 2>&1 &
脚本启动方式:打包成tar.gz包后,解压到服务器上,先进入解压后的目录(脚本依赖当前目录下的lib目录和class文件),然后运行其中的run.sh文件即可,运行命令为sh run.sh
打包方式
<build>
    <plugins>
        <!-- Builds the distributable archive described by src/main/assembly/assembly.xml
             during the package phase. -->
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <!-- Stray whitespace around the artifact id removed. -->
            <artifactId>maven-assembly-plugin</artifactId>
            <version>2.6</version>
            <configuration>
                <finalName>${project.artifactId}</finalName>
                <descriptors>
                    <descriptor>src/main/assembly/assembly.xml</descriptor>
                </descriptors>
                <archive>
                    <manifest>
                        <mainClass>com.jingzhong.Extract</mainClass>
                    </manifest>
                </archive>
            </configuration>
            <executions>
                <execution>
                    <id>make-assembly</id>
                    <phase>package</phase>
                    <goals>
                        <goal>single</goal>
                    </goals>
                </execution>
            </executions>
        </plugin>
        <!-- Compiles the sources at Java 8 source/target level. -->
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-compiler-plugin</artifactId>
            <configuration>
                <source>8</source>
                <target>8</target>
            </configuration>
        </plugin>
    </plugins>
</build>
打包用的另外一个配置assembly.xml
<!-- Assembly descriptor: produces a tar.gz containing lib/ (dependencies plus selected
     local jars), the compiled classes and the launch scripts at the archive root. -->
<assembly
    xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.3"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.3
    http://maven.apache.org/xsd/assembly-1.1.3.xsd">
    <id>tool</id>
    <formats>
        <format>tar.gz</format>
    </formats>
    <includeBaseDirectory>true</includeBaseDirectory>
    <dependencySets>
        <!-- Runtime-scope dependencies are copied, still packed as jars, into lib/. -->
        <dependencySet>
            <outputDirectory>lib/</outputDirectory>
            <!-- Whether to include the jar built from this project itself -->
            <useProjectArtifact>false</useProjectArtifact>
            <unpack>false</unpack>
            <scope>runtime</scope>
        </dependencySet>
    </dependencySets>
    <fileSets>
        <!-- Selected jars from the project's own lib directory. -->
        <fileSet>
            <directory>${project.basedir}/lib</directory>
            <outputDirectory>lib</outputDirectory>
            <includes>
                <include>fd-rabbit-*.jar</include>
                <include>dom4j-1.6.1.jar</include>
            </includes>
        </fileSet>
        <!-- Compiled classes from the build output directory. -->
        <fileSet>
            <directory>${project.build.directory}/classes</directory>
            <outputDirectory>${file.separator}</outputDirectory>
            <includes>
                <include>**/*.class</include>
            </includes>
            <useDefaultExcludes>true</useDefaultExcludes>
        </fileSet>
        <!-- Shell scripts from src/main/resources/scripts. -->
        <fileSet>
            <directory>src/main/resources/scripts</directory>
            <outputDirectory>${file.separator}</outputDirectory>
            <includes>
                <include>*.sh</include>
            </includes>
        </fileSet>
    </fileSets>
</assembly>
备注:fd-topic-rabbit,