1. 将文件分成两部分（使用 RandomAccessFile）
package org.example;
import java.io.RandomAccessFile;
import java.util.HashMap;
import java.util.Map;
/**
 * Demo: loads a text file, cuts it into two parts at the first line break at or
 * after the middle of the file, and prints both parts.
 */
public class App {

    /**
     * Reads {@code e:/word.txt} fully into memory, splits it on a line boundary
     * near the midpoint and prints the first part, a separator line and (when
     * present) the second part, followed by {@code "world".hashCode() % 3}.
     *
     * <p>Bytes are decoded with the platform default charset, as before.
     *
     * @param args unused
     * @throws Exception if the file cannot be opened or read
     */
    public static void main(String[] args) throws Exception {
        byte[] content;
        // Read-only mode: opening with "rw" would create the file when it is missing.
        RandomAccessFile raf = new RandomAccessFile("e:/word.txt", "r");
        try {
            long fileLength = raf.length();
            if (fileLength > Integer.MAX_VALUE) {
                throw new IllegalStateException(
                        "File too large to load into memory: " + fileLength + " bytes");
            }
            content = new byte[(int) fileLength];
            // readFully keeps reading until the buffer is full; a single read() may stop short.
            raf.readFully(content);
        } finally {
            raf.close();
        }

        byte[][] parts = splitAtLineBreak(content);

        System.out.println(new String(parts[0]));
        System.out.println("==============");
        if (parts.length > 1) {
            System.out.println(new String(parts[1]));
        }
        System.out.println("world".hashCode() % 3);
    }

    /**
     * Splits {@code content} at the first {@code '\n'} found at or after its midpoint.
     *
     * @param content the complete file contents
     * @return a one-element array holding {@code content} itself when no line
     *         break exists at or after the midpoint (including empty input);
     *         otherwise a two-element array with the bytes before the line break
     *         and the bytes after it (the line break itself belongs to neither)
     */
    private static byte[][] splitAtLineBreak(byte[] content) {
        int cut = content.length / 2;
        while (cut < content.length && content[cut] != '\n') {
            cut++;
        }
        if (cut == content.length) {
            // No line break in the second half: the file cannot be split on a line boundary.
            return new byte[][] {content};
        }
        byte[] first = new byte[cut];
        byte[] second = new byte[content.length - cut - 1];
        System.arraycopy(content, 0, first, 0, first.length);
        // Start one past the line break so the second part holds exactly the remaining bytes.
        System.arraycopy(content, cut + 1, second, 0, second.length);
        return new byte[][] {first, second};
    }
}
效果图
分割成两部分
2. 模仿 Hadoop 的 FileSplit，将文件切分为任意数量的分片
先写实体类
package org.example;
/**
 * Describes one slice of a file: where it starts, how many bytes it covers,
 * which file it belongs to and which host it is associated with.
 *
 * <p>Plain mutable bean with no validation; offsets and lengths are in bytes.
 *
 * @author myh
 * @since 2021/9/16
 */
public class FileSplit {
    /** Byte offset of the first byte of this split within the file. */
    private int start;
    /** Path of the file this split belongs to. */
    private String filePath;
    /** Number of bytes covered by this split. */
    private int length;
    /** Host recorded for this split. */
    private String host;

    /** Creates an empty split; populate it through the setters. */
    public FileSplit() {
    }

    /**
     * Creates a fully populated split.
     *
     * @param start    byte offset of the split within the file
     * @param filePath path of the file the split belongs to
     * @param length   number of bytes covered by the split
     * @param host     host recorded for the split
     */
    public FileSplit(int start, String filePath, int length, String host) {
        this.start = start;
        this.filePath = filePath;
        this.length = length;
        this.host = host;
    }

    /** @return byte offset of the split within the file */
    public int getStart() {
        return start;
    }

    /** @param start byte offset of the split within the file */
    public void setStart(int start) {
        this.start = start;
    }

    /** @return path of the file the split belongs to */
    public String getFilePath() {
        return filePath;
    }

    /** @param filePath path of the file the split belongs to */
    public void setFilePath(String filePath) {
        this.filePath = filePath;
    }

    /** @return number of bytes covered by the split */
    public int getLength() {
        return length;
    }

    /** @param length number of bytes covered by the split */
    public void setLength(int length) {
        this.length = length;
    }

    /** @return host recorded for the split */
    public String getHost() {
        return host;
    }

    /** @param host host recorded for the split */
    public void setHost(String host) {
        this.host = host;
    }

    /**
     * Returns a readable description of the split so that printing a split (or a
     * list of splits) shows its contents rather than an identity hash.
     */
    @Override
    public String toString() {
        return "FileSplit{start=" + start
                + ", filePath='" + filePath + '\''
                + ", length=" + length
                + ", host='" + host + '\''
                + '}';
    }
}
再写带 main 方法（psvm）的切片类
package org.example;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.util.ArrayList;
import java.util.List;
/**
 * Computes line-aligned {@link FileSplit}s for a text file, loosely imitating
 * the way Hadoop cuts an input file into splits.
 *
 * @author myh
 * @since 2021/9/16
 */
public class TextInputFormat {
    /** Path of the file to split. */
    String filePath = "e:/word.txt";
    /** Host recorded in every generated split. */
    String host = "192.168.100.195";

    /**
     * Cuts the file at {@link #filePath} into roughly {@code numSplit} pieces whose
     * boundaries always fall on line breaks.
     *
     * <p>The target size of a piece is {@code fileLength / numSplit} bytes (at
     * least 1). Starting from the current offset, the method probes the byte at
     * {@code offset + targetSize}; if it is not {@code '\n'} it scans forward to
     * the next {@code '\n'} or the end of the file. The split covers the bytes up
     * to, but not including, that line break, and the next split starts right
     * after it, so no line is ever divided between two splits. Because pieces are
     * extended to the next line break, fewer than {@code numSplit} splits may be
     * returned.
     *
     * <p>Only offsets are computed here; consumers are expected to {@code seek}
     * to {@link FileSplit#getStart()} and read {@link FileSplit#getLength()} bytes.
     *
     * @param numSplit desired number of splits; must be positive
     * @return the splits in file order; an empty file yields one zero-length split
     * @throws IllegalArgumentException if {@code numSplit} is not positive
     * @throws IOException              if the file cannot be opened or read, or is
     *                                  too large for {@code int} offsets
     */
    public List<FileSplit> getSplits(int numSplit) throws Exception {
        if (numSplit <= 0) {
            throw new IllegalArgumentException("numSplit must be positive: " + numSplit);
        }
        List<FileSplit> fs = new ArrayList<FileSplit>();
        // Read-only mode: opening with "rw" would create the file when it is missing.
        RandomAccessFile raf = new RandomAccessFile(filePath, "r");
        try {
            long fileLength = raf.length();
            if (fileLength > Integer.MAX_VALUE) {
                // FileSplit stores offsets as int; refuse rather than silently truncate.
                throw new IOException(
                        "File too large for int-based splits: " + fileLength + " bytes");
            }
            int len = (int) fileLength;
            if (len == 0) {
                fs.add(new FileSplit(0, filePath, 0, host));
                return fs;
            }
            // Never let the target size drop to 0, otherwise the cursor could not advance.
            int splitSize = Math.max(1, len / numSplit);
            int location = 0;
            while (location < len) {
                if (len - location <= splitSize) {
                    // The remainder fits into one piece: emit it as the final split.
                    fs.add(new FileSplit(location, filePath, len - location, host));
                    break;
                }
                int lineEnd = findLineEnd(raf, location + splitSize, len);
                fs.add(new FileSplit(location, filePath, lineEnd - location, host));
                if (lineEnd >= len) {
                    // No further line break: the split just added ran to the end of the file.
                    break;
                }
                // Skip the line break itself; it belongs to neither split.
                location = lineEnd + 1;
            }
        } finally {
            raf.close();
        }
        return fs;
    }

    /**
     * Finds the offset of the first {@code '\n'} at or after {@code from}.
     *
     * @param raf  open file to scan; its file pointer is moved
     * @param from offset at which scanning starts
     * @param len  length of the file in bytes
     * @return the offset of the line break, or {@code len} if none is found
     *         before the end of the file
     * @throws IOException if seeking or reading fails
     */
    private static int findLineEnd(RandomAccessFile raf, int from, int len) throws IOException {
        raf.seek(from);
        for (int pos = from; pos < len; pos++) {
            // read() signals end of file with -1 instead of throwing like readByte().
            int b = raf.read();
            if (b < 0) {
                return len;
            }
            if (b == '\n') {
                return pos;
            }
        }
        return len;
    }

    /**
     * Prints the splits computed for the default file when cut into five pieces.
     *
     * @param args unused
     * @throws Exception if the file cannot be read
     */
    public static void main(String[] args) throws Exception {
        System.out.println(new TextInputFormat().getSplits(5));
    }
}
效果图