通过NIO的FileChannel实现文件的切割。
指定大小
package com.ityj.nio;
import lombok.extern.slf4j.Slf4j;
import org.springframework.util.StopWatch;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.util.ArrayList;
import java.util.List;
@Slf4j
public class FileSplitBySizeExample {
    /**
     * Splits a large file into sequentially numbered parts ("output_0", "output_1", ...)
     * of at most {@code maxFileSize} bytes each, using NIO FileChannels.
     *
     * Fixes over the previous version:
     * - the output channel stays open until its part is full, instead of being
     *   re-opened and re-closed for every 1 MB chunk;
     * - each write is capped at the part's remaining quota, so no part can
     *   overshoot maxFileSize by up to a whole buffer;
     * - parts are truncated on creation (the old CREATE+APPEND silently grew
     *   stale files when the program was re-run);
     * - the read loop tests for -1 (EOF) explicitly; a 0-byte read no longer
     *   terminates the copy early.
     */
    public static void main(String[] args) throws IOException {
        StopWatch stopWatch = new StopWatch();
        stopWatch.start();
        String inputFilePath = "D:\\迅雷云盘\\国蒙双语.4K.H265.AAC-YYDS.mkv.xltd";
        String outputDirPath = "D:\\XmpCache";
        long maxFileSize = 1024 * 1024 * 1024L; // maximum size of each output part (1 GB)
        Path inputFile = Paths.get(inputFilePath);
        ByteBuffer buffer = ByteBuffer.allocate(1024 * 1024); // 1 MB transfer buffer
        try (FileChannel inputFileChannel = FileChannel.open(inputFile, StandardOpenOption.READ)) {
            List<Path> outputFiles = new ArrayList<>();
            FileChannel outputFileChannel = null;
            long outputFileSize = 0;
            int fileCount = 0;
            try {
                while (inputFileChannel.read(buffer) != -1) {
                    buffer.flip();
                    while (buffer.hasRemaining()) {
                        // Start a new part when none is open yet or the current one is full.
                        if (outputFileChannel == null || outputFileSize >= maxFileSize) {
                            if (outputFileChannel != null) {
                                outputFileChannel.close();
                            }
                            Path outputFile = Paths.get(outputDirPath, "output_" + fileCount++);
                            outputFiles.add(outputFile);
                            outputFileChannel = FileChannel.open(outputFile,
                                    StandardOpenOption.CREATE, StandardOpenOption.WRITE,
                                    StandardOpenOption.TRUNCATE_EXISTING);
                            outputFileSize = 0;
                        }
                        // Cap this write at the part's remaining quota so the part never
                        // exceeds maxFileSize; restore the limit for the next round.
                        int chunk = (int) Math.min(buffer.remaining(), maxFileSize - outputFileSize);
                        int savedLimit = buffer.limit();
                        buffer.limit(buffer.position() + chunk);
                        outputFileSize += outputFileChannel.write(buffer);
                        buffer.limit(savedLimit);
                    }
                    buffer.clear();
                }
            } finally {
                if (outputFileChannel != null) {
                    outputFileChannel.close();
                }
            }
        }
        stopWatch.stop();
        System.out.println("Split file completed.");
        log.info("Time cost:{}", stopWatch.getTotalTimeSeconds());
    }
}
指定行数 (split by line count)
// TODO(bug): decoding fixed 1024-byte chunks can split a multi-byte UTF-8 character
// mid-sequence, and a line spanning two reads is emitted as two separate lines;
// only "\r\n" separators are recognized (bare "\n" files are not split at all).
package com.ityj.nio;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
public class FileSplitByLinesExample {
    /**
     * Splits a text file into parts of at most {@code maxLines} lines each,
     * named "output_0", "output_1", ... in the output directory.
     *
     * Fixes the marked "todo error" of the previous chunk-decoding version:
     * decoding fixed 1024-byte chunks could split a multi-byte UTF-8 character
     * (decode failure/garbage) or split one line across two chunks (emitted as
     * two lines), and only "\r\n" separators were handled. A BufferedReader
     * decodes and assembles complete lines, handling \n, \r\n and \r uniformly,
     * and the input is now closed via try-with-resources.
     */
    public static void main(String[] args) throws IOException {
        Path inputFilePath = Paths.get("D:\\XmpCache\\aa.txt");
        Path outputDirPath = Paths.get("D:\\XmpCache");
        int maxLines = 1000000; // maximum number of lines per output file
        Charset charset = Charset.forName("UTF-8");
        List<String> lines = new ArrayList<>();
        int fileCount = 0;
        try (java.io.BufferedReader reader = Files.newBufferedReader(inputFilePath, charset)) {
            String line;
            while ((line = reader.readLine()) != null) {
                // Flush a full batch before adding the next line.
                if (lines.size() >= maxLines) {
                    writeOutputFile(outputDirPath, fileCount++, lines);
                    lines.clear();
                }
                lines.add(line);
            }
        }
        // Write the final, possibly partial batch.
        if (!lines.isEmpty()) {
            writeOutputFile(outputDirPath, fileCount, lines);
        }
        System.out.println("Split file completed.");
    }

    // Writes one batch of lines to "<outputDir>/output_<fileCount>" in UTF-8.
    private static void writeOutputFile(Path outputDirPath, int fileCount, List<String> lines) throws IOException {
        Path outputFilePath = outputDirPath.resolve("output_" + fileCount);
        Files.write(outputFilePath, lines, Charset.forName("UTF-8"));
    }
}
适配gzip
package com.ityj.utils;
import java.io.*;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
public class FileSplitter {
    private static final String GZIP_EXTENSION = ".gz";

    /**
     * Splits a gzip-compressed text file into gzip-compressed parts of at most
     * {@code linesPerFile} lines each, named "<inputName>.<n>.gz" (n starting
     * at 1) in the input file's directory.
     *
     * Fix: the previous version buffered line bytes and wrote them to a
     * FileChannel obtained from a FileInputStream on the INPUT file — a
     * read-only channel, so the write throws NonWritableChannelException —
     * while the GZIPOutputStream parts were created but never received any
     * data. Lines are now written to the current gzip part, and the part is
     * always closed (finished) even if reading fails.
     *
     * NOTE(review): lines are decoded and re-encoded with the platform default
     * charset, matching the original reader — confirm that is acceptable.
     *
     * @param inputFile    gzip-compressed input file
     * @param linesPerFile maximum number of lines per output part; must be > 0
     * @throws IOException if reading the input or writing a part fails
     */
    public static void splitFileByLineCount(File inputFile, int linesPerFile) throws IOException {
        if (linesPerFile <= 0) {
            throw new IllegalArgumentException("linesPerFile must be positive: " + linesPerFile);
        }
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new GZIPInputStream(new FileInputStream(inputFile))))) {
            GZIPOutputStream gzipOut = null;
            try {
                String line;
                int lineCount = 0;
                int fileNumber = 0;
                while ((line = reader.readLine()) != null) {
                    // Roll over to a new part every linesPerFile lines.
                    if (lineCount % linesPerFile == 0) {
                        if (gzipOut != null) {
                            gzipOut.finish();
                            gzipOut.close();
                        }
                        fileNumber++;
                        // File(parent, child) also copes with a null parent
                        // (plain relative input names), unlike getParent() + "/".
                        File outputFile = new File(inputFile.getParentFile(),
                                inputFile.getName() + "." + fileNumber + GZIP_EXTENSION);
                        gzipOut = new GZIPOutputStream(new FileOutputStream(outputFile));
                    }
                    gzipOut.write((line + "\n").getBytes());
                    lineCount++;
                }
            } finally {
                if (gzipOut != null) {
                    gzipOut.close(); // close() finishes the gzip stream
                }
            }
        }
    }

    public static void main(String[] args) throws IOException {
        File inputFile = new File("inputFile.gz");
        int linesPerFile = 100000;
        splitFileByLineCount(inputFile, linesPerFile);
    }
}
简单按行读文件
/**
 * Reads a text file line by line with NIO and returns the lines (without
 * line terminators).
 *
 * Fixes over the previous version:
 * - bytes are accumulated and decoded as UTF-8 per line instead of casting
 *   each byte to char, which corrupted every multi-byte character;
 * - a final line with no trailing '\n' is no longer silently dropped;
 * - a '\r' preceding the '\n' (CRLF files) is stripped from the line.
 */
public static List<String> readFileWithNIO(String filePath) {
    List<String> lines = new ArrayList<>();
    try (FileChannel fileChannel = FileChannel.open(Path.of(filePath), StandardOpenOption.READ)) {
        ByteBuffer byteBuffer = ByteBuffer.allocate(1024); // transfer buffer
        // Accumulates the raw bytes of the current (possibly incomplete) line.
        java.io.ByteArrayOutputStream lineBytes = new java.io.ByteArrayOutputStream();
        while (fileChannel.read(byteBuffer) != -1) {
            byteBuffer.flip(); // switch buffer to read mode
            while (byteBuffer.hasRemaining()) {
                byte b = byteBuffer.get();
                if (b == '\n') {
                    lines.add(decodeLine(lineBytes));
                } else {
                    lineBytes.write(b);
                }
            }
            byteBuffer.clear(); // ready for the next read
        }
        // Flush a trailing line that has no terminator.
        if (lineBytes.size() > 0) {
            lines.add(decodeLine(lineBytes));
        }
    } catch (IOException e) {
        e.printStackTrace(); // kept from the original: errors yield a partial result
    }
    return lines;
}

// Decodes the accumulated bytes as one UTF-8 line (dropping a trailing '\r')
// and resets the accumulator for the next line.
private static String decodeLine(java.io.ByteArrayOutputStream lineBytes) {
    String line = new String(lineBytes.toByteArray(), java.nio.charset.StandardCharsets.UTF_8);
    lineBytes.reset();
    return line.endsWith("\r") ? line.substring(0, line.length() - 1) : line;
}
#!/bin/bash
# Input and output file names
input_file="input.txt"
output_file="output.txt"

# Remove the output file if it already exists
if [ -f "$output_file" ]; then
    rm "$output_file"
fi

# Filter in a single awk pass instead of spawning one awk process per line.
# The old while-read loop also ran the line through an unquoted `echo $line`,
# which collapsed whitespace and expanded glob characters; awk reads the file
# directly and preserves each line verbatim.
# Keep every line whose third '|'-separated field is not "A999".
awk -F '|' '$3 != "A999"' "$input_file" >> "$output_file"
#!/bin/bash
# Input file name
input_file="input.txt"
# Output file name
output_file="output.txt"

# Truncate the output file so repeated runs do not append duplicate results.
: > "$output_file"

# Loop through each line of the input file
while IFS= read -r line
do
    # Split on '|' with read -a: unlike fields=($(echo "$line" | tr '|' '\n')),
    # this does not word-split fields containing spaces and does not
    # glob-expand characters like '*' inside a field.
    IFS='|' read -r -a fields <<< "$line"
    # Copy the line unless the third field is exactly "A999"
    if [ "${fields[2]}" != "A999" ]; then
        echo "$line" >> "$output_file"
    fi
done < "$input_file"
压缩一个大文件(gz)
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.zip.GZIPOutputStream;
public class GzipFileCompressor {
    /**
     * Streams a (potentially large) file through an NIO channel into a gzip
     * output stream, 1 KB at a time.
     */
    public static void main(String[] args) {
        String sourceFilePath = "path/to/your/largefile.txt";
        String compressedFilePath = "path/to/your/compressedfile.gz";
        try (
            FileInputStream sourceStream = new FileInputStream(sourceFilePath);
            FileChannel sourceChannel = sourceStream.getChannel();
            GZIPOutputStream gzipSink = new GZIPOutputStream(new FileOutputStream(compressedFilePath))
        ) {
            ByteBuffer chunk = ByteBuffer.allocateDirect(1024);
            // Read until the channel reports end-of-file (-1).
            for (int n = sourceChannel.read(chunk); n != -1; n = sourceChannel.read(chunk)) {
                chunk.flip();
                // Direct buffers have no backing array, so copy the readable
                // bytes out before handing them to the gzip stream.
                byte[] bytes = new byte[chunk.remaining()];
                chunk.get(bytes);
                gzipSink.write(bytes);
                chunk.clear();
            }
            System.out.println("File compressed successfully.");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.zip.GZIPOutputStream;
public class NIOGzipFileCompressor {
    /**
     * Reads a file through an NIO channel and compresses it to gzip.
     *
     * Fix: the previous version wrote the buffer to {@code outChannel} — the
     * raw FileOutputStream's channel — which bypassed the GZIPOutputStream
     * entirely. The result was a gzip header (written when the stream was
     * constructed) followed by uncompressed bytes: a corrupt .gz file. All
     * data now flows through the gzip stream.
     */
    public static void main(String[] args) {
        String sourceFilePath = "path/to/your/largefile.txt";
        String compressedFilePath = "path/to/your/compressedfile.gz";
        try (
            FileInputStream fileInputStream = new FileInputStream(sourceFilePath);
            GZIPOutputStream gzipOutputStream = new GZIPOutputStream(new FileOutputStream(compressedFilePath));
            FileChannel inChannel = fileInputStream.getChannel()
        ) {
            // Heap buffer: allocate() guarantees a backing array for write(byte[], off, len).
            ByteBuffer buffer = ByteBuffer.allocate(1024);
            while (inChannel.read(buffer) != -1) {
                buffer.flip(); // switch to read mode
                gzipOutputStream.write(buffer.array(),
                        buffer.arrayOffset() + buffer.position(), buffer.remaining());
                buffer.clear(); // switch back to write mode
            }
            System.out.println("File compressed successfully.");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
// Fragment (depends on an `inputChannel` FileChannel and a `gzipOut` gzip stream
// declared elsewhere): pump the channel's contents into the gzip stream.
ByteBuffer buffer = ByteBuffer.allocate(8192); // 8 KB buffer size
// Loop while a read produced data OR the buffer still holds unflushed bytes.
// NOTE(review): `read(...) > 0` treats both 0 and -1 (EOF) as "no data"; for a
// plain file channel a 0-byte read is rare, but confirm the actual channel type.
while (inputChannel.read(buffer) > 0 || buffer.position() > 0) {
buffer.flip();
// buffer.array() is valid here because allocate() returns a heap buffer with a
// backing array; after flip(), limit() is the count of readable bytes from index 0.
// NOTE(review): assumes arrayOffset() == 0, which holds for allocate() buffers.
gzipOut.write(buffer.array(), 0, buffer.limit());
buffer.clear();
}
// Call the rules search endpoint with HTTP Basic authentication built from the
// API token (presumably a SonarQube server — confirm against the deployment).
String url = "http://192.168.142.129:9000/api/rules/search";
String token = "8eb31bf43a5bc196cb9eed880be4a46651fbc8c8:";
String basicAuth = "Basic " + new String(Base64.getEncoder().encode(token.getBytes("UTF-8")));
// Attach the Authorization header to an otherwise empty GET request.
HttpHeaders requestHeaders = new HttpHeaders();
requestHeaders.add("Authorization", basicAuth);
HttpEntity<String> requestEntity = new HttpEntity<String>(null, requestHeaders);
// Execute the request and return the parsed JSON body.
ResponseEntity<JSONObject> response = restTemplate.exchange(url, HttpMethod.GET, requestEntity, JSONObject.class);
return response.getBody();
#!/bin/bash
# Sample `zing-ps` every 60 seconds, 250 times, appending each timestamped
# snapshot to the log file.
log_file="/path/to/your/directory/output.log"
count=0
while [ "$count" -lt 250 ]; do
    timestamp=$(date +"%Y-%m-%d %H:%M:%S")
    echo "[$timestamp] $(zing-ps)" >> "$log_file"
    sleep 60
    count=$((count + 1))
done
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
public class GenericSplitCollectionExample {
    public static void main(String[] args) {
        // Sample list to partition.
        List<String> originalList = List.of("a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t");
        // Partition the list into batches.
        int batchSize = 5; // size of each batch
        Map<Integer, List<String>> splitCollections = splitCollection(originalList, batchSize);
        // Print the result.
        splitCollections.forEach((key, value) -> System.out.println("Collection " + key + ": " + value));
    }

    /**
     * Partitions {@code list} into consecutive batches of at most
     * {@code batchSize} elements, keyed by 0-based batch index.
     *
     * Fix: the previous version grouped by {@code list.indexOf(element)},
     * which is O(n) per element (O(n²) overall) and returns the index of the
     * FIRST occurrence — so duplicate elements all landed in the first
     * duplicate's batch. Grouping by position handles duplicates correctly.
     * (Made public: it is a reusable utility, not main-only plumbing.)
     *
     * @param list      the list to partition (not modified)
     * @param batchSize maximum batch size; must be > 0
     * @return mutable map of batch index to batch contents, in list order
     * @throws IllegalArgumentException if {@code batchSize <= 0}
     */
    public static <T> Map<Integer, List<T>> splitCollection(List<T> list, int batchSize) {
        if (batchSize <= 0) {
            throw new IllegalArgumentException("batchSize must be positive: " + batchSize);
        }
        return java.util.stream.IntStream.range(0, list.size())
                .boxed()
                .collect(Collectors.groupingBy(i -> i / batchSize,
                        Collectors.mapping(list::get, Collectors.toList())));
    }
}
package com.ityj.algorithm.gz;
import java.io.*;
import java.util.zip.GZIPOutputStream;
public class CompressFileToGz {
    /**
     * Compresses an input file to gzip by streaming it into a GZIPOutputStream.
     */
    public static void main(String[] args) {
        // Source file to compress.
        String inputFilePath = "data.dat";
        // Destination .gz file.
        String outputFilePath = "data.dat.gz";
        // try-with-resources closes (and thereby finishes) both streams.
        try (FileInputStream fis = new FileInputStream(inputFilePath);
             GZIPOutputStream gzipOS = new GZIPOutputStream(new FileOutputStream(outputFilePath))) {
            // transferTo performs the same buffered read/write copy the old
            // manual loop did, producing identical compressed output.
            fis.transferTo(gzipOS);
            System.out.println("文件已成功压缩成 " + outputFilePath);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
package com.ityj.algorithm.gz;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
public class WriteCollectionToDatFile {
    /**
     * Writes a small in-memory collection to a text file, one element per line.
     */
    public static void main(String[] args) {
        // Sample data to persist.
        ArrayList<String> dataList = new ArrayList<>();
        dataList.add("Item 1");
        dataList.add("Item 2");
        dataList.add("Item 3");
        // Destination file path.
        String filePath = "data.dat";
        // try-with-resources flushes and closes the writer.
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(filePath))) {
            // One element per line, terminated with the platform line separator.
            for (int i = 0; i < dataList.size(); i++) {
                writer.write(dataList.get(i));
                writer.newLine();
            }
            System.out.println("集合数据已成功写入到 " + filePath);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
package com.ityj.algorithm.gz;
import java.io.*;
import java.util.ArrayList;
import java.util.zip.GZIPOutputStream;
public class WriteCollectionToGzInMemory {
    /**
     * Gzip-compresses a collection in memory (one element per line), then
     * writes the compressed bytes to a .gz file in a single shot.
     */
    public static void main(String[] args) {
        // Sample data to compress.
        ArrayList<String> dataList = new ArrayList<>();
        dataList.add("Item 1");
        dataList.add("Item 2");
        dataList.add("Item 3");
        try (ByteArrayOutputStream compressedBuffer = new ByteArrayOutputStream();
             GZIPOutputStream gzipStream = new GZIPOutputStream(compressedBuffer)) {
            // Closing the PrintWriter finishes the gzip stream so the in-memory
            // buffer holds a complete gzip member before it is read below.
            try (PrintWriter lineWriter = new PrintWriter(new OutputStreamWriter(gzipStream))) {
                dataList.forEach(lineWriter::println);
            }
            // Snapshot of the compressed bytes.
            byte[] compressedData = compressedBuffer.toByteArray();
            // Persist the compressed payload.
            String gzFilePath = "data.dat.gz";
            try (FileOutputStream fileOut = new FileOutputStream(gzFilePath)) {
                fileOut.write(compressedData);
            }
            System.out.println("集合数据已成功写入到 " + gzFilePath);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
import java.io.BufferedWriter;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.stream.IntStream;
public class WriteLargeDataFileExample {
    /**
     * Demo: writes one billion rows of 10 tab-terminated fields to a text file.
     */
    public static void main(String[] args) {
        Path filePath = Path.of("large_data_file.txt");
        // TRUNCATE_EXISTING: CREATE+WRITE alone leaves stale trailing bytes if a
        // larger file already existed at this path.
        try (BufferedWriter writer = Files.newBufferedWriter(filePath, StandardOpenOption.CREATE,
                StandardOpenOption.WRITE, StandardOpenOption.TRUNCATE_EXISTING)) {
            writeRows(writer, 1000000000L, 10);
            System.out.println("大数据文件写入成功!");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Writes {@code rowCount} rows of {@code fieldCount} tab-terminated fields
     * ("Field1\t...\tFieldN\t" + platform line separator).
     *
     * Fix: the previous version caught and printStackTrace'd IOExceptions
     * inside IntStream.forEach and kept looping on a broken writer; a plain
     * loop lets the exception propagate to the caller's catch block.
     */
    static void writeRows(BufferedWriter writer, long rowCount, int fieldCount) throws IOException {
        for (long i = 0; i < rowCount; i++) {
            StringBuilder line = new StringBuilder();
            for (int j = 0; j < fieldCount; j++) {
                line.append("Field").append(j + 1).append('\t');
            }
            line.append(System.lineSeparator());
            writer.write(line.toString());
        }
    }
}
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.stream.IntStream;
public class WriteBillionDataWithNIO {
    /**
     * Demo: writes one billion rows of 10 comma-terminated fields via NIO.
     */
    public static void main(String[] args) {
        Path filePath = Path.of("billion_data_nio.txt");
        // TRUNCATE_EXISTING: without it a shorter re-run leaves stale bytes from
        // a previous, larger file after the newly written data.
        try (FileChannel channel = FileChannel.open(filePath, StandardOpenOption.CREATE,
                StandardOpenOption.WRITE, StandardOpenOption.TRUNCATE_EXISTING)) {
            writeRows(channel, 1000000000L, 10);
            System.out.println("十亿条数据使用NIO写入成功!");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Writes {@code rowCount} rows of {@code fieldCount} comma-terminated
     * fields ("Field1,...,FieldN," + platform line separator) to the channel.
     *
     * Improvements over the previous inline loop: each row (fields plus
     * separator) is encoded into a single ByteBuffer — one channel write
     * instead of two — and the write loops until the buffer is drained, since
     * FileChannel.write may write fewer bytes than requested.
     */
    static void writeRows(FileChannel channel, long rowCount, int fieldCount) throws IOException {
        String separator = System.lineSeparator();
        for (long i = 0; i < rowCount; i++) {
            StringBuilder row = new StringBuilder();
            for (int j = 0; j < fieldCount; j++) {
                row.append("Field").append(j + 1).append(',');
            }
            row.append(separator);
            ByteBuffer buffer = ByteBuffer.wrap(row.toString().getBytes());
            while (buffer.hasRemaining()) {
                channel.write(buffer);
            }
        }
    }
}