2025年NIO 实现大文件切割

NIO 实现大文件切割:通过 NIO 的 FileChannel 按指定大小切割大文件。示例代码位于 com.ityj.nio 包,使用了 Lombok(@Slf4j)与 Spring 的 StopWatch 做日志和计时。

大家好,我是讯享网,很高兴认识大家。

通过NIO的FileChannel实现文件的切割。

指定大小

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.util.ArrayList;
import java.util.List;

public class FileSplitBySizeExample {

    /** Read-buffer size for each pass over the source channel (1 MiB). */
    private static final int BUFFER_SIZE = 1024 * 1024;

    public static void main(String[] args) throws IOException {
        // Plain System.nanoTime() replaces Spring's StopWatch and Lombok's @Slf4j,
        // which were heavyweight third-party dependencies for a timing printout.
        long start = System.nanoTime();
        String inputFilePath = "D:\\迅雷云盘\\国蒙双语.4K.H265.AAC-YYDS.mkv.xltd";
        String outputDirPath = "D:\\XmpCache";
        long maxFileSize = 1024 * 1024 * 1024L; // 每个输出文件的最大大小(1GB)
        List<Path> parts = splitBySize(Paths.get(inputFilePath), Paths.get(outputDirPath), maxFileSize);
        System.out.println("Split file completed: " + parts.size() + " part(s).");
        System.out.printf("Time cost: %.3f s%n", (System.nanoTime() - start) / 1e9);
    }

    /**
     * Splits {@code inputFile} into consecutive parts of at most {@code maxFileSize}
     * bytes each, written to {@code outputDir} as output_0, output_1, ...
     *
     * Fixes over the original:
     * - each part's channel stays open until the part is full, instead of being
     *   reopened (CREATE+APPEND) for every 1 MiB chunk read from the source;
     * - writes are capped so a part never exceeds {@code maxFileSize} (the original
     *   wrote the whole remaining buffer, overshooting by up to the buffer size);
     * - parts are opened with TRUNCATE_EXISTING so a re-run does not append to
     *   stale files left by a previous run.
     *
     * @param inputFile   file to split
     * @param outputDir   directory receiving the parts (must exist)
     * @param maxFileSize maximum size of each part in bytes; must be positive
     * @return the part files, in order
     * @throws IOException if reading or writing fails
     */
    static List<Path> splitBySize(Path inputFile, Path outputDir, long maxFileSize) throws IOException {
        if (maxFileSize <= 0) {
            throw new IllegalArgumentException("maxFileSize must be positive: " + maxFileSize);
        }
        List<Path> outputFiles = new ArrayList<>();
        ByteBuffer buffer = ByteBuffer.allocate(BUFFER_SIZE);
        try (FileChannel in = FileChannel.open(inputFile, StandardOpenOption.READ)) {
            FileChannel out = null;
            long written = 0;
            int fileCount = 0;
            try {
                while (in.read(buffer) > 0) {
                    buffer.flip();
                    while (buffer.hasRemaining()) {
                        if (out == null || written >= maxFileSize) {
                            if (out != null) {
                                out.close();
                            }
                            Path part = outputDir.resolve("output_" + fileCount++);
                            outputFiles.add(part);
                            out = FileChannel.open(part, StandardOpenOption.CREATE,
                                    StandardOpenOption.WRITE, StandardOpenOption.TRUNCATE_EXISTING);
                            written = 0;
                        }
                        // Cap this write so the current part never exceeds maxFileSize.
                        int chunk = (int) Math.min(buffer.remaining(), maxFileSize - written);
                        int savedLimit = buffer.limit();
                        buffer.limit(buffer.position() + chunk);
                        written += out.write(buffer);
                        buffer.limit(savedLimit);
                    }
                    buffer.clear();
                }
            } finally {
                if (out != null) {
                    out.close();
                }
            }
        }
        return outputFiles;
    }
}

讯享网

指定行数

// 注意:原文此处的"指定行数"示例缺少 maxLines 常量的值(int maxLines = ;),存在编译错误。


讯享网

import java.io.BufferedReader;
import java.io.IOException;
import java.nio.channels.Channels;
import java.nio.channels.FileChannel;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.util.ArrayList;
import java.util.List;

public class FileSplitByLinesExample {

    public static void main(String[] args) throws IOException {
        Path inputFilePath = Paths.get("D:\\XmpCache\\aa.txt");
        Path outputDirPath = Paths.get("D:\\XmpCache");
        int maxLines = 1000; // 每个输出文件的最大行数 (the original had no value here — compile error)
        splitByLines(inputFilePath, outputDirPath, maxLines);
        System.out.println("Split file completed.");
    }

    /**
     * Splits a UTF-8 text file into parts of at most {@code maxLines} lines each,
     * written to {@code outputDir} as output_0, output_1, ...
     *
     * Fixes over the original:
     * - the original decoded each 1 KiB buffer independently and split on "\r\n",
     *   so lines (and multi-byte UTF-8 characters) crossing a buffer boundary were
     *   silently cut in two; a BufferedReader over the channel decodes correctly
     *   and handles \n, \r\n and \r endings;
     * - the input channel is now closed by try-with-resources even when an
     *   exception is thrown (the original leaked it on any error before close()).
     *
     * @param inputFile UTF-8 text file to split
     * @param outputDir directory receiving the parts (must exist)
     * @param maxLines  maximum number of lines per part; must be positive
     * @return the part files, in order
     * @throws IOException if reading or writing fails
     */
    static List<Path> splitByLines(Path inputFile, Path outputDir, int maxLines) throws IOException {
        if (maxLines <= 0) {
            throw new IllegalArgumentException("maxLines must be positive: " + maxLines);
        }
        List<Path> outputFiles = new ArrayList<>();
        List<String> lines = new ArrayList<>(maxLines);
        int fileCount = 0;
        try (FileChannel channel = FileChannel.open(inputFile, StandardOpenOption.READ);
             BufferedReader reader = new BufferedReader(
                     Channels.newReader(channel, StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                lines.add(line);
                if (lines.size() >= maxLines) {
                    outputFiles.add(writeOutputFile(outputDir, fileCount++, lines));
                    lines.clear();
                }
            }
        }
        // Flush the final, partially filled part.
        if (!lines.isEmpty()) {
            outputFiles.add(writeOutputFile(outputDir, fileCount, lines));
        }
        return outputFiles;
    }

    // Writes one part file (output_<n>) in UTF-8 and returns its path.
    private static Path writeOutputFile(Path outputDirPath, int fileCount, List<String> lines)
            throws IOException {
        Path outputFilePath = outputDirPath.resolve("output_" + fileCount);
        Files.write(outputFilePath, lines, StandardCharsets.UTF_8);
        return outputFilePath;
    }
}

适配gzip

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;

public class FileSplitter {

    private static final String GZIP_EXTENSION = ".gz";

    /**
     * Splits a gzip-compressed text file into gzip-compressed parts of at most
     * {@code linesPerFile} lines, named &lt;input&gt;.1.gz, &lt;input&gt;.2.gz, ...
     * next to the input file.
     *
     * The original version was broken: it buffered line bytes into a ByteBuffer
     * and wrote them to the INPUT file's channel (obtained from a FileInputStream,
     * which is not writable), while the per-part GZIPOutputStream never received
     * any data. Lines are now written directly to the current part's gzip stream,
     * decoded/encoded as UTF-8 instead of the platform default charset.
     *
     * @param inputFile    gzip-compressed text file to split
     * @param linesPerFile maximum number of lines per part; must be positive
     * @throws IOException if reading or writing fails
     */
    public static void splitFileByLineCount(File inputFile, int linesPerFile) throws IOException {
        if (linesPerFile <= 0) {
            throw new IllegalArgumentException("linesPerFile must be positive: " + linesPerFile);
        }
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(
                new GZIPInputStream(new FileInputStream(inputFile)), StandardCharsets.UTF_8))) {
            Writer out = null;
            int lineCount = 0;
            int fileNumber = 0;
            try {
                String line;
                while ((line = reader.readLine()) != null) {
                    if (lineCount % linesPerFile == 0) {
                        if (out != null) {
                            out.close(); // closing finishes the gzip trailer of the previous part
                        }
                        fileNumber++;
                        File part = new File(inputFile.getParentFile(),
                                inputFile.getName() + "." + fileNumber + GZIP_EXTENSION);
                        out = new OutputStreamWriter(
                                new GZIPOutputStream(new FileOutputStream(part)),
                                StandardCharsets.UTF_8);
                    }
                    out.write(line);
                    out.write('\n');
                    lineCount++;
                }
            } finally {
                if (out != null) {
                    out.close();
                }
            }
        }
    }

    public static void main(String[] args) throws IOException {
        File inputFile = new File("inputFile.gz");
        int linesPerFile = 1000; // the original had no value here — compile error
        splitFileByLineCount(inputFile, linesPerFile);
    }
}

简单按行读文件

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.ArrayList;
import java.util.List;

// Package-private holder class so the snippet compiles standalone; the article
// showed the method without its enclosing class.
class NioLineReader {

    /**
     * Reads a text file line by line with a FileChannel and returns all lines.
     *
     * Fixes over the original:
     * - bytes of each line are collected and decoded as UTF-8 (the original cast
     *   each byte to char, corrupting every multi-byte character);
     * - the '\r' of CRLF line endings is stripped (the original left it in the line);
     * - a final line without a trailing newline is returned (the original dropped it).
     *
     * @param filePath path of the file to read
     * @return all lines of the file, without line terminators
     */
    public static List<String> readFileWithNIO(String filePath) {
        List<String> lines = new ArrayList<>();
        try (FileChannel fileChannel = FileChannel.open(Path.of(filePath), StandardOpenOption.READ)) {
            ByteBuffer byteBuffer = ByteBuffer.allocate(1024);     // read chunk
            ByteArrayOutputStream lineBytes = new ByteArrayOutputStream(); // current line's raw bytes
            while (fileChannel.read(byteBuffer) != -1) {
                byteBuffer.flip(); // switch buffer to read mode
                while (byteBuffer.hasRemaining()) {
                    byte b = byteBuffer.get();
                    if (b == '\n') {
                        lines.add(takeLine(lineBytes));
                    } else if (b != '\r') { // drop the CR of a CRLF ending
                        lineBytes.write(b);
                    }
                }
                byteBuffer.clear(); // back to write mode for the next read
            }
            if (lineBytes.size() > 0) { // final line without trailing newline
                lines.add(takeLine(lineBytes));
            }
        } catch (IOException e) {
            // NOTE(review): preserved from the original — the error is printed and
            // the lines read so far are returned; consider rethrowing instead.
            e.printStackTrace();
        }
        return lines;
    }

    // Decodes the accumulated bytes of one line as UTF-8 and resets the buffer.
    private static String takeLine(ByteArrayOutputStream buf) {
        String s = buf.toString(StandardCharsets.UTF_8);
        buf.reset();
        return s;
    }
}
#!/bin/bash
# 过滤 input.txt:保留第三个 '|' 分隔字段不等于 "A999" 的行,写入 output.txt。
input_file="input.txt"
output_file="output.txt"

# 如果输出文件已经存在,则删除它(变量加引号,避免路径含空格时出错)。
if [ -f "$output_file" ]; then
    rm -- "$output_file"
fi

# 一次 awk 扫描即可完成过滤。原脚本对每一行都 fork 一个 awk 进程,
# 且 `echo $line` 未加引号,会做单词拆分和通配符展开,破坏行内容。
awk -F '|' '$3 != "A999"' "$input_file" > "$output_file"
#!/bin/bash
# Copy every line of input.txt whose third '|'-separated field is not "A999"
# into output.txt (appending, as the original did).
input_file="input.txt"
output_file="output.txt"

while IFS= read -r line
do
    # Split on '|' without the word-splitting/globbing bugs of
    # fields=($(echo "$line" | tr '|' '\n')): that form breaks fields that
    # contain spaces, tabs, or glob characters, and forks two processes per line.
    IFS='|' read -r -a fields <<< "$line"

    if [ "${fields[2]}" != "A999" ]; then
        echo "$line" >> "$output_file"
    fi
done < "$input_file"

压缩一个大文件(gz)

import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.util.zip.GZIPOutputStream;

public class GzipFileCompressor {

    /** Read-buffer size; the original 1 KiB buffer forced many tiny writes. */
    private static final int BUFFER_SIZE = 64 * 1024;

    public static void main(String[] args) {
        String sourceFilePath = "path/to/your/largefile.txt";
        String compressedFilePath = "path/to/your/compressedfile.gz";
        try {
            compress(Paths.get(sourceFilePath), Paths.get(compressedFilePath));
            System.out.println("File compressed successfully.");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Gzip-compresses {@code source} into {@code target}, reading via a FileChannel.
     *
     * Uses a heap ByteBuffer whose backing array is handed straight to the gzip
     * stream. The original allocateDirect(1024) buffer required copying every
     * chunk into a temporary byte[] first, which defeats the point of a direct
     * buffer and allocated garbage per chunk.
     *
     * @throws IOException if reading or writing fails
     */
    static void compress(Path source, Path target) throws IOException {
        try (FileChannel in = FileChannel.open(source, StandardOpenOption.READ);
             GZIPOutputStream out = new GZIPOutputStream(new FileOutputStream(target.toFile()))) {
            ByteBuffer buffer = ByteBuffer.allocate(BUFFER_SIZE);
            while (in.read(buffer) != -1) {
                // position == number of bytes read into the buffer this pass.
                out.write(buffer.array(), 0, buffer.position());
                buffer.clear();
            }
        }
    }
}
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.util.zip.GZIPOutputStream;

public class NIOGzipFileCompressor {

    public static void main(String[] args) {
        String sourceFilePath = "path/to/your/largefile.txt";
        String compressedFilePath = "path/to/your/compressedfile.gz";
        try {
            compress(Paths.get(sourceFilePath), Paths.get(compressedFilePath));
            System.out.println("File compressed successfully.");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Gzip-compresses {@code source} into {@code target}, reading via a FileChannel.
     *
     * Bug fixed: the original copied inChannel -> outChannel, i.e. the channel of
     * the FileOutputStream UNDERNEATH the GZIPOutputStream. The output file ended
     * up as a gzip header followed by raw uncompressed bytes, and the gzip trailer
     * was never written — a corrupt .gz. All data now goes through the gzip stream.
     *
     * @throws IOException if reading or writing fails
     */
    static void compress(Path source, Path target) throws IOException {
        try (FileChannel inChannel = FileChannel.open(source, StandardOpenOption.READ);
             GZIPOutputStream gzipOut = new GZIPOutputStream(new FileOutputStream(target.toFile()))) {
            // 8 KiB chunks, as in the article's follow-up snippet.
            ByteBuffer buffer = ByteBuffer.allocate(8192);
            while (inChannel.read(buffer) != -1) {
                buffer.flip(); // switch to read mode
                gzipOut.write(buffer.array(), 0, buffer.limit());
                buffer.clear(); // switch back to write mode
            }
        }
    }
}
// NOTE(review): the orphan read-loop fragment that followed this class in the
// article (an 8 KiB-buffer loop writing buffer.array() to gzipOut) is
// incorporated into compress() above.
String url = "http://192.168.142.129:9000/api/rules/search"; String token = "8eb31bf43a5bc196cb9eed880be4a46651fbc8c8:"; String basicAuth = "Basic " + new String(Base64.getEncoder().encode(token.getBytes("UTF-8"))); HttpHeaders requestHeaders = new HttpHeaders(); //添加认证的请求头 requestHeaders.add("Authorization", basicAuth); HttpEntity<String> requestEntity = new HttpEntity<String>(null, requestHeaders); ResponseEntity<JSONObject> repsonse = restTemplate.exchange(url, HttpMethod.GET, requestEntity, JSONObject.class); JSONObject body = repsonse.getBody(); return body; 
#!/bin/bash
# Append a timestamped snapshot of `zing-ps` output to the log file,
# once a minute, 250 times (about 4 hours 10 minutes in total).
output_file="/path/to/your/directory/output.log"

count=0
while [ "$count" -lt 250 ]; do
    timestamp=$(date '+%Y-%m-%d %H:%M:%S')
    echo "[$timestamp] $(zing-ps)" >> "$output_file"
    sleep 60
    count=$((count + 1))
done
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

public class GenericSplitCollectionExample {

    public static void main(String[] args) {
        // 创建一个示例列表
        List<String> originalList = List.of("a", "b", "c", "d", "e", "f", "g", "h", "i", "j",
                "k", "l", "m", "n", "o", "p", "q", "r", "s", "t");
        int batchSize = 5; // 定义小集合的大小
        Map<Integer, List<String>> splitCollections = splitCollection(originalList, batchSize);
        // 打印结果
        splitCollections.forEach((key, value) -> System.out.println("Collection " + key + ": " + value));
    }

    /**
     * Partitions {@code list} into consecutive batches of at most {@code batchSize}
     * elements, keyed by batch index 0, 1, 2, ...
     *
     * Bug fixed: the original grouped by {@code list.indexOf(element)}, which is
     * O(n^2) and — worse — puts duplicate elements into the batch of their FIRST
     * occurrence (indexOf always returns the first match), silently corrupting
     * the result. Grouping by positional index is both correct and linear.
     * (Widened from private to package-private so it can be unit-tested.)
     *
     * @param list      source list (unchanged)
     * @param batchSize maximum batch size; must be positive
     * @return map from batch index to the elements of that batch, in order
     */
    static <T> Map<Integer, List<T>> splitCollection(List<T> list, int batchSize) {
        if (batchSize <= 0) {
            throw new IllegalArgumentException("batchSize must be positive: " + batchSize);
        }
        return IntStream.range(0, list.size())
                .boxed()
                .collect(Collectors.groupingBy(i -> i / batchSize,
                        Collectors.mapping(list::get, Collectors.toList())));
    }
}
import java.io.*;
import java.util.zip.GZIPOutputStream;

public class CompressFileToGz {

    public static void main(String[] args) {
        // 指定输入文件路径
        String inputFilePath = "data.dat";
        // 指定输出文件路径
        String outputFilePath = "data.dat.gz";

        // try-with-resources 确保两个流在任何退出路径上都被关闭
        // (关闭 GZIPOutputStream 时会写出 gzip 尾部)。
        try (FileInputStream fis = new FileInputStream(inputFilePath);
             GZIPOutputStream gzipOS = new GZIPOutputStream(new FileOutputStream(outputFilePath))) {

            byte[] chunk = new byte[1024];
            int n;
            // 将文件按块送入 gzip 编码器。
            while ((n = fis.read(chunk)) != -1) {
                gzipOS.write(chunk, 0, n);
            }
            System.out.println("文件已成功压缩成 " + outputFilePath);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
import java.io.BufferedWriter;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;

public class WriteCollectionToDatFile {

    public static void main(String[] args) {
        // 创建一个ArrayList并添加一些数据
        List<String> dataList = new ArrayList<>();
        dataList.add("Item 1");
        dataList.add("Item 2");
        dataList.add("Item 3");

        // 指定输出文件的路径
        String filePath = "data.dat";

        // Fix: the original used new FileWriter(filePath), which encodes with the
        // platform default charset, so the file's bytes differed between machines.
        // Files.newBufferedWriter with an explicit UTF-8 charset is deterministic.
        try (BufferedWriter writer = Files.newBufferedWriter(Path.of(filePath), StandardCharsets.UTF_8)) {
            // 按行写入集合数据
            for (String item : dataList) {
                writer.write(item);
                writer.newLine(); // 换行
            }
            System.out.println("集合数据已成功写入到 " + filePath);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
import java.io.*;
import java.util.ArrayList;
import java.util.zip.GZIPOutputStream;

public class WriteCollectionToGzInMemory {

    public static void main(String[] args) {
        // Sample data to compress, one item per line.
        ArrayList<String> items = new ArrayList<>();
        items.add("Item 1");
        items.add("Item 2");
        items.add("Item 3");

        try (ByteArrayOutputStream bytes = new ByteArrayOutputStream();
             GZIPOutputStream gzip = new GZIPOutputStream(bytes)) {

            // Write the items into the in-memory gzip stream. Closing the inner
            // PrintWriter also closes (and thus finishes) the gzip stream, so the
            // byte array read below is a complete .gz payload.
            try (PrintWriter out = new PrintWriter(new OutputStreamWriter(gzip))) {
                items.forEach(out::println);
            }

            // Snapshot of the compressed bytes.
            byte[] compressed = bytes.toByteArray();

            // Persist the compressed payload to disk.
            String gzFilePath = "data.dat.gz";
            try (FileOutputStream file = new FileOutputStream(gzFilePath)) {
                file.write(compressed);
            }
            System.out.println("集合数据已成功写入到 " + gzFilePath);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
 import java.io.BufferedWriter; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.StandardOpenOption; import java.util.stream.IntStream; public class WriteLargeDataFileExample { 
    public static void main(String[] args) { 
    // 指定要写入的文件路径 Path filePath = Path.of("large_data_file.txt"); try (BufferedWriter writer = Files.newBufferedWriter(filePath, StandardOpenOption.CREATE, StandardOpenOption.WRITE)) { 
    // 生成要写入文件的大量数据,这里使用简单的示例,写入十亿行每行10个字段的数据 IntStream.range(0, ).forEach(i -> { 
    StringBuilder line = new StringBuilder(); IntStream.range(0, 10).forEach(j -> line.append("Field").append(j + 1).append("\t")); // 用制表符分隔字段 line.append(System.lineSeparator()); try { 
    writer.write(line.toString()); } catch (IOException e) { 
    e.printStackTrace(); } }); System.out.println("大数据文件写入成功!"); } catch (IOException e) { 
    e.printStackTrace(); } } } import java.io.IOException; import java.nio.ByteBuffer; import java.nio.channels.FileChannel; import java.nio.file.Path; import java.nio.file.StandardOpenOption; import java.util.stream.IntStream; public class WriteBillionDataWithNIO { 
    public static void main(String[] args) { 
    // 指定要写入的文件路径 Path filePath = Path.of("billion_data_nio.txt"); try (FileChannel channel = FileChannel.open(filePath, StandardOpenOption.CREATE, StandardOpenOption.WRITE)) { 
    // 生成十亿条包含10个字段的数据 for (int i = 0; i < ; i++) { 
    StringBuilder line = new StringBuilder(); for (int j = 0; j < 10; j++) { 
    // 生成字段数据,这里简单地使用数字和逗号分隔 line.append("Field").append(j + 1).append(","); } // 将数据写入ByteBuffer ByteBuffer buffer = ByteBuffer.wrap(line.toString().getBytes()); // 将ByteBuffer写入文件Channel channel.write(buffer); // 写入换行符 channel.write(ByteBuffer.wrap(System.lineSeparator().getBytes())); } System.out.println("十亿条数据使用NIO写入成功!"); } catch (IOException e) { 
    e.printStackTrace(); } } } 
小讯
上一篇 2025-04-04 15:38
下一篇 2025-01-04 18:23

相关推荐

版权声明:本文内容由互联网用户自发贡献,该文观点仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容,请联系我们,一经查实,本站将立刻删除。
如需转载请保留出处:https://51itzy.com/kjqy/21711.html