Java读取600万行的txt文件,内存溢出解决方案

原创已于 2024-04-13 17:36:24 修改 · 615 阅读

1 ·

CC 4.0 BY-SA版权

文章标签：

#java #大数据 #大数据读取

于 2024-04-13 17:35:29 首次发布

Java 专栏收录该内容

26 篇文章

订阅专栏

本文讨论了解决Java应用中一次性读取大文件导致内存溢出的方法，通过使用对象池和文件流，减少频繁new对象，提高性能。介绍了如何创建对象池并在读取文件时复用User对象。

可能造成内存溢出的原因：

一次性把txt文件读取到内存
频繁的new对象

实体类


import lombok.Data;
import java.io.Serializable;

/**
 * @author cpf
 * @date 2024/4/13 14:40
 */
@Data
public class User implements Serializable{

    private String user;

    private String positioningTime;

    private String latitude;

    private String longitude;

    private String locationId;

    public void clear() {
        setUser(null);
        setPositioningTime(null);
        setLatitude(null);
        setLongitude(null);
        setLocationId(null);
    }
}

可以使用对象池解决频繁new对象的问题（注意：对象池只适用于用完即可丢弃的对象；如果对象还要保存在返回的List中，就不能归还到池中复用，否则后面读取的行会覆盖之前记录的数据）
解决一次性把文件读取到内存的问题：可以使用文件流方式，用java.util.Scanner类逐行扫描文件内容，每次只读取一行，而不是把整个文件一次性加载到内存



import org.example.entity.User;

import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.*;

/**
 * Reads a tab-separated text file line by line and prints a few randomly
 * chosen records followed by the total record count.
 *
 * <p>Every parsed record is kept in the returned list, so each one needs its
 * own {@link User} instance. Recycling instances through an object pool would
 * make several list entries point at the same object and overwrite data that
 * was read earlier, so no pool is used here.
 *
 * @author cpf
 * @date 2024/4/13 15:45
 */
public class TestMe {

    /** Number of random records printed by {@link #main(String[])}. */
    private static final int SAMPLE_COUNT = 20;

    /**
     * Loads the data file, prints {@code SAMPLE_COUNT} random records and then
     * the number of records that were read.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String filePath = "src/main/resources/Gowalla_totalCheckins.txt";
        List<User> users = readData(filePath);
        int total = users.size();

        // Bound the random index by the real record count (not a hard-coded
        // 6000000) so short or partly malformed files cannot cause an
        // IndexOutOfBoundsException; skip sampling when nothing was read.
        for (int i = 0; i < SAMPLE_COUNT && total > 0; i++) {
            // Math.random() is in [0, 1), so the index is in [0, total - 1].
            int random = (int) (Math.random() * total);
            System.out.println("第"+ random +"个数据: " + users.get(random));
        }
        System.out.println(total);
    }

    /**
     * Streams the file at {@code filePath} one line at a time and converts
     * every well-formed line into a {@link User}.
     *
     * @param filePath path of the UTF-8 encoded, tab-separated input file
     * @return the parsed records in file order; malformed lines are skipped
     * @throws RuntimeException wrapping the {@link IOException} if the file
     *         cannot be opened, read completely, or closed
     */
    private static List<User> readData(String filePath) {
        List<User> userList = new ArrayList<>();
        // try-with-resources closes both resources even when opening the
        // file fails part-way, without a null check in a finally block.
        try (FileInputStream fis = new FileInputStream(filePath);
             Scanner sc = new Scanner(fis, "UTF-8")) {
            while (sc.hasNextLine()) {
                User user = parseLine(sc.nextLine());
                if (user != null) {
                    userList.add(user);
                }
            }
            // Scanner treats a read error as end of input and only records
            // it; surface it instead of returning a silently truncated list.
            IOException readError = sc.ioException();
            if (readError != null) {
                throw new RuntimeException(readError);
            }
        } catch (IOException e) {
            // Covers FileNotFoundException from opening and errors on close.
            throw new RuntimeException(e);
        }
        return userList;
    }

    /**
     * Parses one tab-separated line into a new {@link User}.
     *
     * @param line a single line of the input file
     * @return a freshly allocated {@code User} filled from the first five
     *         fields, or {@code null} if the line has fewer than five fields
     */
    private static User parseLine(String line) {
        String[] data = line.split("\t");
        if (data.length < 5) {
            System.err.println("数据格式错误：需要至少包含5个字段。");
            return null;
        }
        // A new instance per record: the caller retains every record, so
        // instances must never be shared between lines.
        User user = new User();
        user.setUser(data[0]);
        user.setPositioningTime(data[1]);
        user.setLatitude(data[2]);
        user.setLongitude(data[3]);
        user.setLocationId(data[4]);
        return user;
    }

}