IO流进行文件读写操作

最新推荐文章于 2024-08-01 17:15:30 发布

serendipity_xr

最新推荐文章于 2024-08-01 17:15:30 发布

阅读量1.1k

点赞数

文章标签： java poi excel

本文链接：https://blog.csdn.net/serendipity_xr/article/details/111300727

版权

IO流进行文件读写操作

一.Java程序读excel文件

excel文件分为xls,xlsx和csv文件。

1.xls和xlsx的主要区别是版本不同：

xls是excel2003及以前版本所生成的文件格式。
xlsx是excel2007及以后版本所生成的文件格式。
（excel 2007及之后的版本可以打开上述两种格式，但是excel 2003及之前的版本只能打开xls格式）

2.一个excel文件的特点：

一个excel文件（工作簿）中可以包含多张表，每张表叫做sheet。
在excel文件中，表是由行和列组成的，每个单元格由它所在的行和列来确定。

3.使用第三方jar包（jxl，poi）

基本步骤：

下载jar包(jxl) .
使用 workbook来表示excel.
使用sheet对象类表示每一个sheet表.
row表示行.

4.代码

Student.java

/**
 * Plain data holder for one student record: numeric id, name and age.
 */
public class Student {

	private int id;
	private String name;
	private int age;

	/** Creates a student with default field values (0 / null / 0). */
	public Student() {
	}

	/**
	 * Creates a fully initialised student.
	 *
	 * @param id   student id
	 * @param name student name
	 * @param age  student age
	 */
	public Student(int id, String name, int age) {
		this.id = id;
		this.name = name;
		this.age = age;
	}

	/** @return the student id */
	public int getId() {
		return this.id;
	}

	/** @param id the student id to store */
	public void setId(int id) {
		this.id = id;
	}

	/** @return the student name */
	public String getName() {
		return this.name;
	}

	/** @param name the student name to store */
	public void setName(String name) {
		this.name = name;
	}

	/** @return the student age */
	public int getAge() {
		return this.age;
	}

	/** @param age the student age to store */
	public void setAge(int age) {
		this.age = age;
	}

	/** Renders the record as {@code Student [id=..., name=..., age=...]}. */
	@Override
	public String toString() {
		StringBuilder text = new StringBuilder("Student [id=");
		text.append(this.id);
		text.append(", name=").append(this.name);
		text.append(", age=").append(this.age);
		text.append("]");
		return text.toString();
	}
}

ReadExcel.java

/**
 * Demonstrates reading an Excel workbook with the JXL API ({@code Workbook},
 * {@code Sheet}, {@code Cell}) and mapping its rows onto {@link Student} objects.
 */
public class ReadExcel {

	public static void main(String[] args) {
		// Target file.
		// NOTE(review): JXL is documented to parse only the binary .xls (BIFF) format, so an
		// .xlsx file is expected to fail with BiffException -- confirm the real file format.
		File file = new File("student.xlsx");
		readExcelToList(file);
	}

	/**
	 * Prints general information about the sheet named "student" and then dumps every
	 * cell, one sheet row per output line.
	 *
	 * @param file the workbook file to read
	 */
	public static void readExcel(File file) {
		Workbook workbook = null;
		try {
			// JXL opens the file itself; no separate FileInputStream is required.
			workbook = Workbook.getWorkbook(file);
			System.out.println(workbook);

			// Look the sheet up by its name.
			Sheet sheet = workbook.getSheet("student");
			System.out.println(sheet.getName());

			int rowCount = sheet.getRows();
			int columnCount = sheet.getColumns();
			System.out.println(rowCount);    // number of rows in use
			System.out.println(columnCount); // number of columns in use

			// Cell coordinates start at (0, 0); the argument order is (column, row).
			Cell cell = sheet.getCell(0, 0);
			String contents = cell.getContents();
			// The original printed an empty line here and discarded the value it had just read.
			System.out.println(contents);

			// Dump the whole sheet.
			for (int row = 0; row < rowCount; row++) {
				for (int column = 0; column < columnCount; column++) {
					System.out.print(sheet.getCell(column, row).getContents() + "----");
				}
				System.out.println();
			}
		} catch (BiffException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			// Release the workbook even when reading failed half-way.
			if (workbook != null) {
				workbook.close();
			}
		}
	}

	/**
	 * Reads the first sheet of the workbook, converts every row after row 0 into a
	 * {@link Student} (column 0 = id, column 1 = name, column 2 = age) and prints the
	 * resulting list.
	 *
	 * @param file the workbook file to read
	 */
	public static void readExcelToList(File file) {
		ArrayList<Student> stus = new ArrayList<Student>();
		Workbook workbook = null;
		try {
			workbook = Workbook.getWorkbook(file);
			Sheet sheet = workbook.getSheet(0);
			int rowCount = sheet.getRows();
			int columnCount = sheet.getColumns();
			// Row 0 is skipped -- presumably it holds the column titles.
			for (int row = 1; row < rowCount; row++) {
				Student stu = new Student();
				for (int column = 0; column < columnCount; column++) {
					String contents = sheet.getCell(column, row).getContents();
					if (column == 0) {
						stu.setId(Integer.parseInt(contents));
					} else if (column == 1) {
						stu.setName(contents);
					} else if (column == 2) {
						// Only column 2 is the age; the original parsed every further column
						// as the age too, which overwrote it or failed on non-numeric text.
						stu.setAge(Integer.parseInt(contents));
					}
				}
				stus.add(stu);
			}
		} catch (BiffException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			if (workbook != null) {
				workbook.close();
			}
		}

		for (Student student : stus) {
			System.out.println(student);
		}
	}

	/**
	 * Naive byte-by-byte dump of the file to standard output. Kept only to show that
	 * a plain stream read does not yield usable text for an Excel file.
	 *
	 * @param file the file to dump
	 */
	public void readec(File file) {
		// try-with-resources closes the stream on every path.
		try (FileInputStream fis = new FileInputStream(file)) {
			int value;
			while ((value = fis.read()) != -1) {
				System.out.print((char) value);
			}
		} catch (FileNotFoundException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}
}

使用poi的话需要重新导包，读取xls文件时使用HSSFWorkbook类。
Java对格式化文件的操作是为了获取格式化文件中的数据的。

Java代码创建 excel文件

将java对象信息写出到excel文件中

基本步骤：

创建目标文件，调用WritableWorkbook对象
创建sheet

// Create the sheet.
// The first argument is the sheet's name, the second is the sheet's position.
WritableSheet createSheet = createWorkbook.createSheet("stu",0);

创建单元格对象，将单元格对象添加到sheet中。
将数据写出到 excel文件中，可以使用write方法。
关闭资源

/**
 * Demonstrates creating an .xls workbook with the JXL write API and exporting a list
 * of {@link Student} objects into it.
 */
public class WriteExcel2 {

	/** Workbook path shared by the write, read-back and export demos. */
	private static final String WORKBOOK_PATH = "D:\\Java1\\ssss.xls";

	public static void main(String[] args) {
		Scanner input = new Scanner(System.in);
		System.out.println("请选择你的操作   1导出数据  ");
		String choose = input.next();
		if (choose.equals("1")) {
			exportStudent();
		}
	}

	/**
	 * Creates a workbook with three sheets and writes the header row into the first one.
	 */
	public static void writerExcel() {
		// NOTE(review): JXL appears not to close a caller-supplied OutputStream, so the
		// stream is owned and closed here -- confirm against the JXL documentation.
		try (FileOutputStream out = new FileOutputStream(new File(WORKBOOK_PATH))) {
			WritableWorkbook createWorkbook = Workbook.createWorkbook(out);
			// Create the sheets; only the first one receives content.
			WritableSheet createSheet01 = createWorkbook.createSheet("stu01", 0);
			createWorkbook.createSheet("stu02", 1);
			createWorkbook.createSheet("stu03", 2);
			addHeaderRow(createSheet01);
			// Write the data out, then release the workbook.
			createWorkbook.write();
			createWorkbook.close();
			System.out.println("文件写出成功");
		} catch (FileNotFoundException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		} catch (RowsExceededException e) {
			e.printStackTrace();
		} catch (WriteException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Opens the workbook written by the other demos and prints the name of each sheet.
	 */
	public static void readExcel() {
		try (FileInputStream in = new FileInputStream(new File(WORKBOOK_PATH))) {
			Workbook workbook = Workbook.getWorkbook(in);
			try {
				// Printing the Sheet[] array itself only shows an object reference,
				// so the sheet names are listed instead.
				for (String sheetName : workbook.getSheetNames()) {
					System.out.println(sheetName);
				}
			} finally {
				workbook.close();
			}
		} catch (BiffException | IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Builds ten sample students and exports them to the workbook: a header row
	 * followed by one row per student (id, name, age).
	 */
	public static void exportStudent() {
		// Data to export.
		List<Student> students = new ArrayList<Student>();
		for (int i = 0; i < 10; i++) {
			students.add(new Student(i, "张三", 15 + i));
		}

		try (FileOutputStream out = new FileOutputStream(new File(WORKBOOK_PATH))) {
			WritableWorkbook createWorkbook = Workbook.createWorkbook(out);
			WritableSheet createSheet = createWorkbook.createSheet("学生信息表", 0);
			addHeaderRow(createSheet);

			// Row 0 holds the header, so data rows start at 1.
			int row = 1;
			for (Student student : students) {
				createSheet.addCell(new Number(0, row, student.getId()));
				createSheet.addCell(new Label(1, row, student.getName()));
				createSheet.addCell(new Number(2, row, student.getAge()));
				row++;
			}

			createWorkbook.write();
			createWorkbook.close();
			// Report success only when the export really completed; the original printed
			// this message after the catch blocks, i.e. also when the export had failed.
			System.out.println("数据导出成功  请去文件中查看  ");
		} catch (IOException e) {
			e.printStackTrace();
		} catch (RowsExceededException e) {
			e.printStackTrace();
		} catch (WriteException e) {
			e.printStackTrace();
		}
	}

	/** Writes the three column titles into row 0 of the given sheet. */
	private static void addHeaderRow(WritableSheet sheet) throws RowsExceededException, WriteException {
		sheet.addCell(new Label(0, 0, "学生编号"));
		sheet.addCell(new Label(1, 0, "学生姓名"));
		sheet.addCell(new Label(2, 0, "学生年龄"));
	}
}

二.Java程序读取word文件

使用poi的jar包来读取word文件

1.导入jar包

在这里插入图片描述

2.编写代码

根据不同的word文件格式来选择不同的对象操作

/**
 * Demonstrates extracting the plain text of a Word document with Apache POI.
 * A .doc file goes through {@code WordExtractor}, a .docx file through
 * {@code XWPFWordExtractor}.
 */
public class ReadWord {

	public static void main(String[] args) {
		// Source file; .doc and .docx need different extractor classes.
		String src = "hello.doc";
		if (src.endsWith(".doc")) {
			readDoc(src);
		} else if (src.endsWith(".docx")) {
			readDocx(src);
		} else {
			System.out.println("格式不符合要求");
		}
	}

	/** Prints the text of a binary .doc file. */
	private static void readDoc(String src) {
		// try-with-resources closes the file stream, which the original never did.
		try (FileInputStream fis = new FileInputStream(new File(src))) {
			WordExtractor wordExtractor = new WordExtractor(fis);
			String textFromPieces = wordExtractor.getTextFromPieces();
			System.out.println(textFromPieces);
		} catch (IOException e) {
			// Also covers FileNotFoundException, which the original handled identically.
			e.printStackTrace();
		}
	}

	/** Prints the text of an XML-based .docx file. */
	private static void readDocx(String src) {
		try {
			OPCPackage openPackage = POIXMLDocument.openPackage(src);
			try {
				// The extractor constructor declares XmlException.
				POIXMLTextExtractor extractor = new XWPFWordExtractor(openPackage);
				String text = extractor.getText();
				System.out.println(text);
			} finally {
				// Release the package without writing anything back to the source file.
				openPackage.revert();
			}
		} catch (IOException | XmlException | OpenXML4JException e) {
			e.printStackTrace();
		}
	}
}

三.Java网络爬虫

Java代码读取某一个网页中的所有图片.
Java代码：必须使用URL对象,正则表达式,io流的文件拷贝.

基本步骤：

指定目标文件 String字符串（网络地址）
获取URL对象
通过url开启java程序与资源的连接
将资源读取到io流中
文件拷贝操作

1.单个网络资源下载(单一下载)

/**
 * Downloads a single remote resource and stores it as {@code new.jpg} in the
 * working directory.
 */
public class OneFileDown {

	public static void main(String[] args) {
		// Location of the remote resource.
		String src = "https://www.zhongguofeng.com/uploads/allimg/180723/13-1PH31P112.jpg";
		try {
			URL url = new URL(src);
			// Open the connection between this program and the resource.
			URLConnection openConnection = url.openConnection();
			// Both streams are closed automatically, also when the copy fails. The original
			// never closed the output stream and leaked the input stream on errors.
			try (InputStream inputStream = openConnection.getInputStream();
					FileOutputStream outputStream = new FileOutputStream(new File("new.jpg"))) {
				IOUtils.copy(inputStream, outputStream);
			}
			// Printed after the streams are closed, i.e. once the file is complete.
			System.out.println("文件拷贝成功");
		} catch (MalformedURLException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}
}

2.目标路径中的所有需要的资源文件的下载(多文件下载)

基本步骤：

指定目标文件
创建URL对象
建立连接
将源文件内容读取到io流
转换，变成字符缓冲流
读取每一行数据
获取每一个img标签中的src的内容
下载之后的文件的名字

2.1获取单个信息

不同的网页结构需要使用不同的正则表达式来处理.

public class AllFileDown {
	/** Entry point: starts the image download demo. */
	public static void main(String[] args) {
		AllFileDown.getALLImg();
	}
	
	/**
	 * Downloads the .jpg images referenced by the img tags of one fixed page.
	 *
	 * The page is read line by line. On every line that contains an img tag, each
	 * {@code src=...} occurrence is examined; when it contains a path ending in
	 * ".jpg", the image is fetched from the site root and stored in the target
	 * directory as {@code slxy<index>.jpg}.
	 */
	public static void getALLImg() {
		int index = 0;
		// Page to scan.
		String address = "http://www.slxy.cn/info/1187/11784.htm";
		// Compiled once up front instead of once per line / per match.
		Pattern imgTagPattern = Pattern.compile("<img.*src\\s*=\\s*(.*?)>");
		Pattern srcPattern = Pattern.compile("src\\s*=\\s*\"?(.*?)(\"|>|\\s+)");
		// The dot before "jpg" is escaped so that it only matches a literal '.'.
		Pattern jpgPathPattern = Pattern.compile("/.*\\.jpg");
		try {
			URL url = new URL(address);
			URLConnection openConnection = url.openConnection();
			// The reader (and the underlying connection stream) is closed on every path.
			try (BufferedReader br = new BufferedReader(new InputStreamReader(openConnection.getInputStream()))) {
				String buff;
				while ((buff = br.readLine()) != null) {
					// The greedy ".*" makes the img pattern match at most once per line, so a
					// single check replaces the original outer loop.
					if (!imgTagPattern.matcher(buff).find()) {
						continue;
					}
					Matcher smatcher = srcPattern.matcher(buff);
					int count = 0;
					while (smatcher.find()) {
						count++;
						index++;
						// The original tested count/2==0, which in integer arithmetic is true
						// only for the first match: that match is skipped in favour of the next.
						// NOTE(review): the intent is not evident from the code -- confirm.
						// If there is no further match, stop instead of letting group() throw
						// IllegalStateException.
						if (count == 1 && !smatcher.find()) {
							break;
						}
						String srcAttribute = smatcher.group();
						System.out.println(srcAttribute);
						// Extract the site-relative .jpg path; skip attributes without one.
						Matcher pathMatcher = jpgPathPattern.matcher(srcAttribute);
						if (!pathMatcher.find()) {
							continue;
						}
						String path = "http://www.slxy.cn" + pathMatcher.group();
						System.out.println("具体的路径下载" + path);
						// File name of the downloaded image.
						String name = "slxy" + index + ".jpg";
						URL imgurl = new URL(path);
						URLConnection imgConnection = imgurl.openConnection();
						try (InputStream imgStream = imgConnection.getInputStream();
								FileOutputStream imgOut = new FileOutputStream(new File("D:\\java笔记\\图片", name))) {
							IOUtils.copy(imgStream, imgOut);
						}
						System.out.println("第" + index + "个文件下载成功");
					}
				}
			}
		} catch (MalformedURLException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

2.2获取所有信息

获取目标网页中的html内容

// 获取所有信息 
		/**
		 * Downloads the raw content of one fixed page and stores it in a local text file.
		 */
		public  static   void  getAllMessage() {
			// Location of the page.
			String src = "http://www.slxy.cn/info/1187/11677.htm";
			try {
				URL url = new URL(src);
				URLConnection openConnection = url.openConnection();
				// Both streams are closed automatically, also when the copy fails.
				try (InputStream inputStream = openConnection.getInputStream();
						FileOutputStream fos = new FileOutputStream(new File("D:\\Java1\\Internet.txt"))) {
					// Copy in chunks and write only the bytes that were actually read. The
					// original sized its buffer with available() and ignored how many bytes
					// read() returned, so unread zero bytes could end up in the file.
					byte[] buffer = new byte[8192];
					int len;
					while ((len = inputStream.read(buffer)) != -1) {
						fos.write(buffer, 0, len);
					}
					fos.flush();
				}
			} catch (MalformedURLException e) {
				e.printStackTrace();
			} catch (IOException e) {
				e.printStackTrace();
			}
		}
			
}