批量生成HTML文件，通过学生名单（学号、姓名）与博客名单（姓名、网址）

最新推荐文章于 2021-07-15 15:12:17 发布

行者-丁又专

最新推荐文章于 2021-07-15 15:12:17 发布

阅读量4.9k

点赞数

分类专栏： Java 文章标签： HTML 正则表达式集合 HashSet

本文链接：https://blog.csdn.net/dyz1982/article/details/22119297

版权

Java 专栏收录该内容

2 篇文章 0 订阅

订阅专栏

构想

很早就想写这个程序了，直到开学3周多了，才下定决心写完。

功能：通过学生名单（学号、姓名）与博客名单（姓名、网址），自动生成HTML文件。

示例文件：学生名单、博客名单。

结果文件（示例）：HTML文件。

运行结果

101 蔡伟权(缺) 102 曹翠芬 103 陈丹凤 104 陈桂强 105 陈家漫(缺)

106 陈镜宇         108 陈满东         110 邓  辉(缺)     114 洪境鹏         115 黄  彬(缺)
117 黄国滔(缺)     119 黄世安         120 黄世君(缺)     122 黄宇倩         124 江宗信(缺)
125 赖长青(缺)     127 梁官荣         128 梁文俊         129 廖始聪(缺)     130 廖  鋆
131 林  瀚         132 林骏琪         133 林镇填(缺)     134 刘  戈(缺)     135 刘威航
136 陆志翔(缺)     139 倪粤鹏         142 容文山         143 容永豪(缺)     144 施恒亮
145 谭永辉         146 韦林莹(缺)     148 吴辉平         150 吴郁鹏         151 吴韵杰(缺)
152 谢碧君         153 谢舒韵         154 谢志杰(缺)     155 许翠怡(缺)     156 杨溢涛
157 杨  媛(缺)     159 余文康         160 曾麒城         161 曾远辉         164 张玉婷
169 庄树填(缺)

使用方式

（a）命令行。编译 javac HtmlBlog.java ；运行 java HtmlBlog wl131.html student_list_wl131.txt blog_list_wl131.txt

（b）Eclipse。直接修改 main 方法中 else 分支里的默认文件路径（fileHtml、fileStudent、fileBlog），然后运行即可。

Java源码

/**
 * (1)根据博客名单，自动生成HTML网页；每行5个数据
 * (2)判断博客地址是否符合要求，争取有纠错的功能
 * (3)判断有多少同学的博客没有，需要补充
 * 作者：丁又专
 * 时间：2014.03.26
 */
package indi.dyz.html;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Scanner;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Builds an HTML page that links every student of a roster to his or her blog.
 *
 * <p>Two whitespace-separated, two-column text files are read: a student roster
 * (ID, name) and a blog list (name, URL). The first non-blank line of each file
 * is treated as a header and skipped. Students without a blog entry, or whose
 * blog address does not match the expected pattern, are linked to a default
 * address and marked with "(缺)".
 */
public class HtmlBlog {

	/**
	 * Padding written into the HTML. A plain space would be collapsed by the
	 * browser, so the alignment padding has to be a non-breaking space entity.
	 */
	private static final String HTML_BLANK = "&nbsp;";

	/** Address linked for students that have no valid blog address. */
	private static final String DEFAULT_URL = "http://blog.csdn.net/dyz1982";

	/**
	 * Reads a two-column list file into a 2 x n array.
	 *
	 * <p>Blank lines are ignored wherever they occur, the first non-blank line is
	 * skipped as the header, and surrounding whitespace on a line is ignored.
	 * Lines that do not consist of exactly two whitespace-separated fields are
	 * reported on standard output and skipped, so the returned array never
	 * contains {@code null} cells.
	 *
	 * @param fileName path of the list file
	 * @return an array {@code r} where {@code r[0][i]} is the first column and
	 *         {@code r[1][i]} the second column of the i-th data line, or
	 *         {@code null} (after printing a message) if the file does not exist
	 * @throws FileNotFoundException if the file exists but cannot be opened
	 */
	public static String[][] getList(String fileName) throws FileNotFoundException{
		File file = new File(fileName);
		if(!file.exists()){
			System.out.println(fileName+"不存在，请检查该文件。");
			return null;
		}

		ArrayList<String[]> rows = new ArrayList<String[]>();
		boolean headerSkipped = false;
		// Single pass; try-with-resources closes the scanner even on failure.
		try(Scanner sc = new Scanner(file)){
			while(sc.hasNextLine()){
				String line = sc.nextLine().trim();
				if(line.isEmpty()){
					continue;	// blank lines may appear anywhere, not only at the end
				}
				if(!headerSkipped){
					headerSkipped = true;	// first non-blank line is the column description
					continue;
				}
				String[] fields = line.split("\\s+");
				if(fields.length==2){
					rows.add(fields);
				}else{
					System.out.println(fileName+"中的数据行格式不正确，已忽略："+line);
				}
			}
		}

		String[][] list = new String[2][rows.size()];
		for(int i=0; i<rows.size(); i++){
			list[0][i] = rows.get(i)[0];
			list[1][i] = rows.get(i)[1];
		}
		return list;
	}

	/**
	 * Counts the elements of an array that already occurred earlier in it and
	 * prints each of them.
	 *
	 * <p>The number printed after "序号" is the count of distinct elements seen
	 * so far, not the array index of the duplicate.
	 *
	 * @param strArray elements to check
	 * @return number of elements that duplicate an earlier element
	 */
	public static int getRepeat(String[] strArray){
		int numRepeat = 0;
		HashSet<String> seen = new HashSet<String>();
		for(String str:strArray){
			// add() returns false when the element is already in the set
			if(!seen.add(str)){
				numRepeat++;
				System.out.println("发现第"+numRepeat+"个重复元素："+str+" 序号："+seen.size());
			}
		}
		return numRepeat;
	}

	/**
	 * Computes the difference list1 - list2, e.g. A={1,2,3}, B={1,3} gives {2}.
	 * Order and duplicates of {@code list1} are preserved.
	 *
	 * @param list1 elements to filter
	 * @param list2 elements to remove
	 * @return the elements of {@code list1} that are not in {@code list2}, or
	 *         {@code null} if there are none; callers must check for {@code null}
	 */
	public static String[] getDiffElement(String[] list1, String[] list2){
		HashSet<String> excluded = new HashSet<String>();
		for(String item:list2){
			excluded.add(item);
		}

		ArrayList<String> diffList = new ArrayList<String>();
		for(String item:list1){
			if(!excluded.contains(item)){
				diffList.add(item);
			}
		}

		if(diffList.isEmpty()){
			return null;
		}
		return diffList.toArray(new String[0]);
	}

	/**
	 * Tells whether a blog address contains a match for the given regular
	 * expression. With the pattern used by {@link #main(String[])},
	 * {@code http://blog.csdn.net/u013899770} is accepted and
	 * {@code http://write.blog.csdn.net/postlist} is rejected.
	 *
	 * @param blog    address to check; {@code null} is treated as invalid
	 * @param pattern regular expression that must be found in the address
	 * @return {@code true} if the pattern is found somewhere in {@code blog}
	 */
	public static boolean isCSDNBlog(String blog, String pattern){
		if(blog==null){
			return false;
		}
		Matcher m = Pattern.compile(pattern).matcher(blog);
		return m.find();
	}

	/**
	 * Writes the HTML page for a roster and a blog list, five students per row,
	 * and prints the students without a valid blog address to standard output.
	 *
	 * <p>Both lists are loaded before the output file is opened, so a missing
	 * input never leaves a half-written page behind.
	 *
	 * @param fileHtml    path of the HTML file to create (overwritten if present)
	 * @param fileStudent path of the roster file (ID, name)
	 * @param fileBlog    path of the blog list file (name, URL)
	 * @param pattern     regular expression a valid blog address must contain
	 * @return number of students with a valid blog address
	 * @throws FileNotFoundException if an input file is missing or the output
	 *                               file cannot be created
	 */
	public static int writeHtml(String fileHtml, String fileStudent, String fileBlog, String pattern) throws FileNotFoundException{
		String[][] studentList = requireList(fileStudent);
		String[][] blogList = requireList(fileBlog);

		int numStu = studentList[0].length;
		// The ID display format is chosen from the first ID; guard the empty roster.
		int idLen = numStu>0 ? studentList[0][0].length() : 0;
		int numBlog = 0;

		try(PrintWriter pw = new PrintWriter(new File(fileHtml))){
			pw.println("<HTML>");
			pw.println("<HEAD>");
			pw.println("\t<TITLE>自动生成名单</TITLE>");
			pw.println("</HEAD>");
			pw.println("<BODY>");

			for(int k=0; k<numStu; k++){
				String studentId = studentList[0][k];
				String studentName = studentList[1][k].trim();
				int t = findBlogIndex(blogList[0], studentName);

				StringBuilder label = new StringBuilder(formatId(studentId, idLen));
				label.append(formatName(studentName));

				String url;
				if(t>=0 && isCSDNBlog(blogList[1][t], pattern)){
					numBlog++;
					url = blogList[1][t];
					// four blanks take the place of "(缺)" to keep columns aligned
					label.append(HTML_BLANK).append(HTML_BLANK).append(HTML_BLANK).append(HTML_BLANK);
				}else{
					System.out.println(studentId+studentName);
					url = DEFAULT_URL;
					label.append("(缺)");
				}

				pw.print("<a target=\"_blank\" href=\""+escapeHtml(url)+"\">"+label+"</a>");
				pw.println(HTML_BLANK+HTML_BLANK);
				// start a new row after every fifth student
				if((k+1)%5==0){
					pw.println("<br />");
				}
			}

			pw.println("</BODY>"+"\n"+"</HTML>");
		}

		return numBlog;
	}

	/**
	 * Rewrites a blog list whose first column holds a student ID, or an ID
	 * followed by the name, so that the first column holds the name only.
	 *
	 * <p>Both files are loaded completely before the blog file is overwritten.
	 * An entry that contains none of the roster IDs keeps its first column
	 * unchanged.
	 *
	 * @param fileStudent path of the roster file (ID, name)
	 * @param fileBlog    path of the blog list file; it is overwritten in place
	 * @throws FileNotFoundException if an input file is missing or the blog file
	 *                               cannot be rewritten
	 */
	public static void modifyJkBlogFile(String fileStudent, String fileBlog) throws FileNotFoundException{
		String[][] studentName = requireList(fileStudent);
		String[][] studentBlog = requireList(fileBlog);

		// PrintWriter(File) truncates an existing file, so no explicit delete is needed.
		try(PrintWriter pw = new PrintWriter(new File(fileBlog))){
			pw.println("姓名    网址");
			for(int i=0; i<studentBlog[0].length; i++){
				String owner = studentBlog[0][i];
				String name = owner;	// fallback when no roster ID matches
				for(int j=0; j<studentName[0].length; j++){
					if(owner.contains(studentName[0][j])){
						name = studentName[1][j];
						break;
					}
				}
				String line = name+"  "+studentBlog[1][i];
				System.out.println(line);
				pw.println(line);
			}
		}
	}

	/**
	 * Command-line entry point.
	 *
	 * <p>With exactly three arguments they are the HTML output file, the roster
	 * file and the blog list file; otherwise the default paths below are used.
	 * Prints the students without a blog entry and the blog entries with an
	 * invalid address, then writes the HTML page.
	 *
	 * @param args optional: HTML file, roster file, blog list file
	 * @throws FileNotFoundException if a file cannot be opened or created
	 */
	public static void main(String[] args) throws FileNotFoundException {
		String fileStudent;
		String fileBlog;
		String fileHtml;
		if(args.length==3){
			fileHtml = args[0];
			fileStudent = args[1];
			fileBlog = args[2];
		}else{
			fileHtml = "data/wl131.html";
			fileStudent = "data/student_list_wl131.txt";
			fileBlog = "data/blog_list_wl131.txt";
		}

		String[][] studentName = getList(fileStudent);
		String[][] studentBlog = getList(fileBlog);
		if(studentName==null || studentBlog==null){
			return;	// getList has already reported which file is missing
		}

		// students that did not hand in a blog address; null means there are none
		String[] diffArray = getDiffElement(studentName[1], studentBlog[0]);
		if(diffArray!=null){
			for(String str:diffArray){
				System.out.println(str);
			}
		}

		// blog entries whose address does not look like a CSDN blog
		String pattern = "(http://blog.csdn.net/){1}[0-9a-zA-Z]+";
		for(int i=0; i<studentBlog[1].length; i++){
			if(!isCSDNBlog(studentBlog[1][i], pattern)){
				System.out.println(studentBlog[0][i]+": "+studentBlog[1][i]);
			}
		}

		writeHtml(fileHtml, fileStudent, fileBlog, pattern);

		System.out.println("ok,end……");
	}

	/** Loads a list with {@link #getList(String)} and fails if the file is missing. */
	private static String[][] requireList(String fileName) throws FileNotFoundException{
		String[][] list = getList(fileName);
		if(list==null){
			throw new FileNotFoundException(fileName);
		}
		return list;
	}

	/** Returns the index of the first blog entry whose name equals {@code studentName} ignoring case, or -1. */
	private static int findBlogIndex(String[] blogNames, String studentName){
		for(int t=0; t<blogNames.length; t++){
			if(blogNames[t]!=null && studentName.equalsIgnoreCase(blogNames[t].trim())){
				return t;
			}
		}
		return -1;
	}

	/**
	 * Formats a student ID for display: short IDs (first ID of at most three
	 * characters) are shown as they are, padded when only one character long;
	 * long IDs are cut at the position that leaves the last three characters of
	 * the first ID. An ID too short to be cut is shown unchanged.
	 */
	private static String formatId(String id, int idLen){
		if(idLen<=3){
			return id.length()<2 ? HTML_BLANK+escapeHtml(id) : escapeHtml(id);
		}
		int start = idLen-3;
		return escapeHtml(id.length()>start ? id.substring(start) : id);
	}

	/** Formats a name for display; two-character names get two blanks in the middle to line up with longer names. */
	private static String formatName(String name){
		if(name.isEmpty()){
			return "";
		}
		if(name.length()==2){
			return HTML_BLANK+escapeHtml(name.substring(0,1))+HTML_BLANK+HTML_BLANK+escapeHtml(name.substring(1,2));
		}
		return HTML_BLANK+escapeHtml(name);
	}

	/** Escapes the characters that are significant in HTML text and attribute values. */
	private static String escapeHtml(String text){
		StringBuilder sb = new StringBuilder(text.length());
		for(int i=0; i<text.length(); i++){
			char c = text.charAt(i);
			switch(c){
				case '&': sb.append("&amp;"); break;
				case '<': sb.append("&lt;"); break;
				case '>': sb.append("&gt;"); break;
				case '"': sb.append("&quot;"); break;
				default: sb.append(c);
			}
		}
		return sb.toString();
	}
}