JAVA爬虫练习~通过杭电oj账号查询做题数

1. 首先我们来看一下效果:

 

通过查询,我们得到了 AC 做题数:先爬取用户状态页面的 HTML 代码,再通过自己的逻辑筛选出所需要的信息。

接下来是代码:


package day_1;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLEncoder;
import java.util.Scanner;

/**
 * Console tool that looks up an account on the HDU online judge.
 *
 * <p>It downloads the account's user-status page, scans the HTML line by line and prints the
 * nickname, the school and the number of solved problems that it finds there.
 */
public class test {
	/**
	 * Lookup flag shared between {@link #getHTMLSrc(String)} and its callers: it is incremented
	 * each time a nickname header is found, so 0 means "no nickname seen". Callers are expected
	 * to reset it to 0 before a lookup; {@code getHTMLSrc} itself never resets it.
	 */
	static int asd = 0;

	/** Set by {@link #getHTMLSrc(String)} when the page could not be opened or read. */
	private static boolean fetchFailed = false;

	/** Address of the user-status page; the account name is appended as the query value. */
	private static final String USER_STATUS_URL = "http://acm.hdu.edu.cn/userstatus.php?user=";

	/** Charset used to decode the page (and, by assumption, to encode the query value). */
	private static final String PAGE_CHARSET = "GBK";

	/** Opening tag that directly precedes the nickname text. */
	private static final String NICKNAME_MARKER = "<h1 style=\"color:#1A5CC8\" align=center>";

	/** Text that directly precedes the school name. */
	private static final String SCHOOL_MARKER = "<i style=\"color:blue\">from:";

	/** Label of the table row that holds the solved-problem count. */
	private static final String SOLVED_MARKER = "Problems Solved";

	/** The solved count is the text following this many '>' characters, counted from line start. */
	private static final int SOLVED_VALUE_TAG_COUNT = 4;

	/** Visual separator printed around each lookup result. */
	private static final String SEPARATOR = "~~~~~~~~~~~~~~~~~~~~";

	/**
	 * Prompts for account names on standard input and prints the lookup result for each one,
	 * until standard input is exhausted.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) {
		Scanner sc = new Scanner(System.in);
		System.out.println("############");
		System.out.println("欢迎查询杭电oj个人信息,请输入账号");
		System.out.print(":");
		// hasNext() ends the loop cleanly at end of input instead of letting next() throw.
		while (sc.hasNext()) {
			queryAccount(sc.next());
			System.out.println("可重复输入账号");
			System.out.print(":");
		}
	}

	/**
	 * Runs one lookup for {@code user} and prints the framed result.
	 *
	 * @param user account name exactly as typed by the user
	 */
	private static void queryAccount(String user) {
		System.out.println("....等候");
		System.out.println(SEPARATOR);

		String queryValue;
		try {
			// Encode so that characters such as '&', '#' or '%' cannot alter the query string.
			// NOTE(review): GBK is assumed because the page is decoded as GBK - confirm for
			// non-ASCII account names.
			queryValue = URLEncoder.encode(user, PAGE_CHARSET);
		} catch (IOException ignored) {
			// Charset not available on this JVM: fall back to the raw input.
			queryValue = user;
		}

		asd = 0;
		getHTMLSrc(USER_STATUS_URL + queryValue);
		// A failed download has already been reported; do not also claim the account is missing.
		if (!fetchFailed && asd == 0) {
			System.out.println("查找的账号不存在");
		}
		asd = 0;

		System.out.println(SEPARATOR);
		System.out.println();
	}

	/**
	 * Downloads the page at {@code url}, decodes it as GBK and prints every profile field found
	 * in it. {@link #asd} is incremented once for each nickname header encountered.
	 *
	 * <p>Failures to open or read the page are reported on standard error and are not rethrown.
	 *
	 * @param url address of the user-status page to read
	 */
	public static void getHTMLSrc(String url) {
		InputStream openStream = null;
		BufferedReader buf = null;
		fetchFailed = false;

		try {
			openStream = new URL(url).openStream();
			buf = new BufferedReader(new InputStreamReader(openStream, PAGE_CHARSET));
			String line;
			while ((line = buf.readLine()) != null) {
				printProfileFields(line);
			}
		} catch (MalformedURLException e) {
			fetchFailed = true;
			System.err.println("地址无效: " + url + " (" + e.getMessage() + ")");
		} catch (IOException e) {
			fetchFailed = true;
			System.err.println("读取页面失败: " + url + " (" + e + ")");
		} finally {
			try {
				// Closing the reader also closes the stream it wraps; the raw stream only has
				// to be closed directly when wrapping it failed.
				if (buf != null) {
					buf.close();
				} else if (openStream != null) {
					openStream.close();
				}
			} catch (IOException e) {
				System.err.println("关闭连接失败: " + e);
			}
		}
	}

	/**
	 * Prints whichever of nickname, school and solved count appear in one line of the page.
	 * The school is printed without a trailing newline; the solved-count branch starts with one.
	 */
	private static void printProfileFields(String line) {
		int nicknameAt = line.indexOf(NICKNAME_MARKER);
		if (nicknameAt >= 0) {
			asd++;
			String nickname = textUntil(line, nicknameAt + NICKNAME_MARKER.length(), "<");
			System.out.println("用户昵称:" + nickname);
		}

		int schoolAt = line.indexOf(SCHOOL_MARKER);
		if (schoolAt >= 0) {
			// The same line also carries the account's registration date after the school;
			// the school ends at the first entity ('&') or, failing that, at the closing tag.
			String school = textUntil(line, schoolAt + SCHOOL_MARKER.length(), "&<").trim();
			System.out.print("学校:" + school);
		}

		if (line.contains(SOLVED_MARKER)) {
			System.out.println();
			System.out.println("AC题目数量:" + textAfterNthTagEnd(line, SOLVED_VALUE_TAG_COUNT));
		}
	}

	/** Returns the text that follows the {@code n}-th '>' of {@code line}, up to the next '<'. */
	private static String textAfterNthTagEnd(String line, int n) {
		int seen = 0;
		for (int i = 0; i < line.length(); i++) {
			if (line.charAt(i) == '>' && ++seen == n) {
				return textUntil(line, i + 1, "<");
			}
		}
		return "";
	}

	/** Returns {@code line} from {@code from} up to the first of {@code stopChars} or line end. */
	private static String textUntil(String line, int from, String stopChars) {
		int end = from;
		while (end < line.length() && stopChars.indexOf(line.charAt(end)) < 0) {
			end++;
		}
		return line.substring(from, end);
	}

}

 

 

 

 

 

 

 

 

  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值