java parseurl_jurl一个快速简单的URL解析Java库

jurl

https://api.codacy.com/project/badge/Grade/e8a7715455e44c73be897eaa131a8054
https://travis-ci.org/anthonynsimon/jurl.svg?branch=master
https://codecov.io/gh/anthonynsimon/jurl/branch/master/graph/badge.svg
https://img.shields.io/github/license/mashape/apistatus.svg?maxAge=2592000
https://jitpack.io/v/anthonynsimon/jurl.svg

Fast and simple URL parsing for Java, with UTF-8 and path resolving support. Based on Go's excellent net/url lib.

Why

Easy to use API - you just want to parse a URL after all.

Fast, 4+ million URLs per second on commodity hardware.

UTF-8 encoding and decoding.

Supports path resolving between URLs (absolute and relative).

Good test coverage with plenty of edge cases.

Supports IPv4 and IPv6.

No external dependencies.

Getting Started

Example:

// Parse URLs

URL base = URL.parse("https://user:secret@example♬.com/path/to/my/dir#about");

URL ref = URL.parse("./../file.html?search=germany&language=de_DE");

// Parsed base

base.getScheme(); // https

base.getUsername(); // user

base.getPassword(); // secret

base.getHost(); // example♬.com

base.getPath(); // /path/to/my/dir

base.getFragment(); // about

// Parsed reference

ref.getPath(); // ./../file.html

ref.getQueryPairs(); // Map = {search=germany, language=de_DE}

// Resolve them!

URL resolved = base.resolveReference(ref); // https://user:secret@example♬.com/path/to/file.html?search=germany&language=de_DE

resolved.getPath(); // /path/to/file.html

// Escaped UTF-8 result

resolved.toString(); // https://user:secret@example%E2%99%AC.com/path/to/file.html?search=germany&language=de_DE

Setup

Add the JitPack repository to your build file.

For gradle:

allprojects {

repositories {

maven { url 'https://jitpack.io' }

}

}

For maven:

<repositories>
    <repository>
        <id>jitpack.io</id>
        <url>https://jitpack.io</url>
    </repository>
</repositories>

Add the dependency:

For gradle:

dependencies {

compile 'com.github.anthonynsimon:jurl:v0.4.2'

}

For maven:

<dependency>
    <groupId>com.github.anthonynsimon</groupId>
    <artifactId>jurl</artifactId>
    <version>v0.4.2</version>
</dependency>

Issues

The recommended way to report and track issues is to open one on GitHub.

Contributing

Want to hack on the project? Any kind of contribution is welcome! Simply follow the next steps:

Fork the project.

Create a new branch.

Make your changes and write tests when practical.

Commit your changes to the new branch.

Send a pull request; it will be reviewed shortly.

In case you want to add a feature, please create a new issue and briefly explain what the feature would consist of. For bugs or requests, before creating an issue please check if one has already been created for it.

License

This project is licensed under the MIT license.

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
/*
 * HtmlParse: parses the Chinese characters out of the pages at the given URLs and writes them
 * to a text file.
 *   url:         several may be configured
 *   output path: configurable
 */
package com.lhs;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Properties;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Downloads the pages at the configured URLs, collects every substring that matches a
 * configurable regular expression, and writes the distinct matches (in first-seen order,
 * one per line) to a UTF-8 text file.
 *
 * <p>Settings are read from the classpath resource {@code /config.properties}:
 * <ul>
 *   <li>every key starting with {@code url} contributes one URL to fetch;</li>
 *   <li>{@code savePath} overrides the output file path;</li>
 *   <li>{@code reg} overrides the regular expression. The built-in default matches runs of
 *       characters in the range U+4E00 to U+9FA5.</li>
 * </ul>
 *
 * @author lihsh
 * @version 1.0
 */
public class HtmlParse {

    /** URLs to fetch, in the order the configuration enumeration yields them. */
    List<String> configList = new ArrayList<>();

    /** Path of the output file. */
    private String savePath = "d:\\htmlParse.txt";

    /** Regular expression whose matches are collected. */
    private String reg = "[\u4E00-\u9FA5]+";

    /** Compiled form of {@link #reg}; built on first use and reset whenever {@code reg} changes. */
    private Pattern compiledReg;

    /** Distinct matches gathered so far; a LinkedHashSet keeps first-seen order. */
    Set<String> resultSet = new LinkedHashSet<>();

    /**
     * Loads the configuration, fetches and scans every configured URL, then writes the result.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        HtmlParse hp = new HtmlParse();
        hp.getConfig();
        hp.start();
        hp.write2file();
    }

    /**
     * Reads {@code /config.properties} from the classpath and applies its entries to this
     * instance, echoing every key/value pair to standard output.
     *
     * <p>If the resource is missing or cannot be read, an error line is printed and the
     * built-in defaults stay in effect.
     */
    private void getConfig() {
        Properties props = new Properties();
        // getResourceAsStream returns null when the resource does not exist; try-with-resources
        // tolerates a null resource, so the explicit check below is what reports the problem.
        try (InputStream in = getClass().getResourceAsStream("/config.properties")) {
            if (in == null) {
                System.out.println("读取配置文件/config.properties出错");
                return;
            }
            props.load(in);
            Enumeration<?> names = props.propertyNames();
            System.out.print("读取配置文件:");
            while (names.hasMoreElements()) {
                String key = (String) names.nextElement();
                String value = props.getProperty(key);
                if (key.startsWith("url")) {
                    configList.add(value);
                } else if (key.equals("savePath")) {
                    savePath = value;
                } else if (key.equals("reg")) {
                    reg = value;
                    compiledReg = null; // force recompilation with the new expression
                }
                System.out.print(key + ":" + value + "; ");
            }
            System.out.println();
        } catch (IOException e) {
            e.printStackTrace();
            System.out.println("读取配置文件/config.properties出错");
        }
    }

    /**
     * Fetches and scans every configured URL in order. A URL for which no connection could be
     * created is skipped; {@link #getConnection(String)} has already reported the cause.
     */
    private void start() {
        for (String address : configList) {
            URLConnection con = getConnection(address);
            if (con == null) {
                continue;
            }
            readContent(con);
            System.out.println("读取:" + address + " 结束");
        }
    }

    /**
     * Creates a connection object for the given address.
     *
     * @param address the URL to connect to
     * @return the connection, or {@code null} if the address is malformed or the connection
     *         could not be created (the stack trace is printed in that case)
     */
    private URLConnection getConnection(String address) {
        try {
            return new URL(address).openConnection();
        } catch (IOException e) { // also covers MalformedURLException
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Reads the response body line by line as UTF-8 and adds every regex match to
     * {@link #resultSet}. I/O failures are reported via their stack trace; matches collected
     * before the failure are kept.
     *
     * @param con an open connection; must not be {@code null}
     */
    private void readContent(URLConnection con) {
        try (BufferedReader reader =
                new BufferedReader(new InputStreamReader(con.getInputStream(), "UTF-8"))) {
            String line;
            while ((line = reader.readLine()) != null) {
                resultSet.addAll(parse(line));
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Extracts all matches of the configured regular expression from one line.
     *
     * @param line the text to scan
     * @return the distinct matches in order of first occurrence; empty if there are none
     */
    private Set<String> parse(String line) {
        if (compiledReg == null) {
            compiledReg = Pattern.compile(reg);
        }
        Set<String> matches = new LinkedHashSet<>();
        Matcher matcher = compiledReg.matcher(line);
        while (matcher.find()) {
            matches.add(matcher.group());
        }
        return matches;
    }

    /**
     * Writes every collected match to {@link #savePath}, one per line, encoded as UTF-8,
     * replacing any existing file content. I/O failures are reported via their stack trace.
     */
    private void write2file() {
        try (BufferedWriter writer = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(new File(savePath)), "UTF-8"))) {
            for (String match : resultSet) {
                writer.write(match);
                writer.newLine();
            }
            writer.flush();
            System.out.println("解析结果保存至:" + savePath);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值