需要的jar包:dom4j(XML 解析)、jsoup(HTML 解析/网络爬虫)
1. XML 内容(即下文代码读取的 b/contacts.xml):
<?xml version="1.0" encoding="UTF-8"?>
<contacts xmlns="http://xiaochen.com" xmlns:xs="http://www.w3.org/2001/XMLSchema-instance" xs:schemaLocation="http://xiaochen.com contactsSchema.xsd">
<linkman id="69">
<name>gg</name>
<email>gg@xiaochen.com</email>
<address>武汉</address>
<group>小陈</group>
</linkman>
<linkman id="96">
<name>mm</name>
<email>mm@qq.com</email>
<address>光谷</address>
<group>光谷mm</group>
</linkman>
<linkman id="11">
<name>mm</name>
<email>mm@qq.com</email>
<address>光谷</address>
<group>光谷dd</group>
</linkman>
<linkman id="55">
<name>mm</name>
<email>mm@qq.com</email>
<address>光谷</address>
<group>光谷dd</group>
</linkman>
<linkman id="55"><name>mm</name><email>mm@qq.com</email><address>光谷</address><group>光谷dd</group></linkman><linkman id="55"><name>mm</name><email>mm@qq.com</email><address>光谷</address><group>光谷dd</group></linkman></contacts>
添加内容
package com.softeem.dom4j;
import org.dom4j.Document;
import org.dom4j.Element;
import org.dom4j.io.OutputFormat;
import org.dom4j.io.SAXReader;
import org.dom4j.io.XMLWriter;
import java.io.FileWriter;
import java.util.List;
/**
 * Appends one {@code linkman} element to the contacts document and saves the document.
 *
 * <p>The document is read from the classpath resource {@code b/contacts.xml} and the updated
 * tree is written to the relative path {@code resouces/b/contacts.xml}.
 */
public class dom4j_add_01 {
    /**
     * Loads the contacts document, adds a hard-coded {@code linkman} entry under the root
     * element and writes the whole document back to disk exactly once.
     *
     * @param args unused
     * @throws Exception if the resource is missing, cannot be parsed, or the file cannot be written
     */
    public static void main(String[] args) throws Exception {
        SAXReader reader = new SAXReader();

        // Load the document from the classpath; close the stream even if parsing fails.
        Document document;
        try (java.io.InputStream in =
                dom4j_add_01.class.getClassLoader().getResourceAsStream("b/contacts.xml")) {
            if (in == null) {
                throw new java.io.FileNotFoundException("classpath resource not found: b/contacts.xml");
            }
            document = reader.read(in);
        }

        // Root element of the document.
        Element contacts = document.getRootElement();

        // Add a new linkman element (with its child elements) under the root.
        Element linkman1 = contacts.addElement("linkman");
        linkman1.addAttribute("id", "55");
        linkman1.addElement("name").addText("mm");
        linkman1.addElement("email").addText("mm@qq.com");
        linkman1.addElement("address").addText("光谷");
        linkman1.addElement("group").addText("光谷dd");

        // The change so far exists only in memory. Serialize the WHOLE document once:
        // writing the document and then the new element again would append a duplicate
        // element after the closing root tag and leave the file malformed.
        OutputFormat format = OutputFormat.createPrettyPrint();
        // Encode the bytes as UTF-8 so they match the encoding named in the XML declaration,
        // instead of relying on the platform default charset used by FileWriter.
        format.setEncoding("UTF-8");

        // NOTE(review): "resouces" looks like a misspelling of "resources" — confirm the real
        // directory name before changing this path.
        try (java.io.OutputStream out = new java.io.FileOutputStream("resouces/b/contacts.xml")) {
            XMLWriter writer = new XMLWriter(out, format);
            writer.write(document);
            // XMLWriter buffers internally; flush before the underlying stream is closed.
            writer.flush();
        }
    }
}
删除
package com.softeem.dom4j;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;
import javax.print.Doc;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Writer;
import java.util.List;
/**
 * Removes the last child element of the contacts root element and saves the document.
 *
 * <p>The document is read from the classpath resource {@code b/contacts.xml} and the updated
 * tree is written to the relative path {@code resouces/b/contacts.xml}.
 */
public class dom4j_del_01 {
    /**
     * Loads the contacts document, deletes the last child of the root element (if there is one)
     * and writes the document back to disk.
     *
     * @param args unused
     * @throws Exception if the resource is missing, cannot be parsed, or the file cannot be written
     */
    public static void main(String[] args) throws Exception {
        SAXReader reader = new SAXReader();

        // Load the document from the classpath; close the stream even if parsing fails.
        Document document;
        try (java.io.InputStream in =
                dom4j_del_01.class.getClassLoader().getResourceAsStream("b/contacts.xml")) {
            if (in == null) {
                throw new java.io.FileNotFoundException("classpath resource not found: b/contacts.xml");
            }
            document = reader.read(in);
        }

        // Root element and its direct children.
        Element contacts = document.getRootElement();
        List<Element> linkmans = contacts.elements();
        if (linkmans.isEmpty()) {
            // Nothing to delete; leave the file on disk untouched instead of failing on get(-1).
            System.err.println("no child element to delete under <" + contacts.getName() + ">");
            return;
        }

        // A child is removed through its parent.
        Element linkman = linkmans.get(linkmans.size() - 1);
        contacts.remove(linkman);

        // The removal so far exists only in memory; persist the document.
        // Encode explicitly as UTF-8 rather than the platform default charset used by FileWriter.
        // NOTE(review): assumes the document declares UTF-8, as the sample contacts.xml does.
        // NOTE(review): "resouces" looks like a misspelling of "resources" — confirm the real
        // directory name before changing this path.
        try (Writer fw = new java.io.OutputStreamWriter(
                new java.io.FileOutputStream("resouces/b/contacts.xml"),
                java.nio.charset.StandardCharsets.UTF_8)) {
            document.write(fw);
        }
    }
}
查看节点
/**
 * Prints the text of every grandchild element of the contacts root element, one per line.
 *
 * <p>The document is read from the classpath resource {@code b/contacts.xml}.
 *
 * @param args unused
 * @throws Exception if the resource cannot be read or parsed
 */
public static void main(String[] args) throws Exception {
SAXReader reader = new SAXReader();
// Load the document from the classpath.
Document document = reader.read(dom4j_work.class.getClassLoader().getResourceAsStream("b/contacts.xml"));
// Root element of the document.
Element contacts = document.getRootElement();
// Direct children of the root element.
List<Element> linkmans = contacts.elements();
// Print the text content of each child element of every entry.
// The loop must iterate the list `linkmans`; the name `linkman` is not defined in this method.
for (Element e : linkmans) {
List<Element> elist = e.elements();
for (Element el : elist) {
System.out.println(el.getText());
}
}
}
jsoup 网络爬虫
package com.softeem.jsoup;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.Date;
/**
 * Minimal image crawler: fetches a page with jsoup, lists its {@code <img>} elements and
 * downloads each image into the current working directory.
 */
public class jsoup01 {
    /**
     * Fetches {@code https://www.baidu.com/}, prints the number of {@code img} elements and
     * each resolved image URL, and downloads every image.
     *
     * @param args unused
     * @throws Exception if the page cannot be fetched or parsed
     */
    public static void main(String[] args) throws Exception {
        URL url = new URL("https://www.baidu.com/");
        Document parse = Jsoup.parse(url, 10000);
        Elements imgs = parse.getElementsByTag("img");
        System.out.println(imgs.size());
        for (int i = 0; i < imgs.size(); i++) {
            // Resolve against the page's base URI: a raw src may be relative or
            // protocol-relative ("//host/path"), which new URL(...) rejects.
            String src = imgs.get(i).absUrl("src");
            if (src.isEmpty()) {
                // No src attribute, or it could not be made absolute; nothing to download.
                continue;
            }
            System.out.println(src);
            downLoad(src);
        }
    }

    /**
     * Downloads the resource at {@code url} into a file named
     * {@code <current time in millis>.png} in the working directory.
     *
     * <p>Failures are reported with a stack trace and otherwise ignored, so one bad URL does
     * not stop the remaining downloads.
     *
     * @param url absolute URL of the resource to download
     */
    public static void downLoad(String url) {
        try {
            URL u = new URL(url);
            // u.openStream() sometimes yields no data because of anti-crawler checks on the
            // request headers, so open a connection and send a browser User-agent instead.
            URLConnection uc = u.openConnection();
            uc.setRequestProperty("User-agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36");
            // try-with-resources closes both streams even when the copy fails midway.
            try (InputStream is = uc.getInputStream();
                 FileOutputStream fos = new FileOutputStream(new Date().getTime() + ".png")) {
                byte[] by = new byte[1024];
                int len;
                while ((len = is.read(by)) != -1) {
                    // Write only the bytes actually read; writing the whole buffer would
                    // append stale bytes and corrupt the file.
                    fos.write(by, 0, len);
                }
            }
        } catch (MalformedURLException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}