自己的小工具

仅供记录,代码存在缺失

 Maven 依赖(pom.xml):

        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi</artifactId>
            <version>3.11</version>
            <!--            <version>4.1.0</version>-->
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml</artifactId>
            <!--            <version>4.1.0</version>-->
            <version>3.11</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml-schemas</artifactId>
            <version>3.11</version>
        </dependency>

        <dependency>
            <groupId>javax.servlet</groupId>
            <artifactId>javax.servlet-api</artifactId>
            <version>4.0.1</version>
            <scope>provided</scope>
        </dependency>

 工具类代码:

import cn.hutool.Hutool;
import cn.hutool.core.io.file.FileWriter;

import cn.hutool.http.HttpRequest;
import cn.hutool.http.HttpUtil;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.google.gson.JsonObject;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.apache.directory.api.util.Strings;
import org.apache.http.HttpEntity;
import org.apache.http.HttpHost;
import org.apache.http.HttpStatus;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.utils.HttpClientUtils;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;

import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;

import org.htmlcleaner.CleanerProperties;
import org.htmlcleaner.DomSerializer;
import org.htmlcleaner.HtmlCleaner;
import org.htmlcleaner.TagNode;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import restclient.com.fasterxml.jackson.annotation.JsonProperty;

import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathFactory;
import java.io.*;

import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;


/**
 * Grab-bag of small helpers: reading and writing files, downloading HTML
 * (directly or through an HTTP proxy), extracting values with XPath or
 * regular expressions, and reading .xlsx sheets through Apache POI.
 *
 * author:      hyxu.xhy
 * create time: 2021/8/22
 */
@Component
@Slf4j
public class SmallUtils {

    /** Proxy host used by {@link #getHtmlProxy(String)}. */
    private static final String DEFAULT_PROXY_HOST = "10.200.125.65";

    /** Proxy port used by {@link #getHtmlProxy(String)}. */
    private static final int DEFAULT_PROXY_PORT = 8080;

    /** Connect and socket timeout, in milliseconds, for proxied requests. */
    private static final int PROXY_TIMEOUT_MILLIS = 6000;

    /** Charset name used to decode downloaded response bodies. */
    private static final String HTML_CHARSET = "utf-8";

    /** Buffer size used when draining an input stream. */
    private static final int READ_BUFFER_SIZE = 4096;


    /**
     * Writes {@code content} to the file at {@code filePath}.
     *
     * <p>NOTE(review): the file imports {@code cn.hutool.core.io.file.FileWriter}
     * explicitly, so that is presumably the type used here and it is expected to
     * open, write and close the file inside {@code write}. If this ever resolves
     * to {@code java.io.FileWriter} instead, the writer must be closed.
     *
     * @param filePath path of the file to write
     * @param content  text to write
     * @throws IOException declared for callers; see the note above
     */
    public void writeFile(String filePath,String content) throws IOException {
        FileWriter writer = new FileWriter(filePath);
        writer.write(content);
    }


    /**
     * Reads the whole content of a classpath resource as text.
     *
     * <p>The resource is looked up through this class's class loader (it is not
     * a file-system path). The bytes are decoded as UTF-8.
     *
     * @param filePath resource name, relative to the classpath root
     * @return the complete resource content
     * @throws FileNotFoundException if the resource does not exist
     * @throws IOException           if reading fails
     */
    public String readFile(String filePath) throws IOException {
        try (InputStream in = getClass().getClassLoader().getResourceAsStream(filePath)) {
            if (in == null) {
                throw new FileNotFoundException("Classpath resource not found: " + filePath);
            }
            // Drain the stream fully: a single read() may return fewer bytes than
            // are available, and the resource may be larger than one buffer.
            ByteArrayOutputStream collected = new ByteArrayOutputStream();
            byte[] buffer = new byte[READ_BUFFER_SIZE];
            int count;
            while ((count = in.read(buffer)) != -1) {
                collected.write(buffer, 0, count);
            }
            return new String(collected.toByteArray(), java.nio.charset.StandardCharsets.UTF_8);
        }
    }

    /**
     * Reads all lines of a file on the file system.
     *
     * @param filePath file-system path of the file
     * @return the lines of the file, or {@code null} if the file could not be
     *         read (the failure is printed to standard error)
     */
    public List<String> readFileLines(String filePath){
        Path path = Paths.get(filePath);
        try {
            return Files.readAllLines(path);
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Evaluates an XPath expression against an HTML page and concatenates the
     * values of the matched nodes.
     *
     * <p>Only nodes whose {@code getNodeValue()} is non-blank contribute to the
     * result, so the expression should select text or attribute nodes; nodes
     * whose node value is {@code null} add nothing.
     *
     * @param xPath XPath expression to evaluate
     * @param html  HTML source to search
     * @return the concatenated node values; an empty string if nothing matched,
     *         if either argument is {@code null}, or if evaluation failed
     */
    public String getByXpath(String xPath,String html){
        if (xPath == null || html == null) {
            return "";
        }
        TagNode tagNode = new HtmlCleaner().clean(html);
        Object value = null;
        try {
            org.w3c.dom.Document doc = new DomSerializer(new CleanerProperties()).createDOM(tagNode);
            XPath evaluator = XPathFactory.newInstance().newXPath();
            value = evaluator.evaluate(xPath, doc, XPathConstants.NODESET);
        } catch (Exception e) {
            System.out.println("Extract value error. " + e.getMessage());
            e.printStackTrace();
        }
        StringBuilder result = new StringBuilder();
        if (value instanceof NodeList) {
            NodeList nodeList = (NodeList) value;
            for (int j = 0; j < nodeList.getLength(); j++) {
                Node node = nodeList.item(j);
                String nodeString = node.getNodeValue();
                if (StringUtils.isNotBlank(nodeString)) {
                    result.append(nodeString);
                }
            }
        }
        return result.toString();
    }


    /**
     * Downloads a page with a default HTTP client.
     *
     * @param url address to request with HTTP GET
     * @return the response body when the status is 200; otherwise {@code null}
     */
    public String getHtml(String url){
        return fetch(HttpClients.createDefault(), url);
    }

    /**
     * Downloads a page through the default proxy.
     *
     * @param url address to request with HTTP GET
     * @return the response body when the status is 200; otherwise {@code null}
     */
    public String getHtmlProxy(String url){
        return getHtmlProxy(url, DEFAULT_PROXY_HOST, DEFAULT_PROXY_PORT);
    }

    /**
     * Downloads a page through the given HTTP proxy.
     *
     * @param url       address to request with HTTP GET
     * @param proxyHost host name or IP address of the proxy
     * @param proxyPort port of the proxy
     * @return the response body when the status is 200; otherwise {@code null}
     */
    public String getHtmlProxy(String url, String proxyHost, int proxyPort){
        RequestConfig requestConfig = RequestConfig.custom()
                .setConnectTimeout(PROXY_TIMEOUT_MILLIS)
                .setSocketTimeout(PROXY_TIMEOUT_MILLIS)
                .setProxy(new HttpHost(proxyHost, proxyPort))
                .build();
        CloseableHttpClient httpClient = HttpClients.custom()
                .setDefaultRequestConfig(requestConfig)
                .build();
        return fetch(httpClient, url);
    }

    /**
     * Executes a GET request with the given client and closes the client
     * afterwards, whether or not the request succeeded.
     */
    private String fetch(CloseableHttpClient httpClient, String url) {
        CloseableHttpResponse response = null;
        String html = null;
        try {
            // The request is built inside the try so that the client is still
            // closed when the URL is rejected.
            response = httpClient.execute(new HttpGet(url));
            HttpEntity httpEntity = response.getEntity();
            // A response may carry no entity at all; there is nothing to decode then.
            String body = httpEntity == null ? null : EntityUtils.toString(httpEntity, HTML_CHARSET);

            if (response.getStatusLine().getStatusCode() == HttpStatus.SC_OK) {
                html = body;
            } else {
                // Any non-200 status is only reported, together with its body if present.
                System.out.println("返回状态不是200");
                if (body != null) {
                    System.out.println(body);
                }
            }
        } catch (IOException e) {
            // ClientProtocolException is an IOException and is handled the same way.
            e.printStackTrace();
        } finally {
            HttpClientUtils.closeQuietly(response);
            HttpClientUtils.closeQuietly(httpClient);
        }
        return html;
    }

    /**
     * Returns the first match of {@code regex} in {@code content}.
     *
     * @param content text to search
     * @param regex   regular expression; when it has at least one capturing
     *                group, group 1 is returned, otherwise the whole match
     * @return the captured text, or {@code null} if there is no match or
     *         {@code content} is {@code null}
     */
    public static String regexFirst(String content,String regex){
        if (content == null) {
            return null;
        }
        Matcher m = Pattern.compile(regex).matcher(content);
        return m.find() ? capture(m) : null;
    }

    /**
     * Returns every match of {@code regex} in {@code content}, in order.
     *
     * @param content text to search
     * @param regex   regular expression; when it has at least one capturing
     *                group, group 1 of each match is collected, otherwise the
     *                whole match
     * @return the captured texts; empty if there is no match or
     *         {@code content} is {@code null}
     */
    public static List<String> regexAll(String content,String regex){
        List<String> result = new ArrayList<>();
        if (content == null) {
            return result;
        }
        Matcher m = Pattern.compile(regex).matcher(content);
        while (m.find()) {
            result.add(capture(m));
        }
        return result;
    }

    /** Returns group 1 of the current match, or the whole match if the pattern has no groups. */
    private static String capture(Matcher m) {
        return m.groupCount() >= 1 ? m.group(1) : m.group();
    }

    /**
     * Opens an .xlsx workbook from the classpath and returns one of its sheets.
     *
     * @param filePath resource name of the workbook, relative to the classpath root
     * @param sheetNum zero-based index of the sheet
     * @return the requested sheet
     * @throws IllegalArgumentException if the resource does not exist
     * @throws UncheckedIOException     if the workbook cannot be read
     */
    public Sheet getSheet(String filePath, int sheetNum){
        try (InputStream is = this.getClass().getClassLoader().getResourceAsStream(filePath)) {
            if (is == null) {
                throw new IllegalArgumentException("Classpath resource not found: " + filePath);
            }
            Workbook workbook = new XSSFWorkbook(is);
            return workbook.getSheetAt(sheetNum);
        } catch (IOException e) {
            throw new UncheckedIOException("Failed to read workbook: " + filePath, e);
        }
    }

    /**
     * Reads a sheet into a list of rows keyed by column header.
     *
     * <p>The first non-empty row is taken as the header and fixes the column
     * range; every later non-empty row becomes one map from header text to cell
     * text. Missing cells are read as an empty string. Columns that share the
     * same header text overwrite each other within a row. Cells that are read
     * are converted to string type in the in-memory workbook.
     *
     * @param filePath resource name of the workbook, relative to the classpath root
     * @param sheetNum zero-based index of the sheet
     * @return one map per data row, in sheet order
     */
    public List<Map<String,String>> readExcel(String filePath, int sheetNum){
        Sheet sheet = getSheet(filePath, sheetNum);
        List<Map<String,String>> dataList = new ArrayList<>();
        List<String> headRow = new ArrayList<>();
        boolean headerRead = false;
        int startIndex = 0;
        int lastIndex = 0;

        for (int rowIndex = 0; rowIndex <= sheet.getLastRowNum(); rowIndex++) {
            Row row = sheet.getRow(rowIndex);
            if (isRowEmpty(row)) {
                continue;
            }
            if (!headerRead) {
                startIndex = row.getFirstCellNum();
                lastIndex = row.getLastCellNum();
                headerRead = true;
                for (int col = startIndex; col < lastIndex; col++) {
                    headRow.add(cellText(row.getCell(col)));
                }
            } else {
                Map<String,String> map = new LinkedHashMap<>();
                for (int col = startIndex; col < lastIndex; col++) {
                    // headRow is filled from index 0, so shift by the first header column.
                    map.put(headRow.get(col - startIndex), cellText(row.getCell(col)));
                }
                dataList.add(map);
            }
        }
        return dataList;
    }

    /** Returns the cell's content as text, or an empty string for a missing cell. */
    private static String cellText(Cell cell) {
        if (cell == null) {
            return "";
        }
        cell.setCellType(Cell.CELL_TYPE_STRING);
        return cell.toString();
    }

    /**
     * Tells whether a row holds no data.
     *
     * @param row row to inspect, may be {@code null}
     * @return {@code true} if the row is {@code null} or every cell in it is
     *         missing or of blank type
     */
    public static boolean isRowEmpty(Row row){
        if (row == null) {
            return true;
        }
        for (int i = row.getFirstCellNum(); i < row.getLastCellNum(); i++) {
            Cell cell = row.getCell(i);
            if (cell != null && cell.getCellType() != Cell.CELL_TYPE_BLANK) {
                return false;
            }
        }
        return true;
    }

}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值