通过 AsyncTool 抓取下载地址:并行框架练习记录

     <dependency>
            <groupId>cn.hutool</groupId>
            <artifactId>hutool-all</artifactId>
            <version>4.5.15</version>
        </dependency>
        <dependency>
            <groupId>cn.wanghaomiao</groupId>
            <artifactId>JsoupXpath</artifactId>
            <version>0.1.1</version>
        </dependency>

        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <version>1.18.8</version>
        </dependency>
        <dependency>
            <groupId>com.gitee.jd-platform-opensource</groupId>
            <artifactId>asyncTool</artifactId>
            <version>V1.3-SNAPSHOT</version>
        </dependency>
    <repositories>
        <repository>
            <id>jitpack.io</id>
            <url>https://jitpack.io</url>
        </repository>
    </repositories>
package com.xiaoyun.future;

import cn.hutool.http.HttpUtil;
import cn.wanghaomiao.xpath.exception.NoSuchAxisException;
import cn.wanghaomiao.xpath.exception.NoSuchFunctionException;
import cn.wanghaomiao.xpath.exception.XpathSyntaxErrorException;
import cn.wanghaomiao.xpath.model.JXDocument;
import com.jd.platform.async.callback.ICallback;
import com.jd.platform.async.callback.IWorker;
import com.jd.platform.async.executor.Async;
import com.jd.platform.async.worker.WorkResult;
import com.jd.platform.async.wrapper.WorkerWrapper;
import lombok.Builder;
import lombok.Data;

import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutionException;
import java.util.stream.Collectors;

/**
 * 执行顺序
 * ....|=> B-01 => |
 * A=>| => B-02 => | => C => D
 * ...|=>  B-03 => |
 */

public class AsyncToolTest {

    private static Map<String, String> cacheMap = new ConcurrentHashMap<>();

    private static List<String> urlList = new LinkedList<>();

    public static void main(String[] args) throws ExecutionException, InterruptedException {
        String url = "https://www.newcger.com/";
        //A: xpath
        String xpathNameFirst = "//ul[@id='mainNav']/li//a/@href";
        //C: xpath
        String xpathNameThree = "//div[@class='info']/a/@href";
        //D: xpath
        String xpathNameEnd = "//div[@class='fd_div']/ul//li/a/@href";


        //第四步 D 根据单个页面 得出网盘下载链接
        TaskFunction<String, Set<Object>> tEnd = new TaskFunction<String, Set<Object>>() {
            @Override
            public Set<Object> action(String xpathName, Map<String, WorkerWrapper> allWrappers) {
                Set<Object> urls = new HashSet<>();
                allWrappers.keySet().stream().parallel().forEach(key -> {
                    if (key.startsWith(TaskEnum.THREE_TASK.toString())) {
                        Set<Object> set = (Set<Object>) allWrappers.get(key).getWorkResult().getResult();
                        set.stream().parallel().forEach(item -> {
                                    List<Object> html = getHtml(XpathEntity.builder().xpathName(xpathName).url(item.toString()).build());
                                    Object down = html.get(1);
//                                    System.out.println("单个页面下载地址:" + down.toString());
                                    urls.add(html.get(1));
                                }
                        );
                    }

                });
                return urls;
            }
        };
        WorkerWrapper<String, Set<Object>> tEndWork = new WorkerWrapper.Builder<String, Set<Object>>()
                .worker(tEnd)
                .callback(tEnd)
                .id(TaskEnum.END_TASK.toString())
                .param(xpathNameEnd)
                .build();


        //第三步 C 根据 所得标题url得出 单个页面地址
        TaskFunction<String, Set<Object>> tThree = new TaskFunction<String, Set<Object>>() {
            @Override
            public Set<Object> action(String xpathName, Map<String, WorkerWrapper> allWrappers) {
                Set<Object> urls = new HashSet<>();
                allWrappers.keySet().stream().parallel().forEach(key -> {
                            if (key.startsWith(TaskEnum.SECOND_TASK.toString())) {
                                String urlResult = (String) allWrappers.get(key).getWorkResult().getResult();
                                List<Object> html = getHtml(XpathEntity.builder().xpathName(xpathName).url(urlResult).build());
                                html.stream().parallel().forEach(item -> {
//                                    System.out.println("单个页面地址:" + item);
                                    urls.add(item);
                                });
                            }
                        }
                );
                return urls;
            }
        };
        WorkerWrapper<String, Set<Object>> tThreeWork = new WorkerWrapper.Builder<String, Set<Object>>()
                .worker(tThree)
                .callback(tThree)
                .id(TaskEnum.THREE_TASK.toString())
                .param(xpathNameThree)
                .next(tEndWork)
                .build();


        //第二步 B-01 获取AE模板url地址:
        TaskFunction<Integer, String> tSecond01 = new TaskFunction<Integer, String>() {
            @Override
            public String action(Integer num, Map<String, WorkerWrapper> allWrappers) {
                List<Object> result = (List<Object>) allWrappers.get(TaskEnum.FIRST_TASK.toString()).getWorkResult().getResult();
                if (result != null) {
                    String url = (String) result.get(num);
//                    System.out.println(url);
                    return url;
                }
                return null;
            }

            @Override
            public void result(boolean success, Integer param, WorkResult<String> workResult) {
                if (success) {
                    String result = workResult.getResult();
                    urlList.add(result);
                }
            }

        };

        WorkerWrapper<Integer, String> tSecond01Work = new WorkerWrapper.Builder<Integer, String>()
                .worker(tSecond01)
                .callback(tSecond01)
                .param(2)
                .id(TaskEnum.SECOND_TASK.toString() + "01")
                .next(tThreeWork)
                .build();

        //第二步 B-02 获取视频素材url地址:
        WorkerWrapper<Integer, String> tSecond02Work = new WorkerWrapper.Builder<Integer, String>()
                .worker(tSecond01)
                .callback(tSecond01)
                .param(4)
                .id(TaskEnum.SECOND_TASK.toString() + "02")
                .next(tThreeWork)
                .build();
        //第二步 B-03 获取音频素材url地址:
        WorkerWrapper<Integer, String> tSecond03Work = new WorkerWrapper.Builder<Integer, String>()
                .worker(tSecond01)
                .callback(tSecond01)
                .param(5)
                .id(TaskEnum.SECOND_TASK.toString() + "03")
                .next(tThreeWork)
                .build();

        //第一步 A: 获取所有链接
        TaskFunction<XpathEntity, List<Object>> tFirst = new TaskFunction<XpathEntity, List<Object>>() {
            @Override
            public List<Object> action(XpathEntity object, Map<String, WorkerWrapper> allWrappers) {
                List<Object> html = getHtml(object);
                return html;
            }
        };
        WorkerWrapper<XpathEntity, List<Object>> tFirstWork = new WorkerWrapper.Builder<XpathEntity, List<Object>>()
                .worker(tFirst)
                .callback(tFirst)
                .param(XpathEntity.builder().url(url).xpathName(xpathNameFirst).build())
                .id(TaskEnum.FIRST_TASK.toString())
                .next(tSecond01Work, tSecond02Work, tSecond03Work)
                .build();


        Async.beginWork(500000, tFirstWork);

        //获取所有结果
        tEndWork.getWorkResult().getResult().stream().forEach(item -> {
            System.out.println(item);
        });

        Async.shutDown();

//        System.out.println(cacheMap);

    }



    private static List<Object> getHtml(XpathEntity xpathEntity) {
        String body = null;
        if (cacheMap.get(xpathEntity.getUrl()) != null) {
            body = cacheMap.get(xpathEntity.getUrl());
        } else {
            try {
                Thread.sleep(500);
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
            body = HttpUtil.
                    createGet(xpathEntity.getUrl()).
                    execute().body();

            cacheMap.put(xpathEntity.getUrl(), body);
        }
        JXDocument jxDocument = new JXDocument(body);
        List<Object> sel = new ArrayList<>();
        try {
            sel = jxDocument.sel(xpathEntity.getXpathName());
        } catch (NoSuchAxisException e) {
            e.printStackTrace();
        } catch (NoSuchFunctionException e) {
            e.printStackTrace();
        } catch (XpathSyntaxErrorException e) {
            e.printStackTrace();
        }
        return sel;
    }
}

/**
 * Pairs a page URL with the XPath expression to evaluate against that page.
 *
 * <p>Lombok generates the accessors ({@code @Data}) and the builder ({@code @Builder}).
 */
@Data
@Builder
class XpathEntity {
    /** Address of the page to download. */
    private String url;
    /** XPath expression to run on the downloaded page. */
    private String xpathName;
}

/**
 * Names of the pipeline steps. Their {@code toString()} values are used as worker-wrapper
 * ids (for {@code SECOND_TASK}, as an id prefix followed by a two-digit suffix).
 */
enum TaskEnum {
    /** Step A. */
    FIRST_TASK,
    /** Step B (id prefix shared by the parallel B workers). */
    SECOND_TASK,
    /** Step C. */
    THREE_TASK,
    /** Step D. */
    END_TASK;
}


/**
 * Adapter that implements both {@link IWorker} and {@link ICallback} with do-nothing
 * defaults, so that subclasses override only the methods they need.
 *
 * @param <T> type of the parameter handed to the worker
 * @param <V> type of the value the worker produces
 */
class TaskFunction<T, V> implements IWorker<T, V>, ICallback<T, V> {


    /** Callback hook; does nothing by default. */
    @Override
    public void begin() {

    }

    /** Callback hook receiving the outcome of the worker; does nothing by default. */
    @Override
    public void result(boolean success, T param, WorkResult<V> workResult) {

    }

    /**
     * Work to perform; returns {@code null} by default.
     *
     * @param object      the parameter configured on the wrapper
     * @param allWrappers wrappers keyed by id
     */
    @Override
    public V action(T object, Map<String, WorkerWrapper> allWrappers) {
        return null;
    }

    /** Fallback value for the worker; {@code null} by default. */
    @Override
    public V defaultValue() {
        return null;
    }
}

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值