用Java爬虫爬取凤凰财经提供的沪深A股所有股票的代码和名称

爬取程序:

复制代码
package com.ufo.hy.agumaster.tool;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.List;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import com.ufo.hy.agumaster.entity.Stock;

/**

  • Crawl stock code/name from FengHuang finance website:http://app.finance.ifeng.com/list/stock.php?t=hs
  • Main package:jsoup
  • Dependency:
  •     <dependency>
         <groupId>org.jsoup</groupId>
         <artifactId>jsoup</artifactId>
         <version>1.7.3</version>
     </dependency>
    
  • @author heyang

*/
public class FenghuangCrawler {
private static final String SRC_URL=“http://app.finance.ifeng.com/list/stock.php?t=hs”;
private static final String ENCODING = “utf-8”;

// Used to save stock code names
private List<Stock> stockList; 

public FenghuangCrawler() {
    stockList=new ArrayList<Stock>();
    String url=SRC_URL;
    
    int idx=0;
    while(true) {
        System.out.println(url);
        
        String html = getUrlHtml(url,ENCODING);
        Document doc = Jsoup.parse(html,ENCODING);
        
        // Find core node
        Element divtab01 = doc.getElementsByClass("tab01").last();
        
        // Find stocks
        Elements trs=divtab01.getElementsByTag("tr");
        for(Element tr:trs) {
            Elements tds=tr.getElementsByTag("td");
            if(tds.size()>2) {
                Element codeElm=tds.get(0).getElementsByTag("a").last();
                Element nameElm=tds.get(1).getElementsByTag("a").last();
                
                Stock s=new Stock(idx++,codeElm.text(),nameElm.text());
                stockList.add(s);
            }
        }
        
        // Find next page url
        Element lastLink=divtab01.getElementsByTag("a").last();            
        if(lastLink.text().equals("下一页")) {
            url="http://app.finance.ifeng.com/list/stock.php"+lastLink.attr("href");
        }else {
            break;
        }
    }    
    
    for(Stock s:stockList) {
        System.out.println(s);
    }
    System.out.println("共找到"+idx+"个股票.");
}

private String getUrlHtml(String url, String encoding) {
    StringBuffer sb = new StringBuffer();
    URL urlObj = null;
    URLConnection openConnection = null;
    InputStreamReader isr = null;
    BufferedReader br = null;
    try {
        urlObj = new URL(url);
        openConnection = urlObj.openConnection();
        isr = new InputStreamReader(openConnection.getInputStream(), encoding);
        br = new BufferedReader(isr);
        String temp = null;
        while ((temp = br.readLine()) != null) {
            sb.append(temp + "\n");
        }
    } catch (MalformedURLException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        try {
            if (isr != null) {
                isr.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    return sb.toString();
}

public List<Stock> getStockList() {
    return stockList;
}

public static void main(String[] args) {
    // 根据需要设置代理
    System.setProperty("http.proxyHost", "");
    System.setProperty("http.proxyPort", "");

    new FenghuangCrawler();
}

}
复制代码
运行结果节选:

复制代码

Stock id:3743 code:002752 name:昇兴股份
Stock id:3744 code:000796 name:凯撒旅业
Stock id:3745 code:603233 name:股市行情
Stock id:3746 code:000048 name:京基智农
Stock id:3747 code:300463 name:迈克生物
Stock id:3748 code:300485 name:赛升药业
Stock id:3749 code:603387 name:基蛋生物
Stock id:3750 code:002469 name:三维工程
Stock id:3751 code:600052 name:浙江广厦
Stock id:3752 code:002187 name:广百股份
Stock id:3753 code:300069 name:金利华电
Stock id:3754 code:300317 name:珈伟新能
Stock id:3755 code:002637 name:赞宇科技
Stock id:3756 code:001914 name:招商积余
Stock id:3757 code:000564 name:蚂蚁金融
Stock id:3758 code:002363 name:隆基机械
Stock id:3759 code:603709 name:中源家居
Stock id:3760 code:000802 name:北京文化
Stock id:3761 code:002127 name:南极电商
Stock id:3762 code:600107 name:美尔雅
Stock id:3763 code:002678 name:珠江钢琴
Stock id:3764 code:002083 name:孚日股份
Stock id:3765 code:300325 name:德威新材
共找到3766个股票.

  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值