抓取html网页

/*
 * Created on 2005-8-10
 *
 * TODO To change the template for this generated file go to
 * Window - Preferences - Java - Code Style - Code Templates
 */
package org.test8;

import org.htmlparser.Node;
import org.htmlparser.Parser;
import org.htmlparser.Text;
import org.htmlparser.tags.CompositeTag;
import org.htmlparser.tags.TableColumn;
import org.htmlparser.tags.TableTag;
import org.htmlparser.util.ParserException;

/**
 * @author xuehao
 *
 * TODO To change the template for this generated type comment go to Window -
 * Preferences - Java - Code Style - Code Templates
 */
public class Weather
{
    private String city;

    private String state;

    private String temperature;

    private String time;

    private String wind;

    private String windpower;

    private long UpdateTime;

    /**
     * @return
     */
    public String getTemperature()
    {
        return temperature;
    }

    /**
     * @return
     */
    public String getTime()
    {
        return time;
    }

    /**
     * @return
     */
    public String getWind()
    {
        return wind;
    }

    /**
     * @return
     */
    public String getWindpower()
    {
        return windpower;
    }

    /**
     * @param string
     */
    public void setTemperature(String string)
    {
        temperature = string;
    }

    /**
     * @param string
     */
    public void setTime(String string)
    {
        time = string;
    }

    /**
     * @param string
     */
    public void setWind(String string)
    {
        wind = string;
    }

    /**
     * @param string
     */
    public void setWindpower(String string)
    {
        windpower = string;
    }

    /**
     * @return
     */
    public long getUpdateTime()
    {
        return UpdateTime;
    }

    /**
     * @param l
     */
    public void setUpdateTime(long l)
    {
        UpdateTime = l;
    }

    /**
     * @return
     */
    public String getState()
    {
        return state;
    }

    /**
     * @param string
     */
    public void setState(String string)
    {
        state = string;
    }

    /**
     * @return
     */
    public String getCity()
    {
        return city;
    }

    /**
     * @param string
     */
    public void setCity(String string)
    {
        city = string;
    }

    public static void main( String args[] ) throws Exception
    {
        Weather w = new Weather();
        w.parserWeather();
    }
    /** *********************************************** */
    public Weather parserWeather() throws Exception
    {
        Weather w = new Weather();
        try
        {

            Parser parser =
             new Parser("file://localhost/e:/testproject/zhenjiang.htm");
            //Parser parser = new Parser(
            //        "http://weather.news.sohu.com/query.php?city=镇江");

            parser.setEncoding("gb2312");
//            System.out.println( TableTag.class );
           
            Node nodes[] = parser.extractAllNodesThatAre(TableTag.class);

            TableTag table = (TableTag) nodes[3];
            //temperature
            Text[] stringNodes = table.digupStringNode("镇江");
//            Text[] stringNodes = table.digupStringNode("zhenjiang");
/*            for( int k=0; k<stringNodes.length; k++ )
            {
                System.out.println( stringNodes[k] );
            }
*/
            System.out.println( stringNodes.length );
            if( stringNodes.length==0 )
            {
                System.out.println( "stringNodes.length==0, exit app!" );
                return null;
            }else
            {
                System.out.println( "stringNodes.length!=0" );
            }
/*           
            Text name = stringNodes[0];
//            System.out.println( name );
           
            w.setCity(name.toPlainTextString());
            CompositeTag td = (CompositeTag) name.getParent();
            CompositeTag tr = (CompositeTag) td.getParent();
            int columnNo = tr.findPositionOf(td);
            TableColumn nextColumn = (TableColumn) tr.childAt(5);
            Node expectedName = nextColumn.childAt(0);
            Node expectedName2 = nextColumn.childAt(2);
            //System.out.println(expectedName.getText());
            //System.out.println(expectedName2.getText());
            w.setState(expectedName.getText());
            w.setTemperature(expectedName2.getText());
            //time
            stringNodes = table.digupStringNode("时间");
            name = stringNodes[0];
            //System.out.println(name.toPlainTextString());

            String time = name.toPlainTextString().substring(4,
                    name.toPlainTextString().length()).trim();
            //System.out.println(time);
            w.setTime(time);
            //wind
            stringNodes = table.digupStringNode("风向");
            name = stringNodes[0];
            //System.out.println(name.toPlainTextString());

            String wind = name.toPlainTextString().substring(4,
                    name.toPlainTextString().length()).trim();
            //System.out.println(wind);
            w.setWind(wind);
            //wind power
            stringNodes = table.digupStringNode("风力");
            name = stringNodes[0];
            //System.out.println(name.toPlainTextString());

            String windpower = name.toPlainTextString().substring(4,
                    name.toPlainTextString().length()).trim();
            //System.out.println(windpower);
            w.setWindpower(windpower);

            w.setUpdateTime(System.currentTimeMillis());
*/
        } catch (ParserException e)
        {

            e.printStackTrace();
        }
      
        return w;
    }

    //    解析出来的代码必须做缓存处理,
/*
    private static long TIME_TO_LIVE = 1000 * 60 * 60 * 12;

    private Weather loadWeather() throws Exception
    {
        Weather weather = weather = (Weather) cache_.get("chinaweather");
        long currentTime = System.currentTimeMillis();
        if (weather != null
                && currentTime < (weather.getUpdateTime() + TIME_TO_LIVE))
        {
            cache_.remove("chinaweather");
            weather = null;
        }

        if (weather == null)
        {
            synchronized (cache_)
            {
                weather = parserWeather();
                cache_.put("chinaweather", weather);
            }
        }

        return weather;
    }
*/   
}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值