Android客户端网络爬虫jsoup原理

在Android客户端解析网页并展示其内容时,一般使用jsoup工具,它的解析速度快。下面介绍两种解析方法:

方法一:使用Thread+Handler

protected void httpClientWebData() throws UnsupportedEncodingException {
		String sub=java.net.URLEncoder.encode("亲爱的","UTF-8");
	        String pediyUrl1 = "自己填写url";
		String title = null ;
		String href = null;
		StringBuffer buffer=new StringBuffer();
		try {
			String UESRAGENT_PHONE = "User-Agent: Mozilla/5.0 (iPhone; CPU iPhone OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) <span style="white-space:pre">						</span>Version/6.0 Mobile/10A405 Safari/8536.25"; 
			Document doc = Jsoup.connect(pediyUrl1).header("User-Agent", UESRAGENT_PHONE).get();
			Elements units = doc.getElementsByClass("g");
			for (int i = 0; i < units.size(); i++){//1421242211966   1421242212577  1252
				Element unit_ele = units.get(i); //1421242030785   1421242031943 1158
				Element h1_ele = unit_ele.getElementsByTag("h2").get(0).child(0);    //得到第一个h2标签里面的第一个child
	            		href= h1_ele.attr("href"); 
				
				Element links = unit_ele.getElementsByClass("s").get(0); 
				title = links.text();  
				buffer=buffer.append(i+" href "+href+" title "+title+"                                   ");
	        }
			 //b=System.currentTimeMillis();
			mHandler.obtainMessage(MSG_SUCCESS,buffer).sendToTarget();
		} catch (ClientProtocolException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

然后在Runnable中调用该方法,供工作线程执行:

Runnable httpClientRunnable = new Runnable() {
	/**
	 * Invokes {@code httpClientWebData()} and prints the stack trace if it
	 * reports an unsupported encoding.
	 */
	@Override
	public void run() {
		try {
			httpClientWebData();
		} catch (UnsupportedEncodingException encodingFailure) {
			encodingFailure.printStackTrace();
		}
	}
};

最后在UI线程中使用Handler处理结果:

mHandler = new Handler() {
	/**
	 * Reacts to the result message: on MSG_SUCCESS shows a toast and puts
	 * the message payload into webDataShow; on MSG_FAILURE shows a failure
	 * toast. Any other message code is ignored.
	 */
	@Override
	public void handleMessage(Message msg) {
		final int what = msg.what;
		if (what == MSG_SUCCESS) {
			Toast.makeText(getApplicationContext(), "URLConnection 连接成功", Toast.LENGTH_SHORT).show();
			webDataShow.setText(msg.obj.toString());
		} else if (what == MSG_FAILURE) {
			Toast.makeText(getApplicationContext(), "URLConnection 连接失败", Toast.LENGTH_SHORT).show();
		}
	}
};

在onClick中创建并启动线程:

				// Wrap httpClientRunnable in a new thread and start it.
				httpClientThread = new Thread(httpClientRunnable);
				httpClientThread.start();


方法二:使用异步任务(AsyncTask)

//异步获取信息
	String la;
    class Loadhtml extends AsyncTask<String, String, String>
    {
        ProgressDialog bar;
        Document doc;
        @Override
        protected String doInBackground(String... params) {
            // TODO Auto-generated method stub
			try {
				StringBuffer buffer = new StringBuffer();
				String sub = java.net.URLEncoder.encode("亲爱的", "UTF-8");
				String pediyUrl1 = "http://173.194.121.28/custom?newwindow=1&sitesearch=pan.baidu.com&hl=zh-&q="+ sub + "";
				String UESRAGENT_PHONE = "User-Agent: Mozilla/5.0 (iPhone; CPU iPhone OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, lik<span style="white-space:pre">							</span> e Gecko) Version/6.0 Mobile/10A405 Safari/8536.25";
				Document doc = Jsoup.connect(pediyUrl1)
						.header("User-Agent", UESRAGENT_PHONE).get();
				Elements units = doc.getElementsByClass("g");
				for (int i = 0; i < units.size(); i++) {// 1421242211966
														// 1421242212577 1252
					Element unit_ele = units.get(i); // 1421242030785
														// 1421242031943 1158
					Element h1_ele = unit_ele.getElementsByTag("h2").get(0).child(0); // 得到第一个h2标签里面的第一个child
					String href = h1_ele.attr("href");
					Element links = unit_ele.getElementsByClass("s").get(0);
					String title = links.text();
					buffer = buffer.append(i + " href " + href + " title "+ title + "                                   ");
				}
				la=buffer.toString();
				/*
				 * ContentValues values = new ContentValues();
				 * values.put("Title", buffer); values.put("Url", url);
				 */
			} catch (IOException e) {
				e.printStackTrace();
			}
			return la;
		
        }

        @Override
        protected void onPostExecute(String result) {
            // TODO Auto-generated method stub
            super.onPostExecute(result);
//            Log.d("doc", doc.toString().trim());
            bar.dismiss();
            webDataShow.setText(result);
        }

        @Override
        protected void onPreExecute() {
            // TODO Auto-generated method stub
            super.onPreExecute();
        
            bar = new ProgressDialog(MainActivity.this);
            bar.setMessage("正在加载数据····");
            bar.setIndeterminate(false);
            bar.setCancelable(false);
            bar.show();
        }
        
    }

调用方法:
	// Create the task and start it without arguments.
	Loadhtml dTask = new Loadhtml();  
            dTask.execute();  


  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值