html文本分割文字和图片

20 篇文章 0 订阅
6 篇文章 0 订阅

        这里主要以 p 标签和 img 标签进行分割。如果正文是用 <br /> 标签换行的,可以先将 <br /> 标签替换为 <p> 标签再进行分割。话不多说,上代码:

一,以p标签进行分段

/**
 * Splits an HTML article into an ordered JSON array of items, one paragraph segment at a time.
 *
 * <p>The article is cut at {@code <p ...>}/{@code </p>} boundaries (lower- or upper-case).
 * Each segment is handed to {@code getImagJson}, which appends the text and image items it
 * contains to the result. Text that sits outside any paragraph, and paragraphs that are never
 * closed, are emitted as segments of their own so no content is lost.
 *
 * @param articleText the HTML source of the article; may be null or blank
 * @return the collected items in document order; empty when {@code articleText} is blank
 */
public static JSONArray getContentJson4Part(String articleText) {
	JSONArray array = new JSONArray();
	if (StringUtils.isBlank(articleText)) {
		return array;
	}

	// Mutable sequence counter shared with getImagJson, which receives it on every call.
	StringBuffer currentSeq = new StringBuffer("1");
	int textLength = articleText.length();
	int curLength = 0;

	while (curLength < textLength) {
		// NOTE(review): the "<p" prefix also matches other tags starting with p (e.g. <pre>).
		int pStart = indexOfParagraphToken(articleText, "<p", "<P", curLength);
		if (pStart < 0) {
			// No further paragraph tag: the remainder is the final segment.
			getImagJson(articleText.substring(curLength, textLength), array, currentSeq);
			break;
		}

		if (pStart > curLength) {
			// Emit the text in front of the tag first. Doing this before looking for the
			// closing tag keeps that text even when the paragraph is never closed.
			getImagJson(articleText.substring(curLength, pStart), array, currentSeq);
			curLength = pStart;
			continue;
		}

		int pEnd = indexOfParagraphToken(articleText, "</p>", "</P>", pStart);
		if (pEnd >= 0) {
			pEnd += 4; // include the closing tag itself ("</p>" is 4 characters)
		} else {
			// Unclosed paragraph: it runs up to the next opening tag, or to the end of the text.
			int nextOpen = indexOfParagraphToken(articleText, "<p", "<P", pStart + 1);
			pEnd = nextOpen >= 0 ? nextOpen : textLength;
		}

		getImagJson(articleText.substring(pStart, pEnd), array, currentSeq);
		curLength = pEnd;
	}
	return array;
}

/**
 * Returns the earliest index at or after {@code from} where either spelling of a tag token
 * occurs, or -1 when neither occurs.
 */
private static int indexOfParagraphToken(String text, String lower, String upper, int from) {
	int lowerPos = text.indexOf(lower, from);
	int upperPos = text.indexOf(upper, from);
	if (lowerPos < 0) {
		return upperPos;
	}
	if (upperPos < 0) {
		return lowerPos;
	}
	return Math.min(lowerPos, upperPos);
}

二,分割文字和图片

/**
 * Matches the {@code src} attribute of an image tag, case-insensitively. The value may be
 * double-quoted (group 1), single-quoted (group 2) or unquoted (group 3). The look-behind
 * keeps attributes such as {@code data-src} from being mistaken for {@code src}.
 */
private static final java.util.regex.Pattern IMG_SRC_PATTERN = java.util.regex.Pattern.compile(
		"(?<![\\w-])src\\s*=\\s*(?:\"([^\"]*)\"|'([^']*)'|([^\\s\"'>]+))",
		java.util.regex.Pattern.CASE_INSENSITIVE);

/**
 * Splits one HTML fragment into "text" and "image" items and appends them to {@code array}
 * in document order.
 *
 * <p>Every {@code <img ...>} tag (lower- or upper-case) becomes an "image" item whose value is
 * the tag's {@code src}. The text between image tags becomes "text" items. Items are built by
 * {@code getItem}; whenever it returns null nothing is appended. An image tag without a usable
 * {@code src} is skipped rather than emitted with the raw tag as its value.
 *
 * @param text       the HTML fragment to split; null or blank input appends nothing
 * @param array      the array that receives the items
 * @param currentSeq the running sequence counter, passed through to {@code getItem}
 */
public static void getImagJson(String text, JSONArray array, StringBuffer currentSeq) {
	if (StringUtils.isBlank(text)) {
		return;
	}

	int length = text.length();
	int start = 0;
	while (start < length) {
		int imgStart = indexOfImgTag(text, start);
		if (imgStart < 0) {
			// No more images: everything that is left is text.
			appendItem(array, getItem("text", text.substring(start, length), currentSeq));
			break;
		}

		if (imgStart > start) {
			appendItem(array, getItem("text", text.substring(start, imgStart), currentSeq));
		}

		int imgEnd = text.indexOf('>', imgStart);
		if (imgEnd < 0) {
			// Unterminated tag: step past its '<' so the rest is treated as ordinary text.
			start = imgStart + 1;
			continue;
		}
		imgEnd++; // make the end exclusive, just past '>'

		String src = extractImgSrc(text.substring(imgStart, imgEnd));
		if (!StringUtils.isBlank(src)) {
			appendItem(array, getItem("image", src, currentSeq));
		}
		start = imgEnd;
	}
}

/** Returns the earliest index at or after {@code from} of {@code <img} or {@code <IMG}, or -1. */
private static int indexOfImgTag(String text, int from) {
	int lowerPos = text.indexOf("<img", from);
	int upperPos = text.indexOf("<IMG", from);
	if (lowerPos < 0) {
		return upperPos;
	}
	if (upperPos < 0) {
		return lowerPos;
	}
	return Math.min(lowerPos, upperPos);
}

/** Returns the {@code src} attribute value of a single image tag, or "" when it has none. */
private static String extractImgSrc(String imgTag) {
	java.util.regex.Matcher matcher = IMG_SRC_PATTERN.matcher(imgTag);
	if (!matcher.find()) {
		return "";
	}
	for (int group = 1; group <= 3; group++) {
		String candidate = matcher.group(group);
		if (candidate != null) {
			return candidate;
		}
	}
	return "";
}

/** Appends {@code item} to {@code array} unless it is null. */
private static void appendItem(JSONArray array, JSONObject item) {
	if (item != null) {
		array.add(item);
	}
}

三,处理分割后的结果

/**
 * Builds one result item ({@code id}, {@code type}, {@code value}) and advances the sequence
 * counter, or returns null when there is nothing worth emitting.
 *
 * <p>For "text" items the value is cleaned up first. Values starting with
 * {@code <p><embed} or {@code <p><tableflag_} only lose their {@code <p>}/{@code </p>}
 * wrappers. Every other value has its tags and URLs stripped, and is re-typed as "desc" when it
 * contains the marker {@code u-arr-u}. Values of any other type are used as given.
 *
 * @param type       the item type, e.g. "text" or "image"; blank yields null
 * @param value      the raw value; null or blank yields null
 * @param currentSeq the running sequence counter; must hold a decimal integer. It is used as
 *                   the item id and then incremented in place, but only when an item is produced
 * @return the new item, or null when type or value is blank (before or after clean-up)
 */
public static JSONObject getItem(String type, String value, StringBuffer currentSeq) {
	// Guard before touching value: calling replaceAll on a null value would throw.
	if (value == null || StringUtils.isBlank(type)) {
		return null;
	}

	// NOTE(review): as written this swaps a space for a space. The first literal may originally
	// have been a non-breaking space or an HTML entity lost in transcription. Confirm.
	value = value.replaceAll(" ", " ");
	if (StringUtils.isBlank(value)) {
		return null;
	}

	if ("text".equals(type)) {
		if (value.startsWith("<p><embed") || value.startsWith("<p><tableflag_")) {
			// Embedded media / table placeholders keep their markup, minus the <p> wrapper.
			value = value.replaceAll("<p>", "").replaceAll("</p>", "");
		} else {
			if (value.indexOf("u-arr-u") != -1) {
				// Marker for an image caption; the type is decided before the markup is stripped.
				type = "desc";
			}
			// Strip tags first, then anything that looks like a URL.
			value = value.replaceAll("<[^<^>]*?>", "").replaceAll(
					"</[^<^>]*?>", "").replaceAll("((http|https)://|www\\.)[\\w\\-_]+(\\.[\\w\\-_]+)+([\\w\\-\\.,@?^=%&:/~\\+#]*[\\w\\-\\@?^=%&/~\\+#])?", "");
		}
	}

	// The clean-up above may have left nothing behind.
	if (StringUtils.isBlank(value) || " ".equals(value)) {
		return null;
	}

	JSONObject obj = new JSONObject();
	obj.put("id", currentSeq.toString());
	// Advance the shared counter in place so the caller sees the next id.
	int nextSeq = Integer.parseInt(currentSeq.toString()) + 1;
	currentSeq.setLength(0);
	currentSeq.append(nextSeq);
	obj.put("type", type);
	// NOTE(review): the removed literal is reproduced as found (two spaces); confirm it is intended.
	obj.put("value", T.toHtml(value.replace("  ", "")));
	return obj;
}

四,结果示例:

{
    "data": [
        {
            "content": [
                {
                    "id": "1",
                    "type": "text",
                    "value": "眼部是心灵的窗口,又提上了这个俗套话,话虽俗,但是事在理,无论是护肤上还是彩妆上,都无法忽视眼部的存在感。"
                },
                {
                    "id": "2",
                    "type": "image",
                    "value": "https://img.pconline.com.cn/images/upload/upc/tx/ladybbs6/1311/20/c0/28817062_1384931193469_1024x1024.jpg"
                },
                {
                    "id": "3",
                    "type": "text",
                    "value": "所以最重要的就是提前的预防,后悔没有早一点的重视,所以眼部还是会有微微的细纹,如果正在看贴的你还年轻,那么趁早的使用眼霜吧,等到细 纹出现就要追悔莫及咯,好了,入正题,今天带来的是Prox纯焕方程式特护修纹眼霜,顾名思义就能知道这款眼霜的主要功效啦,修护细纹,正式蕊蕊所需要的 啦"
                },
                {
                    "id": "4",
                    "type": "image",
                    "value": "https://img.pconline.com.cn/images/upload/upc/tx/ladybbs6/1311/20/c0/28817061_1384931192102_1024x1024.jpg"
                },
                {
                    "id": "5",
                    "type": "text",
                    "value": "来自Pro-X by Olay的专业级产品,从包装上看起来就超级的有科技感哦,红色的一席,让这个寒冷的秋冬多了丝丝暖意有木有?~"
                },
                {
                    "id": "6",
                    "type": "image",
                    "value": "https://img.pconline.com.cn/images/upload/upc/tx/ladybbs6/1311/20/c0/28817058_1384931188971_1024x1024.jpg"
                },
                {
                    "id": "7",
                    "type": "text",
                    "value": "瓶口是精致的压嘴状,取用起来非常方便,这是蕊蕊大爱的方式,既能方便使用,更可以有效的防止外部以及手部对内在眼霜的污染"
                },
                {
                    "id": "8",
                    "type": "image",
                    "value": "https://img.pconline.com.cn/images/upload/upc/tx/ladybbs6/1311/20/c0/28817057_1384931187308_1024x1024.jpg"
                },
                {
                    "id": "9",
                    "type": "text",
                    "value": "Pro-X by Olay是以皮肤基因组科学为研发基础,所以相对于市面上的普通护肤品要更为专业,在选择护肤品上,蕊蕊不主张以贵为好,以贵为美,重要的还是要适合自己"
                },
                {
                    "id": "10",
                    "type": "image",
                    "value": "https://img.pconline.com.cn/images/upload/upc/tx/ladybbs6/1311/20/c0/28817060_1384931190525_1024x1024.jpg"
                },
                {
                    "id": "11",
                    "type": "text",
                    "value": "【优点】这一款是作为修护眼部肌肤使用,虽然是修护,但是没有细纹的使用会起到预防的作用,早晚使用,不要偷懒,这样才能达到理想的效果,使用眼霜可以有效的改善皮肤屏障,促进皮肤更新。而且很温和无刺激,质地较清爽,眼部肌肤使用之后无负担。"
                },
                {
                    "id": "12",
                    "type": "image",
                    "value": "https://img.pconline.com.cn/images/upload/upc/tx/ladybbs6/1311/20/c0/28817102_1384931252006_1024x1024.jpg"
                },
                {
                    "id": "13",
                    "type": "text",
                    "value": "可以看到,它的PH值非常温和的,无刺激,适合大多数MM的肌肤使用"
                },
                {
                    "id": "14",
                    "type": "image",
                    "value": "https://img.pconline.com.cn/images/upload/upc/tx/ladybbs6/1311/20/c0/28817413_1384931543769_1024x1024.jpg"
                },
                {
                    "id": "15",
                    "type": "text",
                    "value": "【缺点】挺好的一款眼霜,暂时没有太大的缺点。【滋润度】延展性还不错,没有干涸的感觉,轻松一下便可以完全的延展开,指尖滑润, 感觉到眼霜的质地非常的轻盈,尤其是在秋冬这样干燥的季节,这样的质地会相对更加的保湿,完全吸收后,肌肤清爽舒适,使用下来没有一丝的油腻感。测试了下 水分效果,比使用前增加了不少,超级补水的说,相对于一般的眼霜来说,这样的补水效果绝对值得称赞哦"
                },
                {
                    "id": "16",
                    "type": "image",
                    "value": "https://img.pconline.com.cn/images/upload/upc/tx/ladybbs6/1311/20/c0/28817101_1384931250871_1024x1024.jpg"
                },
                {
                    "id": "17",
                    "type": "image",
                    "value": "https://img.pconline.com.cn/images/upload/upc/tx/ladybbs6/1311/20/c0/28817100_1384931249775_1024x1024.jpg"
                },
                {
                    "id": "18",
                    "type": "text",
                    "value": "可以很清楚地看到使用前跟使用后的水分数据区别,从33.3%提升到51.5%"
                }                
            ],
            "createAt": "2013-11-25"
        }
    ],
    "msg": "请求数据成功",
    "status": 0
}




评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值