房价貌似快要涨了，俺最近天天加班，所以由俺媳妇来负责找房源。可她那狗屎公司无法上网，只好用 iPad 加载 RSS 离线看，结果狗屎 sohu 焦点的 RSS 不是全文 RSS。为了老婆找房方便，写了点代码，用于生成全文的 RSS，希望借此机会尝试一下代码改善生活的感觉。不废话了，直接贴代码。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
|
<?php
// Entry script: downloads the Sohu Focus (Shenzhen) "hot news" RSS feed, swaps
// every item's summary for the full article body and prints the resulting feed.

// The payload is XML encoded as UTF-8: SimpleXML always hands back UTF-8 strings
// and the scraped article bodies are converted to UTF-8 before being embedded.
header("Content-type: text/xml; charset=utf-8");

// Every article is fetched over HTTP, so allow the script to run for up to an hour.
// The option name used to be misspelled ("max_excution_time"), which made the
// call a silent no-op.
ini_set("max_execution_time", 3600);

$url = "http://sz.focus.cn/common/xml/rss/news/hot.php";
$newItemArr = parseItems($url);
// Rebuild the <item> elements with the downloaded content in place of the description.
$xmlItemContent = makeRssItemString($newItemArr);

// The channel dates describe this generated feed, so stamp them at build time
// (RFC 2822 format, as RSS 2.0 expects) instead of a fixed date.
$buildDate = date('r');

// Head and tail strings of the RSS XML document.
// The XML declaration has to be the very first bytes of the output, and the
// declared encoding has to match the UTF-8 payload.
$xmlPre = '<?xml version="1.0" encoding="UTF-8" ?>
<?xml-stylesheet type="text/xsl" href="/common/xml/xsl/rss.xsl" media="all" ?>
<?xml-stylesheet type="text/css" href="/common/xml/css/rss.css" media="all" ?>
<rss version="2.0" xmlns:focus="http://house.focus.cn" xmlns:F="http://house.focus.cn" docType="rss" F:docType="rss">
<channel pageIndex="" pageSize="20" recordCount="" F:pageIndex="" F:pageSize="20" F:recordCount="">
<image>
<title><![CDATA[热点新闻-搜狐焦点网深圳站 ]]></title>
<link>http://sz.focus.cn/newscenter/xwsy.html</link>
<url>http://images.house.focus.cn/img/newhouselogo.gif</url>
</image>
<title>热点新闻-搜狐焦点网深圳站</title>
<link>http://sz.focus.cn/newscenter/xwsy.html</link>
<description><![CDATA[提供最全面最及时的中国房地产新闻资讯 ]]></description>
<copyright>Copyright 2012, sohu.com Inc., all rights reserved</copyright>
<language>zh-cn</language>
<lastBuildDate>' . $buildDate . '</lastBuildDate>
<pubDate>' . $buildDate . '</pubDate>
<category>地产综合</category>
<ttl>30</ttl>';
$xmlAppend = '</channel></rss>';

$xmlOutput = $xmlPre . $xmlItemContent . $xmlAppend;
echo $xmlOutput;
/**
 * Load the RSS feed at $url and return its items with full-text descriptions.
 *
 * Every <item> found under <channel> is passed through makeRssItemObj().
 *
 * @param string $url location of the RSS feed, handed to simplexml_load_file()
 * @return array list of item objects as returned by makeRssItemObj(); empty when
 *               the feed cannot be loaded or parsed, or when it has no items
 */
function parseItems($url)
{
    // Collect libxml problems internally: parser warnings printed into the
    // response would corrupt the XML this script emits.
    $previousSetting = libxml_use_internal_errors(true);
    $xmlObj = simplexml_load_file($url);
    libxml_clear_errors();
    libxml_use_internal_errors($previousSetting);

    $resultArr = array();

    // simplexml_load_file() returns false on download or parse failure.
    if ($xmlObj === false) {
        error_log("parseItems: unable to load RSS feed from $url");
        return $resultArr;
    }

    // A feed without a channel or without items yields an empty list.
    if (!isset($xmlObj->channel->item)) {
        return $resultArr;
    }

    foreach ($xmlObj->channel->item as $item) {
        $resultArr[] = makeRssItemObj($item);
    }

    return $resultArr;
}
/**
 * Replace an RSS item's description with the full article body.
 *
 * The page behind $item->link is downloaded with fileGetContent(), converted
 * to UTF-8, and the markup between id="newscontent"> and the following
 * <div class="clear"> is stored as the item's description.
 *
 * When the page cannot be downloaded, cannot be converted, or does not contain
 * the expected markup, the item is returned with its description unchanged.
 *
 * @param object $item feed item exposing ->link and ->description
 *                     (parseItems() passes the SimpleXML <item> elements)
 * @return object the same $item, modified in place
 */
function makeRssItemObj($item)
{
    // Cast explicitly so the downloader receives a plain string URL.
    // If curl is not available, fileGetContent() could be implemented with
    // file_get_contents() or a socket connection instead.
    $urlContent = fileGetContent((string) $item->link);
    if ($urlContent === '' || $urlContent === false) {
        return $item;
    }

    // GBK is a superset of GB2312; pages labelled gb2312 often contain GBK-only
    // characters, which made the stricter conversion fail for the whole page.
    // The @ keeps iconv's notice about illegal bytes out of the XML output;
    // failure is handled explicitly through the false return value.
    $utf8Content = @iconv("GBK", "UTF-8", $urlContent);
    if ($utf8Content === false) {
        return $item;
    }

    // Only overwrite the description when the article body was actually found.
    $found = preg_match(
        "/id=\"newscontent\">([\d\D]+)<div\s+class=\"clear\">/iU",
        $utf8Content,
        $matchArr
    );
    if ($found === 1) {
        $item->description = $matchArr[1];
    }

    return $item;
}
/**
 * Serialise item objects into a string of RSS <item> elements.
 *
 * Title and description are wrapped in CDATA sections; link and pubDate are
 * XML-escaped. The author is always "mu_rain".
 *
 * @param array $itemObjArr objects exposing ->title, ->link, ->description, ->pubDate
 * @return string concatenated <item>...</item> elements, "" for an empty list
 */
function makeRssItemString($itemObjArr)
{
    $result = "";

    foreach ($itemObjArr as $itemObj) {
        // A literal "]]>" would end the CDATA section early, so split it
        // across two adjacent sections.
        $title = str_replace(']]>', ']]]]><![CDATA[>', (string) $itemObj->title);
        $description = str_replace(']]>', ']]]]><![CDATA[>', (string) $itemObj->description);

        // Plain element text must be escaped; URLs routinely contain "&".
        $link = htmlspecialchars((string) $itemObj->link, ENT_QUOTES, 'UTF-8');
        $pubDate = htmlspecialchars((string) $itemObj->pubDate, ENT_QUOTES, 'UTF-8');

        $result .= "<item>";
        $result .= "<title><![CDATA[ $title ]]></title>";
        $result .= "<link>$link</link>";
        $result .= "<description><![CDATA[ $description ]]></description>";
        $result .= "<author>mu_rain</author>";
        $result .= "<pubDate>$pubDate</pubDate>";
        // Close the element; this used to emit a second opening <item> tag,
        // which made the whole feed malformed.
        $result .= "</item>";
    }

    return $result;
}
// ------------------------------------------------------------------------
/**
 * Debug helper: print a value inside a styled HTML <fieldset>.
 *
 * The value is dumped with print_r() inside a <pre> element. The legend shows
 * $title immediately followed by the current date and time.
 *
 * @param mixed  $array value to display; when $type is 'json' it is treated as
 *                      a JSON string and decoded with json_decode() first
 * @param string $title legend text
 * @param string $type  'json' to decode $array before printing; any other
 *                      value prints $array as given
 * @param string $width optional legend width in pixels; '' for no width
 * @package P
 * @subpackage String
 * @category Putils
 * @author mu_rain
 * @return void
 */
// ------------------------------------------------------------------------
function pr($array, $title = 'DEBUG', $type = 'array', $width = '')
{
    $title .= date("Y-m-d H:i:s");

    $widthStr = "";
    if ($width) {
        $widthStr = "width:$width" . "px";
    }

    echo "<fieldset style=\"-moz-border-radius:5px 5px 5px 5px; -moz-box-shadow:0px 0px 10px rgba(00,00,00,0.45); border: 3px solid transparent; padding:3px; margin-top:20px; \"><legend style=\"color: #069; margin:3px; $widthStr \">$title</legend>";
    echo "<div style = '-moz-border-radius:10px 10px 10px 10px;font-size:14px; color:#069; border:1px solid #F0FAF9; font-size:9pt; background:#F0FAF9; padding:5px;'>";
    print("<pre>");

    if ($type == 'json') {
        $array = json_decode($array);
    }
    print_r($array);

    print("</pre>");
    // Close the wrapper div; this used to emit a second opening <div>,
    // leaving the markup unbalanced.
    echo "</div>";
    echo "</fieldset>";
}
// ------------------------------------------------------------------------
/**
 * Download the content of a URL using curl.
 *
 * Redirects are followed. Connecting may take at most 30 seconds and the whole
 * transfer at most 60 seconds, so one stalled server cannot block the script.
 *
 * @param string $url address to download
 * @return string the response body with surrounding whitespace trimmed, or ""
 *                when curl cannot be initialised or the transfer fails
 * @package KDG
 * @subpackage common
 * @category mu_rain
 * @author 徐兴
 */
// ------------------------------------------------------------------------
function fileGetContent($url)
{
    $ch = curl_init();
    if ($ch === false) {
        return '';
    }

    $timeout = 30;
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
    // CURLOPT_CONNECTTIMEOUT only covers the connection phase; without an
    // overall limit a server that stops sending data would hang the request.
    curl_setopt($ch, CURLOPT_TIMEOUT, 2 * $timeout);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);

    $contents = curl_exec($ch);
    curl_close($ch);

    // curl_exec() returns false on failure; report that as an empty body.
    if ($contents === false) {
        return '';
    }

    return trim($contents);
}
|