遍历CSDN博客

--http://blog.csdn.net/leixiaohua1020/article/list/14?viewmode=contents

--- Write `data` to "temp.html" in the current working directory,
-- replacing any previous contents of that file.
-- @param data string to write
-- Raises an error carrying the system message when the file cannot be
-- opened or the write fails.
function saveData(data)
	-- assert() turns io.open's (nil, message) failure into a readable error
	-- instead of a later "attempt to index a nil value".
	local file = assert(io.open("temp.html", "w"))
	local ok, err = file:write(data)
	-- Close before reporting a write failure so the handle is never leaked.
	file:close()
	assert(ok, err)
end

--- Download `url` and return the page total shown in its pager block.
-- The pager is the <div id="papelist" class="pagelist"> element; the total is
-- the run of digits between the characters "共" and "页" inside that element.
-- @param url address of the page to download
-- @return the digits of the page total, as a string
-- Raises an error with a descriptive message when the download fails or when
-- the expected markup is not present in the response.
function getPageCount(url)
	local http = require("socket.http")
	local resp, err = http.request(url)
	if type(resp) ~= "string" then
		error("getPageCount: request for " .. tostring(url) .. " failed: " .. tostring(err), 2)
	end
	-- Plain (non-pattern) searches: the markers are literal text.
	local s = string.find(resp, "<div id=\"papelist\" class=\"pagelist\">", 1, true)
	if s == nil then
		error("getPageCount: pager <div> not found in response from " .. tostring(url), 2)
	end
	local _, e = string.find(resp, "</div>", s, true)
	if e == nil then
		error("getPageCount: pager <div> is not closed in response from " .. tostring(url), 2)
	end
	local divData = string.sub(resp, s, e)
	-- A capture extracts the digits directly, so the result no longer depends
	-- on how many bytes the surrounding characters occupy.
	local pageCount = string.match(divData, "共(%d+)页")
	if pageCount == nil then
		error("getPageCount: page total not found inside pager <div>", 2)
	end
	return pageCount
end

--- Print the title of every post listed on a CSDN user's article-list pages.
-- The number of list pages is obtained from getPageCount(); each page
-- "http://blog.csdn.net/<username>/article/list/<n>?viewmode=contents" is then
-- downloaded and one line is printed per "link_title" entry found in it.
-- @param username CSDN account name; when nil or empty a message is printed
--                 and the function returns without doing anything else
-- Raises an error when the page count is not numeric. A list page that fails
-- to download is reported on stderr and skipped.
function getTitles(username)
	if username == nil or username == "" then
		print("username is nil")
		return
	end
	local http = require("socket.http")
	local preUrl = "http://blog.csdn.net/"
	local endUrl = "?viewmode=contents"
	-- getPageCount() yields the digits as text; convert explicitly rather than
	-- relying on implicit string coercion of the loop limit.
	local pageCount = tonumber(getPageCount(preUrl .. username .. endUrl))
	if pageCount == nil then
		error("getTitles: page count for " .. username .. " is not a number", 2)
	end
	for page = 1, pageCount do
		local blogUrl = preUrl .. username .. "/article/list/" .. page .. endUrl
		local resp, err = http.request(blogUrl)
		if type(resp) ~= "string" then
			io.stderr:write("getTitles: request for " .. blogUrl .. " failed: " .. tostring(err) .. "\n")
		else
			-- `pos` is the end index of the latest "link_title" marker.
			local _, pos = string.find(resp, "link_title", 1, true)
			while pos ~= nil do
				local _, linkEnd = string.find(resp, "details/%d+\">", pos)
				local closeStart = linkEnd and string.find(resp, "</a></span>", linkEnd, true)
				if closeStart == nil then
					-- Entry is incomplete; nothing further can be extracted here.
					break
				end
				-- NOTE(review): the +11 / -23 offsets presumably skip fixed
				-- whitespace padding around the title in CSDN's markup — confirm
				-- against a live page before changing them.
				print(string.sub(resp, linkEnd + 11, closeStart - 23))
				_, pos = string.find(resp, "link_title", pos, true)
			end
		end
	end
end

-- Script entry point: print the post titles of the CSDN user "leixiaohua1020".
getTitles("leixiaohua1020")


	



如果想保存一篇指定的博文(只要正文),该怎么做呢?

其实只要我们预先保存下博文的js、css和一些控制正文的html标记,然后把正文内容填充进去就可以了。

后文“lua代码”一节中的 Lua 脚本可以从博文页面中提取出如下正文片段:

<div id="article_content" class="article_content">
正文
</div>


start.html

<!-- start.html: opening half of the saved page. The elements opened here are
     intentionally left unclosed; the extracted article markup and then
     end.html are appended after this fragment. -->
<html>
<head>
    <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
    <!-- Script, stylesheets and favicon are loaded from CSDN's hosts, so the
         saved page needs network access to render with its styling. -->
    <script src="http://static.blog.csdn.net/scripts/jquery.js" type="text/javascript"></script>
    <link rel="Stylesheet" type="text/css" href="http://static.blog.csdn.net/skin/skin-blue/css/style.css?v=1.1" />
    <link rel="shortcut icon" href="http://c.csdnimg.cn/public/favicon.ico" />
    <link type="text/css" rel="stylesheet" href="http://static.blog.csdn.net/scripts/SyntaxHighlighter/styles/default.css" />
</head>
<body>
<div id="container">
<script type="text/javascript">
    // Page-level globals; presumably read by the CSDN blog scripts loaded in
    // end.html — TODO confirm which of them are actually required.
    var username = "x_iya";
    var _blogger = username;
    var blog_address = "http://blog.csdn.net/x_iya";
    var static_host = "http://static.blog.csdn.net";
    var currentUserName = "";  
</script>
<div id="body">
<div id="main">
<div class="main">
<!-- The article markup is inserted directly inside this element. -->
<div id="article_details" class="details">

end.html

<!-- end.html: closing half of the saved page, appended after the article
     markup. Closes four <div> elements, loads newblog.min.js from CSDN's
     static host, then closes <body> and <html>. -->
</div>                 
</div>
</div>
<script type="text/javascript" src="http://static.blog.csdn.net/scripts/newblog.min.js"></script>
</div>
</body>
</html> 


lua代码:

--- Download a blog post page and return the markup of its article body.
-- The returned text starts at the opening tag
-- <div id="article_content" class="article_content"> and ends immediately
-- before the first "<!--" that follows it.
-- @param url address of the post to download
-- @return the extracted markup as a string
-- Raises an error with a descriptive message when the download fails or when
-- either marker is missing from the response.
function GetHtml(url)
	local http = require("socket.http")
	local resp, err = http.request(url)
	if type(resp) ~= "string" then
		error("GetHtml: request for " .. tostring(url) .. " failed: " .. tostring(err), 2)
	end
	local s = string.find(resp, "<div id=\"article_content\" class=\"article_content\">", 1, true)
	if s == nil then
		error("GetHtml: article_content <div> not found in response from " .. tostring(url), 2)
	end
	-- Plain search is required: as a Lua pattern "<!--" treats '-' as a lazy
	-- quantifier and would also match a bare "<-" inside the article text.
	local e = string.find(resp, "<!--", s, true)
	if e == nil then
		error("GetHtml: no \"<!--\" marker found after the article_content <div>", 2)
	end
	return string.sub(resp, s, e - 1)
end

--- Write `data` to "csdn.html" in the current working directory,
-- replacing any previous contents of that file.
-- @param data string to write
-- Raises an error carrying the system message when the file cannot be
-- opened or the write fails.
function SaveData(data)
	-- assert() surfaces io.open's failure message instead of letting the
	-- following method call fail on a nil handle.
	local file = assert(io.open("csdn.html", "w"))
	local ok, err = file:write(data)
	-- Close first so the handle is released even when the write failed.
	file:close()
	assert(ok, err)
end

--- Read an entire file and return its contents.
-- @param filepath path of the file to read
-- @return the whole file contents as a string
-- Raises an error naming the path and the system message when the file
-- cannot be opened.
function ReadData(filepath)
	local file, err = io.open(filepath, "r")
	if file == nil then
		error("ReadData: cannot open " .. tostring(filepath) .. ": " .. tostring(err), 2)
	end
	local data = file:read("*a")
	file:close()
	return data
end

--local url = "http://blog.csdn.net/x_iya/article/details/52327827"

-- Command-line driver: with exactly one argument (the post URL), build
-- "csdn.html" from start.html + extracted article markup + end.html;
-- with any other argument count, print the usage line.
if #arg ~= 1 then
	print("Usage: lua csdn.lua url")
else
	local postUrl = arg[1]
	-- Both templates are read before the download, as in the usual run order.
	local header = ReadData("start.html")
	local footer = ReadData("end.html")
	SaveData(header .. GetHtml(postUrl) .. footer)
end




  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

N3verL4nd

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值