Lua 网络爬虫

在网上看的http://hi.baidu.com/hqwfreefly/blog/category/Lua

经过修改后,可以在 Windows 下运行并抓取文件,源码如下:


-- Fetch the page to scan, either over HTTP or from a local file.
-- After this section `body` holds the page text; the script exits with
-- status 1 if the content could not be obtained.
local http = require("socket.http")
local ext = "jpg"                                   -- file extension to look for
local url = "http://www.battlenet.com.cn/wow/zh/"   -- page (or local file) to scan
local filter = "jpg"                                -- file name must contain this text
if not (ext and url) then print("usage:   netget.lua [file-type] [url] [filter-string]\nexample: netget.lua gif http://www.qq.com home") return -1 end
local body, ret
if string.find(url, "http://", 1, true) ~= 1 then
    -- Anything that does not start with "http://" is treated as a local path.
    print(">> get content from local file " .. url)
    -- Open separately so a missing file is reported below instead of
    -- raising "attempt to index a nil value", and so the handle is closed.
    local fh, open_err = io.open(url, "r")
    if fh then
        body = fh:read("*a")
        fh:close()
    else
        ret = open_err
    end
else
    print(">> requesting to " .. url .. " for *." .. ext .. " files...")
    body, ret = http.request(url)
end
if body == nil then
    -- tostring() guards against a nil reason breaking the concatenation.
    print("request failed, because of: " .. tostring(ret))
    os.exit(1)
end
print(">> request ok")
------------------------------------------------------------------------------
--- Check that a file name contains only whitelisted characters.
-- The last four characters (assumed to be the ".ext" suffix) are not checked,
-- so names of four characters or fewer are always accepted.
-- @param saveto string: candidate file name
-- @return boolean: true when every checked character is a letter, digit,
--                  "_", "-" or "."
function islegal(saveto)
    local legal_chars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-.0123456789"
    for i = 1, #saveto - 4 do
        -- Plain search (4th argument true): the character must be taken
        -- literally. In pattern mode "%" or "[" raise an error, and "^" / "$"
        -- match the empty string, which wrongly accepted those characters.
        if not string.find(legal_chars, string.sub(saveto, i, i), 1, true) then
            return false
        end
    end
    return true
end
-- Download every matching link found in `body` into a fresh, timestamped
-- folder, then print a summary.
local start_t = os.time()
local dir = "f:\\netget\\" .. start_t .. "\\"
-- os.execute reports success as 0 on Lua 5.1 and as true on Lua 5.2+.
-- A bare assert() on the 5.1 result accepts every exit code, failures included.
local mkdir_status = os.execute("mkdir " .. dir)
assert(mkdir_status == true or mkdir_status == 0, "fail to mkdir dir")
local n = 0 -- number of files saved so far
-- string.gfind is the deprecated Lua 5.0 name; prefer gmatch when available.
local gmatch = string.gmatch or string.gfind
-- Escape punctuation so the extension is matched literally inside the pattern.
local ext_pattern = string.gsub(ext, "%p", "%%%0")
for file in gmatch(body, "http://[^:]-%." .. ext_pattern) do
    -- The file name is everything after the last "/" of the URL.
    local name = string.match(file, "[^/]+$")
    if not name then
        name = "NONAME" .. n .. "." .. ext
    end
    -- Plain search: the filter is literal text, not a Lua pattern.
    if string.find(name, filter, 1, true) and islegal(name) then
        print("getting file " .. file)
        local data, code = http.request(file)
        -- http.request also returns a body for error responses, so require
        -- status 200 to avoid saving an HTML error page as the target file.
        if data ~= nil and code == 200 then
            print(dir)
            print(name)
            local out, open_err = io.open(dir .. name, "wb")
            if out then
                out:write(data)
                out:close() -- flush and release the handle right away
                n = n + 1
            else
                print(">> fail to save file " .. dir .. name .. ": " .. tostring(open_err))
            end
        else
            print(">> fail to get file " .. file)
        end
    end
end
------------------------------------------------------------------------------
-- `dir` is an absolute path, so it is printed without a "./" prefix.
print(">> done at:          " .. os.date()
    .. "\n>> cost time:        " .. os.time() - start_t .. " seconds"
    .. "\n>> file amount:      " .. n
    .. "\n>> saved to folder: " .. dir)
ok,mp3等也可以爬,很好玩。

  • 1
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值