https://www.tdx.com.cn/url/holiday/ 通达信官网上的这个假日休市数据可以爬取吗?
打开网页,F12查看源码,发现一整年的数据都在这里。
用AutoHotkey下载网页源代码,提取其中包含*深圳市场* 的记录,解析即可。脚本如下(运行后结果写入 holiday.txt 并自动打开):
; Fetch the TDX holiday page, decoding the response body as GBK, then cut out
; the hidden <textarea id="data"> element located by the tag text below.
url := "https://www.tdx.com.cn/url/holiday/"
source := UrlDownloadToVar(url, "GBK")
get := "<textarea id=""data"" style=""display:none;"">"
table := GetNestedTag(source, get)

; Walk the extracted text line by line (CR and LF each act as a delimiter, so
; CRLF endings just produce an extra empty field that the filter skips).
; Lines mentioning the Shenzhen market are split on "|"; the split array is
; collected in `out`, and the first two fields are appended to `str` as one
; comma-separated line.
out := []
Loop, Parse, table, `r`n
{
    if (!InStr(A_LoopField, "深圳市场"))
        continue
    arr := StrSplit(A_LoopField, "|")
    out.Push(arr)
    str .= arr[1] "," arr[2] "`n"
}

; Replace any previous result file, write the new one and open it with the
; program associated with .txt files.
FileDelete, holiday.txt
FileAppend, % str, holiday.txt
Run, holiday.txt
return
;;;;;;;;; 辅助函数 ;;;;;;;;
; Extracts one element from markup: the given opening tag through the closing
; tag that balances it, so nested elements of the same name stay inside.
;   data       - text to search.
;   tag        - opening-tag text to look for (case-insensitive search),
;                e.g. <div class="x">. The element name is taken from the
;                letters that follow the first "<" in this string.
;   occurrence - which match of `tag` in `data` to start from (default 1).
; Returns the element text including both tags, or an empty string when the
; opening tag is not found, no balancing closing tag exists, or more than 250
; closing tags were tried without reaching a balance.
GetNestedTag(data,tag,occurrence="1")
{
    Start:=InStr(data,tag,false,1,occurrence)
    ; Opening tag absent: stop here instead of passing 0 to InStr/SubStr,
    ; where a start position below 1 has a special (end-relative) meaning.
    if (!Start)
        return ""
    RegExMatch(tag,"i)<([a-z]*)",basetag)
    CloseTag:="</" basetag1 ">"
    strng=
    loop
    {
        ; Position of the A_Index-th closing tag at or after the opening tag.
        ClosePos:=InStr(data, CloseTag, false, Start, A_Index)
        if (!ClosePos)
        {
            ; No further closing tags exist, so the element can never balance;
            ; give up now rather than repeating the same failing scan.
            strng=
            break
        }
        ; One past the ">" of that closing tag ("</" + name + ">").
        EndPos:=ClosePos + StrLen(basetag1) + 3
        strng:=SubStr(data, Start, EndPos - Start)
        ; Replacing a string with itself is used purely to count matches:
        ; with UseErrorLevel, ErrorLevel receives the number of replacements.
        ; (Side effect kept from the original: under the default
        ; case-insensitive matching, matched tag names are rewritten in the
        ; letter case used by `tag`.)
        StringReplace, strng, strng, <%basetag1%, <%basetag1%, UseErrorLevel
        OpenCount:=ErrorLevel
        StringReplace, strng, strng, </%basetag1%, </%basetag1%, UseErrorLevel
        CloseCount:=ErrorLevel
        ; Equal counts mean every nested opening tag has been closed.
        if (OpenCount = CloseCount)
            break
        ; Safety cap on the number of closing tags examined.
        if (A_Index > 250)
        {
            strng=
            break
        }
    }
    ; A result shorter than the opening tag itself cannot be a real element.
    if (StrLen(strng) < StrLen(tag))
        strng=
    return strng
}
; Performs an HTTP GET through WinHttp.WinHttpRequest.5.1 and returns the
; response as a string.
;   URL             - address to request.
;   Charset         - when blank, the text decoded by WinHTTP (ResponseText)
;                     is returned; otherwise the raw response bytes are
;                     decoded with this charset name (e.g. "GBK") through an
;                     ADODB.Stream.
;   URLCodePage     - if set, assigned to WinHTTP option 2 (URL code page).
;   Proxy, ProxyBypassList - if Proxy is set, passed to SetProxy with
;                     setting 2 (use the named proxy).
;   Cookie, Referer, UserAgent - if set, sent as the matching request header.
;   EnableRedirects - if set, assigned to WinHTTP option 6.
;   Timeout         - seconds handed to WaitForResponse; -1 waits forever.
; Returns the response text, or an empty string when no response arrived.
; NOTE: ComObjError(0) turns off COM error notifications for the whole script
; and the previous setting is not restored on return.
UrlDownloadToVar(URL,Charset="",URLCodePage="",Proxy="",ProxyBypassList="",Cookie="",Referer="",UserAgent="",EnableRedirects="",Timeout=-1)
{
    ComObjError(0)
    WebRequest := ComObjCreate("WinHttp.WinHttpRequest.5.1")
    if (URLCodePage<>"")
        WebRequest.Option(2):=URLCodePage
    if (EnableRedirects<>"")
        WebRequest.Option(6):=EnableRedirects
    if (Proxy<>"")
        WebRequest.SetProxy(2,Proxy,ProxyBypassList)
    ; Third argument true = asynchronous; completion is awaited below.
    WebRequest.Open("GET", URL, true)
    if (Cookie<>"")
    {
        ; The header is deliberately set twice, first with a throw-away value.
        ; NOTE(review): presumably a workaround for WinHTTP ignoring the first
        ; Cookie header that is set - confirm before simplifying.
        WebRequest.SetRequestHeader("Cookie","tuzi")
        WebRequest.SetRequestHeader("Cookie",Cookie)
    }
    if (Referer<>"")
        WebRequest.SetRequestHeader("Referer",Referer)
    if (UserAgent<>"")
        WebRequest.SetRequestHeader("User-Agent",UserAgent)
    WebRequest.Send()
    ; A false result means the wait timed out; a blank result means the call
    ; itself failed while COM errors are suppressed. Either way there is no
    ; response to read, so report that explicitly instead of relying on the
    ; later property reads failing silently.
    if (!WebRequest.WaitForResponse(Timeout))
        return ""
    if (Charset="")
        return WebRequest.ResponseText()
    ; Re-decode the raw bytes: write them to a binary stream (Type 1), rewind,
    ; switch the stream to text mode (Type 2) and read with the given charset.
    ADO:=ComObjCreate("adodb.stream")
    ADO.Type:=1
    ADO.Mode:=3
    ADO.Open()
    ADO.Write(WebRequest.ResponseBody())
    ADO.Position:=0
    ADO.Type:=2
    ADO.Charset:=Charset
    Decoded:=ADO.ReadText()
    ; Release the stream explicitly rather than waiting for the COM object
    ; to be freed when the function returns.
    ADO.Close()
    return Decoded
}