# -*- coding: utf-8 -*-
import re
import urllib
# Search-result page to scrape (Baidu image search, query word "songzhongji").
url=r'http://image.baidu.com/search/index?tn=baiduimage&ct=201326592&lm=-1&cl=2&ie=gbk&word=songzhongji'

# Markers delimiting each image entry inside the page's embedded data.
flag_star='"objURL"'
flag_end='"fromURL"'
# Marker used to cut the relevant data section out of the page.
strdata='app.setData'


def extract_image_urls(page):
    """Return the image URLs embedded in *page*.

    The section scanned runs from the first occurrence of ``app.setData``
    up to (not including) the second occurrence, or to the end of *page*
    when there is only one occurrence.  Inside that section the i-th
    ``"objURL"`` marker is paired with the i-th ``"fromURL"`` marker; the
    text between them, minus the leading ``"objURL":"`` and the trailing
    ``",``, is taken as one URL.

    Args:
        page: Page content as a string.

    Returns:
        A list of URL strings in page order.  The list is empty when
        ``app.setData`` does not occur in *page*.

    Side effects:
        Prints a warning when the two markers do not occur the same number
        of times; the pairs found up to that point are still returned.
    """
    start = page.find(strdata)
    if start == -1:
        return []
    stop = page.find(strdata, start + 1)
    if stop == -1:
        # Only one data marker: scan to the end instead of letting a -1
        # slice bound silently drop the last character.
        stop = len(page)
    section = page[start:stop]

    urls = []
    obj_pos = section.find(flag_star)
    from_pos = section.find(flag_end)
    # Stop as soon as either marker runs out (find() returns -1).
    while obj_pos != -1 and from_pos != -1:
        fragment = section[obj_pos:from_pos].rstrip()
        # Drop the 10-character prefix '"objURL":"' and the trailing '",'.
        urls.append(fragment[10:-2])
        obj_pos = section.find(flag_star, obj_pos + 1)
        from_pos = section.find(flag_end, from_pos + 1)

    if obj_pos != -1 or from_pos != -1:
        # One marker occurs more often than the other.
        print("有问题!")
    return urls


def main():
    """Download the search page and write one image URL per line to
    ``songzhongji.txt``."""
    # Read the page content.
    # NOTE: urllib.urlopen is the Python 2 API, as used by the original script.
    response = urllib.urlopen(url)
    try:
        imgcontent = response.read()
    finally:
        response.close()

    result_list = extract_image_urls(imgcontent)

    # 'with' guarantees the file is closed even if a write fails.
    with open('songzhongji.txt', 'w') as fp:
        for line in result_list:
            fp.write(line)
            fp.write("\n")


if __name__ == '__main__':
    main()