代码
代码如下(示例):
import parsel,requests,re
# Browser-like User-Agent sent with every request made by pa_dan.
_USER_AGENT = (
    'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) '
    'AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50'
)


def _attribute_lines(html):
    """Return one ``self.<poem title>=<poem text>`` source line per poem in *html*.

    Two page layouts are recognised, mirroring the two regexes of the
    original script: entries with an expandable section and entries without.
    The text is emitted with ``repr`` so quotes or backslashes inside a poem
    cannot break the generated module.
    """
    lines = []

    # Entries that have an expandable section: the text is spread over the
    # first two <br />-terminated fragments plus a later line.
    expanded = re.findall(
        r'html"\starget="_blank">《(.*?)》.*\n.*content">\n\s*(.*)<br />(.*?)<br />.*\n.*\n\s*(.*)<br />',
        html,
    )
    for name, first, second, rest in expanded:
        text = first + second + rest.replace('<br />', '')
        lines.append('\t\tself.' + name + '=' + repr(text) + '\n')

    # Entries without an expandable section: everything up to the closing </div>.
    collapsed = re.findall(
        r'html"\starget="_blank">《(.*?)》.*\n.*content">\n\s*(.*?)\s*</div>',
        html,
    )
    for name, body in collapsed:
        lines.append('\t\tself.' + name + '=' + repr(body.replace('<br />', '')) + '\n')

    # NOTE(review): poem titles are used verbatim as attribute names; a title
    # containing punctuation or spaces still yields an invalid identifier.
    return lines


def pa_dan(zuoze_yemian, filename, out_dir='/Users/python/shige', timeout=10):
    """Scrape one author page and write its poems out as a Python class file.

    The page is downloaded, the author name is taken from the page's
    ``/chaxun/zuozhe/<n>.html`` link, and ``<out_dir>/<author>.py`` is written
    containing ``class <author>`` whose ``__init__`` assigns every poem found
    on the page to ``self.<poem title>``.

    Args:
        zuoze_yemian: URL of the author page to fetch.
        filename: Kept for backward compatibility; currently unused, the
            output file is named after the author found on the page.
        out_dir: Directory the generated ``.py`` file is written to.
        timeout: Seconds to wait for the HTTP response.

    Raises:
        IndexError: If no author name can be found on the page.
    """
    # requests needs a mapping of header name -> value; the original passed a
    # set literal, which fails when the request headers are prepared.
    headers = {'User-Agent': _USER_AGENT}
    res = requests.get(url=zuoze_yemian, headers=headers, timeout=timeout)
    res.encoding = res.apparent_encoding
    html = res.text

    title = re.findall(r'<a href="/chaxun/zuozhe/\d+.html">(.*?)</a></div>', html)
    if not title:
        raise IndexError('no author name found on page: {}'.format(zuoze_yemian))
    author = title[0]

    # An __init__ without any statement would be a syntax error in the
    # generated module, so fall back to ``pass`` when no poem matched.
    body = _attribute_lines(html) or ['\t\tpass\n']

    path = '{}/{}.py'.format(out_dir, author)
    with open(path, 'w', encoding='utf-8', errors='ignore') as f:
        f.write('class ' + author + ':\n')
        f.write('\tdef __init__(self):\n')
        f.writelines(body)
if __name__ == '__main__':
    # Entry point: read the author index page and scrape every author linked from it.
    # requests expects a header-name -> value mapping; the original set literal
    # makes the request fail before it is sent.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50'
    }
    res = requests.get(
        url='https://www.shicimingju.com/chaxun/zuozhe/5.html',
        headers=headers,
        timeout=10,
    )
    res.encoding = res.apparent_encoding

    # Each match is (relative author-page href, link text).
    zuoze = re.findall('<li><a href="(/chaxun/zuozhe.*?)">(.*?)</a></li>', res.text)
    for k in zuoze:
        url = 'https://www.shicimingju.com' + k[0]
        pa_dan(url, k[1])
总结
每个作者的诗歌写成单个py,可直接调用里面的诗句