import requests
import re
def parse_url(url, timeout=10):
    """Fetch ``url`` and print the text of every ``<div id="endtext">`` block.

    Each matched block has its HTML tags removed and its surrounding
    whitespace stripped.  The cleaned texts are printed in two passes: first
    each one followed by a 50-asterisk rule, then each one again followed by
    a 10-asterisk rule.

    Args:
        url: Address of the page to download.
        timeout: Seconds ``requests`` waits on the server before raising a
            timeout error.  Without a timeout a stalled server would block
            the script indefinitely.

    Returns:
        The list of cleaned texts in page order; empty if nothing matched.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    text = response.text

    # DOTALL lets a single block span several lines of markup.
    raw_blocks = re.findall(r'<div id="endtext">(.*?)</div>', text, re.DOTALL)

    # Drop every tag, then trim.  The original first pass printed
    # ``x.strip('')``, which strips nothing because the character set is
    # empty; both passes now print the same trimmed text.
    contents = [re.sub(r'<.*?>', '', block).strip() for block in raw_blocks]

    # NOTE(review): the source's indentation was lost; this assumes the two
    # print passes were sequential loops rather than nested -- confirm.
    for content in contents:
        print(content)
        print("*" * 50)
    for content in contents:
        print(content)
        print("*" * 10)

    return contents
def main():
    """Run ``parse_url`` on pages 1 through 10 under ``Joke/Detail/QSBK``."""
    url_template = "http://www.lovehhy.net/Joke/Detail/QSBK/%s"
    first_page, last_page = 1, 10
    for page_number in range(first_page, last_page + 1):
        parse_url(url_template % page_number)
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()