一、分析
爬取多条新闻内容。这些新闻链接的共性是:URL 中日期之前的部分是一致的,从日期开始的后半部分各不相同
二、代码分析
引入变量,把链接分成两个部分:固定不变的前缀 + 变化的部分(变量)
三、把变量和不变的量找出来
用到列表遍历,把变化的部分逐个遍历出来
# Variable parts of the article URLs (everything from the date onward).
a = ['/2020-10/19/content_5552336.htm', '/2020-10/15/content_5551609.htm']

# Walk the list and print each variable part in turn.
for x in a:
    print(x)
四、代码写入
打印多个内容
import requests
from bs4 import BeautifulSoup
# Variable parts of the article URLs; each is appended to the fixed prefix
# 'http://www.gov.cn/zhengce' inside the loop below.
# The third entry carries a '/content' segment so that it matches the full URL
# for the same article (content_5551453) in the final URL list of these notes.
ll = [
    '/2020-10/19/content_5552336.htm',
    '/2020-10/15/content_5551609.htm',
    '/content/2020-10/15/content_5551453.htm',
]

for x in ll:
    print(x)
    # A timeout keeps one unresponsive request from hanging the whole run.
    response = requests.get('http://www.gov.cn/zhengce' + x, timeout=10)
    response.encoding = 'utf-8'
    soup = BeautifulSoup(response.text, 'html.parser')
    # Collect the text of every <p> under #UCAP-CONTENT except the last one.
    article = [p.text.strip() for p in soup.select('#UCAP-CONTENT p')[:-1]]
    print(article)
五、最终的代码
import requests
from bs4 import BeautifulSoup
# Full URLs of the articles to scrape, one per line.
ll = [
    'http://www.gov.cn/zhengce/2020-10/19/content_5552336.htm',
    'http://www.gov.cn/zhengce/2020-10/15/content_5551609.htm',
    'http://www.gov.cn/zhengce/content/2020-10/15/content_5551453.htm',
    'http://www.gov.cn/zhengce/2020-10/13/content_5551032.htm',
    'http://www.gov.cn/zhengce/2020-10/12/content_5550656.htm',
    'http://www.gov.cn/zhengce/2020-10/11/content_5550408.htm',
    'http://www.gov.cn/zhengce/content/2020-10/10/content_5550053.htm',
    'http://www.gov.cn/zhengce/2020-10/09/content_5549986.htm',
    'http://www.gov.cn/zhengce/2020-10/09/content_5549984.htm',
    'http://www.gov.cn/zhengce/content/2020-10/09/content_5549924.htm',
    'http://www.gov.cn/zhengce/content/2020-09/29/content_5548125.htm',
    'http://www.gov.cn/zhengce/content/2020-09/28/content_5547856.htm',
    'http://www.gov.cn/zhengce/content/2020-09/27/content_5547612.htm',
    'http://www.gov.cn/zhengce/2020-09/26/content_5547310.htm',
    'http://www.gov.cn/zhengce/content/2020-09/25/content_5547095.htm',
    'http://www.gov.cn/zhengce/2020-09/23/content_5546496.htm',
    'http://www.gov.cn/zhengce/content/2020-09/23/content_5546373.htm',
    'http://www.gov.cn/zhengce/content/2020-09/21/content_5545407.htm',
    'http://www.gov.cn/zhengce/content/2020-09/21/content_5545345.htm',
    'http://www.gov.cn/zhengce/content/2020-09/21/content_5545394.htm',
    'http://www.gov.cn/zhengce/content/2020-09/21/content_5544926.htm',
    'http://www.gov.cn/zhengce/content/2020-09/18/content_5544465.htm',
    'http://www.gov.cn/zhengce/content/2020-09/15/content_5543645.htm',
    'http://www.gov.cn/zhengce/2020-09/15/content_5543685.htm',
    'http://www.gov.cn/zhengce/2020-09/14/content_5543377.htm',
    'http://www.gov.cn/zhengce/content/2020-09/13/content_5543127.htm',
    'http://www.gov.cn/zhengce/content/2020-09/10/content_5542282.htm',
    'http://www.gov.cn/zhengce/2020-09/08/content_5541752.htm',
    'http://www.gov.cn/zhengce/2020-09/08/content_5541749.htm',
    'http://www.gov.cn/zhengce/content/2020-09/07/content_5541291.htm',
    'http://www.gov.cn/zhengce/content/2020-09/03/content_5540097.htm',
    'http://www.gov.cn/zhengce/content/2020-08/31/content_5538788.htm',
    'http://www.gov.cn/zhengce/content/2020-08/28/content_5538191.htm',
    'http://www.gov.cn/zhengce/2020-08/27/content_5538010.htm',
    'http://www.gov.cn/zhengce/2020-08/25/content_5537371.htm',
    'http://www.gov.cn/zhengce/content/2020-08/20/content_5536179.htm',
    'http://www.gov.cn/zhengce/content/2020-08/14/content_5534841.htm',
    'http://www.gov.cn/zhengce/content/2020-08/12/content_5534361.htm',
    'http://www.gov.cn/zhengce/content/2020-08/11/content_5534081.htm',
    'http://www.gov.cn/zhengce/content/2020-08/04/content_5532370.htm',
    'http://www.gov.cn/zhengce/content/2020-07/31/content_5531555.htm',
    'http://www.gov.cn/zhengce/content/2020-07/31/content_5531613.htm',
    'http://www.gov.cn/zhengce/content/2020-07/30/content_5531274.htm',
    'http://www.gov.cn/zhengce/content/2020-07/27/content_5530404.htm',
    'http://www.gov.cn/zhengce/content/2020-07/27/content_5530403.htm',
    'http://www.gov.cn/zhengce/content/2020-07/24/content_5529813.htm',
    'http://www.gov.cn/zhengce/content/2020-07/23/content_5529417.htm',
    'http://www.gov.cn/zhengce/content/2020-07/22/content_5529034.htm',
    'http://www.gov.cn/zhengce/content/2020-07/21/content_5528614.htm',
    'http://www.gov.cn/zhengce/content/2020-07/21/content_5528615.htm',
    'http://www.gov.cn/zhengce/content/2020-07/21/content_5528602.htm',
    'http://www.gov.cn/zhengce/2020-07/20/content_5528395.htm',
    'http://www.gov.cn/zhengce/content/2020-07/20/content_5528320.htm',
    'http://www.gov.cn/zhengce/content/2020-07/17/content_5527765.htm',
    'http://www.gov.cn/zhengce/content/2020-07/14/content_5526768.htm',
    'http://www.gov.cn/zhengce/content/2020-07/10/content_5525614.htm',
    'http://www.gov.cn/zhengce/content/2020-07/10/content_5525616.htm',
    'http://www.gov.cn/zhengce/content/2020-07/09/content_5525351.htm',
    'http://www.gov.cn/zhengce/content/2020-07/08/content_5525124.htm',
    'http://www.gov.cn/zhengce/content/2020-07/08/content_5525117.htm',
]
# Fetch every URL in `ll` and print the paragraphs of each article.
for x in ll:
    print(x)
    # A timeout keeps one unresponsive request from hanging the whole run.
    response = requests.get(x, timeout=10)
    response.encoding = 'utf-8'
    soup = BeautifulSoup(response.text, 'html.parser')
    # Collect the text of every <p> under #UCAP-CONTENT except the last one.
    article = [p.text.strip() for p in soup.select('#UCAP-CONTENT p')[:-1]]
    print(article)
备注:改格式——把文本文件中的每一行加上单引号并以逗号结尾,方便直接粘贴到上面的列表中
# Reformat a text file: wrap every line in single quotes, append a comma, and
# print all of them on one output line.
# The file is opened in binary mode and each line is decoded explicitly; the
# `with` block closes the handle even if decoding or printing raises.
with open('D://内容1.txt', 'rb') as f:
    for raw_line in f:
        message = raw_line.decode('utf-8').strip()
        quoted = "'" + message + "'" + ','
        print(quoted, end="")