没想到距离我发布第一篇文章已经过去这么久了。哎,说来惭愧,这段时间没学到什么东西。这个纪念日,我就用两个 Python 新手实战案例来纪念一下吧!
话不多说,直接上代码。
爬取某校官网(随便玩玩,见怪莫怪)
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Fetch the school homepage and print the title and absolute target of every
# link found in its navigation bar.
url = 'http://www.hnrpc.com/'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0',
    'Cookie': 'td_cookie=4161134918; JSESSIONID=8E0EF7CEF5946A903692F81A4CCE7489',
}

# A timeout keeps the script from blocking forever on an unresponsive server.
html = requests.get(url, headers=headers, timeout=10)
# Decode the body as UTF-8 regardless of what requests guessed from the headers.
html.encoding = 'utf-8'

soup = BeautifulSoup(html.text, 'lxml')
soups = soup.select('div.navBox div.nav ul li a')

for link in soups:
    href = link.get('href')
    if not href:
        # An anchor without an href has no target to report.
        continue
    print('网页名称为:', link.text)
    # urljoin returns absolute URLs (http or https) unchanged and resolves
    # relative ones against the site root without doubling the slash.
    print('跳转链接为:', urljoin(url, href))
Python 实现百度搜索(过滤广告)
import os

import requests
from bs4 import BeautifulSoup

# Ask for a search term, then print the title and link of each result that
# matches the selector below on the first ten Baidu result pages.
search = input('请输入搜索内容:')

url = 'https://www.baidu.com/s'
# The headers do not change between pages, so build them once.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0',
}
# A logged-in browser cookie is a credential and must not live in source
# code. Supply your own through the BAIDU_COOKIE environment variable.
# NOTE(review): Baidu may answer cookie-less requests with a verification
# page that contains no results -- confirm, and set BAIDU_COOKIE if so.
cookie = os.environ.get('BAIDU_COOKIE')
if cookie:
    headers['Cookie'] = cookie

# Baidu paginates through the pn parameter in steps of 10 per page, so this
# loop walks through ten pages.
for i in range(0, 100, 10):
    # Passing the query through params= lets requests URL-encode the search
    # text, so characters such as '&', '#' or '+' cannot corrupt the query.
    # NOTE(review): the original sent id=utf-8; Baidu's input-encoding
    # parameter appears to be `ie` -- confirm.
    params = {'ie': 'utf-8', 'wd': search, 'pn': i}
    # A timeout keeps one stalled page from hanging the whole run.
    html = requests.get(url, params=params, headers=headers, timeout=10)
    soup = BeautifulSoup(html.text, 'lxml')
    divs = soup.select('div#content_left div.new-pmd h3 a')
    for div in divs:
        href = div.get('href')
        if not href:
            # A title anchor without an href has no link to print.
            continue
        print('文章标题为:', div.text)
        print('跳转链接为:', href)
        # Visual separator after each result.
        print('——' * 10)
在 CSDN 的 128 天里,我经历了成长,也有了许多收获与感悟。感谢 CSDN 为我提供了一个良好的创作平台,感谢所有支持我的读者和粉丝,感谢与我一起成长的朋友们。未来,我将继续努力创作高质量的文章,为读者提供更多有价值的信息。同时,我也期待与更多志同道合的朋友一起成长、进步!