输入一个关键字,即可查找与之相关的关键字。
一个比较简单的爬虫(spider),望大佬指教。(●’◡’●)
import re
from urllib.parse import quote, urljoin

import requests
from bs4 import BeautifulSoup
def getHTMLText(url, timeout=10):
    """Download *url* and return its body decoded as UTF-8.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script indefinitely.

    Returns:
        The page text, or an empty string when the request fails
        (connection problem, timeout, invalid URL, or HTTP error status).
    """
    # Send a browser-like User-Agent instead of the library default.
    headers = {"User-Agent": "Mozilla/5.0"}
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        # Treat 4xx/5xx responses as failures rather than parsing error pages.
        response.raise_for_status()
    except requests.RequestException:
        # Best-effort fetch: callers receive '' and simply find no links.
        # Only request-related errors are swallowed; programming errors and
        # KeyboardInterrupt still propagate.
        return ''
    response.encoding = 'utf-8'
    return response.text
def parse(ilt, html):
    """Collect item links from *html* and append them to *ilt*.

    Every ``<a>`` inside a ``<div class="para">`` whose ``href`` contains
    ``/item/`` followed by at least one character is recorded.

    Args:
        ilt: List that is extended in place with ``[href, text]`` pairs.
        html: HTML source to scan; an empty string yields no entries.
    """
    # Compile once instead of once per matching <div>.
    item_href = re.compile(r'/item/.')
    soup = BeautifulSoup(html, 'html.parser')
    for div in soup.find_all('div', class_='para'):
        for a in div.find_all('a', href=item_href):
            # ``a.string`` is None when the anchor has several child nodes
            # (e.g. text mixed with <b>/<sup>), which would later be shown
            # as "None"; get_text() joins all nested text instead.
            ilt.append([a['href'], a.get_text().strip()])
def printInfo(ilt):
    """Print one numbered line per entry of *ilt*.

    Args:
        ilt: Sequence of entries where index 0 is a link href and index 1
            is the link text, as produced by ``parse``.

    Each line shows a 1-based counter, the link text and the absolute URL.
    """
    start_url = 'https://baike.baidu.com'
    for ans, entry in enumerate(ilt, start=1):
        # urljoin resolves site-relative hrefs against the site root and
        # leaves already-absolute (or protocol-relative) hrefs intact,
        # where plain concatenation would produce a malformed URL.
        link = urljoin(start_url, entry[0])
        print("key word {0:<3}:{1:<20}\t{2:<}".format(str(ans), str(entry[1]), link))
def main():
    """Prompt for a keyword, fetch its Baidu Baike page and list related links.

    Reads the keyword from standard input, downloads the matching
    ``/item/`` page, extracts the item links found in it and prints them.
    """
    key = input('输入关键字:').strip()
    # Percent-encode the keyword so characters such as '#', '?' or spaces
    # stay part of the path instead of being read as URL delimiters
    # (e.g. "C#" would otherwise request the page for "C").
    url = 'https://baike.baidu.com/item/' + quote(key)
    infoList = []
    html = getHTMLText(url)
    parse(infoList, html)
    printInfo(infoList)
# Run the interactive scraper only when executed as a script, so importing
# this module does not trigger a prompt or a network request.
if __name__ == "__main__":
    main()