# Crawler code:
import urllib.request
from bs4 import BeautifulSoup
#coding: utf-8
class xiaoShuo():
def __init__(self, url, parLabelValue, parLabelType, parLabel,
             clildLabelValue, clildLabelType, clildLabel, enc):
    """Store the page address, text encoding and tag-lookup settings.

    Every argument is saved unchanged on the instance under the same name;
    no validation or network access happens here.

    Args:
        url: Address that ``getUrlContent`` opens with ``urllib.request.urlopen``.
        parLabelValue: Value the ``parLabelType`` attribute must have on the
            tags searched for by ``getUrlContent``.
        parLabelType: Attribute name used as the filter key in that search.
        parLabel: Tag name passed to ``find_all`` in that search.
        clildLabelValue: Stored as-is. Presumably the child-tag counterpart
            of ``parLabelValue`` -- TODO confirm against the code that uses it.
        clildLabelType: Stored as-is. Presumably the child-tag counterpart
            of ``parLabelType`` -- TODO confirm.
        clildLabel: Stored as-is. Presumably the child-tag counterpart of
            ``parLabel`` -- TODO confirm.
        enc: Codec name used to decode the downloaded bytes.
    """
    # NOTE: the "clild" spelling is part of the public signature and of the
    # attribute names, so it is kept for backward compatibility.
    self.url = url
    self.parLabelValue = parLabelValue
    self.parLabelType = parLabelType
    self.enc = enc
    self.parLabel = parLabel
    self.clildLabelValue = clildLabelValue
    self.clildLabelType = clildLabelType
    self.clildLabel = clildLabel
def getUrlContent(self):
response = urllib.request.urlopen(self.url);
html = response.read().decode(self.enc);
pageNode = BeautifulSoup(html, 'html.parser')
iterms = pageNode.find_all(self.parLabel,{self.parLabelType:self.parLabelValue})
for i in range(len(iterms)):
tagA = iterms[i].select("a");