class HtmlParser(object):
    """Pull the Beijing district hotel links out of a parsed sitemap page."""

    # Text of the section heading that must be present on the page, e.g.
    #   <h3 class="normaltitle">北京行政区酒店</h3>
    # (roughly: "hotels by Beijing administrative district").
    _DISTRICT_HEADING = '北京行政区酒店'

    # Pattern searched for in the href of every <a> tag on the page.
    _DISTRICT_LINK_PATTERN = r"/html5/hotel/sitemap-beijing1/location"

    def _get_new_urls(self, page_url, soup):
        """Return a mapping of district link text to its href.

        Args:
            page_url: URL of the page being parsed. Currently unused; kept
                so the signature stays compatible with existing callers.
            soup: Parsed document exposing ``find_all(name, **filters)``
                (presumably a ``bs4.BeautifulSoup`` object; verify against
                the caller).

        Returns:
            dict: ``{link text: href}`` for every ``<a>`` whose href matches
            the district sitemap pattern, or an empty dict when the page has
            no ``<h3 class="normaltitle">`` heading with the expected text.
            Hrefs are returned exactly as they appear in the document.
        """
        # Imported locally because no file-level import of ``re`` is visible.
        import re

        headings = soup.find_all('h3', class_="normaltitle")
        # Compare by value: ``is`` tests object identity and is never true
        # for a string freshly produced by get_text().
        has_district_section = any(
            heading.get_text().strip() == self._DISTRICT_HEADING
            for heading in headings
        )
        if not has_district_section:
            return {}

        links = soup.find_all(
            "a", href=re.compile(self._DISTRICT_LINK_PATTERN)
        )
        # Key by the link's own text so each district keeps its entry
        # instead of every link overwriting a single 'title' key.
        return {link.get_text().strip(): link['href'] for link in links}