‘User-Agent’: 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu ’
‘Chromium/44.0.2403.89 Chrome/44.0.2403.89 Safari/537.36’}]
# Output location used by this scraper for book files.
# NOTE(review): the literal below is delimited by typographic quotes and begins
# with an ellipsis character, which looks like a copy/paste artifact; confirm
# the intended path (and consider a raw string, since it contains backslashes).
file_path = “…\Files\Books”
def Parse_Page(self):
    """Fetch the current listing page and process every titled link on it.

    The page URL is ``self.url`` followed by ``self.page_num * 25``; the
    request is sent with one header set chosen at random from
    ``self.headers``. For each ``<a>`` element that carries a ``title``
    attribute, its ``href`` is passed to ``self.Parse_Book`` and
    ``self.top_num`` is incremented. ``self.page_num`` is incremented once
    after the page has been walked.

    Raises:
        urllib.error.URLError: propagated from ``urlopen`` if the listing
            page cannot be fetched.
    """
    # NOTE(review): the multiplier 25 presumably matches the number of
    # entries per listing page -- confirm against the target site.
    listing_url = self.url + str(self.page_num * 25)
    request = Request(listing_url, headers=random.choice(self.headers))

    # Use the response as a context manager so the connection is released
    # even if reading fails.
    with urlopen(request) as response:
        markup = response.read()

    soup = BeautifulSoup(markup, "html.parser")

    # NOTE(review): indentation was lost in the source; the nesting below
    # (counter bumped per book, page number bumped once per page) is the
    # reconstruction that matches the surrounding logic -- confirm.
    for anchor in soup.find_all("a"):
        if "title" not in anchor.attrs:
            continue
        href = anchor.attrs.get("href")
        if href is None:
            # A titled anchor without a target cannot be followed; skip it
            # instead of aborting the whole page with a KeyError.
            continue
        self.Parse_Book(href)
        self.top_num += 1

    self.page_num += 1
def Parse_Book(self, book_url):
print("book_url is: " + book_url)
try:
book_request = Request(book_url, headers=random.choice(self.headers))
book_html = urlopen(book_request).read()
book_bsObj = BeautifulSoup(book_html, “html.parser”)
book_title =