from urllib.request import urlopen
from urllib.parse import urlparse
from bs4 import BeautifulSoup
import re
import datetime
import random
pages = set()
random.seed(datetime.datetime.now().timestamp())  # recent Python versions reject non-numeric seeds such as datetime objects
# Retrieve a list of all internal links found on a page
def getInternalLinks(bsObj, includeUrl):
    includeUrl = urlparse(includeUrl).scheme + "://" + urlparse(includeUrl).netloc
    internalLinks = []
    # Find all links that begin with "/" or contain the site's own URL
    for link in bsObj.findAll("a", href=re.compile("^(/|.*" + includeUrl + ")")):
        if link.attrs['href'] is not None:
            if link.attrs['href'] not in internalLinks:
                # Prefix relative links with the site root
                if link.attrs['href'].startswith("/"):
                    internalLinks.append(includeUrl + link.attrs['href'])
                else:
                    internalLinks.append(link.attrs['href'])
    return internalLinks
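
# A minimal usage sketch for the function above. This is an illustrative
# assumption, not part of the original post: the start URL below is a
# placeholder, and it presumes network access and a page that "html.parser"
# can handle.
if __name__ == "__main__":
    startUrl = "http://www.example.com"  # hypothetical placeholder URL
    html = urlopen(startUrl)
    bsObj = BeautifulSoup(html, "html.parser")
    for link in getInternalLinks(bsObj, startUrl):
        print(link)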