正则
1> 示例:
<a class="cmc-link" href="https://www.bitforex.com/en/spot/fil_usdt" rel="noopener nofollow noreferrer" target="_blank">FIL/USDT</a>
目标:只需要 href 中的路径部分,即 /en/spot/fil_usdt 这一段字符串
解决过程:
# Extract only the path part of the href in an <a> tag.
# `exchange_symbol_url` comes from elsewhere; str() is applied so the regexes
# run on its text (presumably the anchor's HTML markup -- confirm at the
# call site).

# Step 1: scheme + host only. The character class has no '/', so the match
# stops at the first slash after the host name.
# Raw string: '\w' and '\d' are invalid escapes in a normal string literal.
urls = re.findall(r'https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+', str(exchange_symbol_url))
# e.g. ['https://www.bitforex.com']

# Step 2: the complete href value of the <a> tag.
rex = r'<a.*? href="(.*?)".*?>.*?</a>'
urls1 = re.findall(rex, str(exchange_symbol_url))
# e.g. ['https://www.bitforex.com/en/spot/fil_usdt']

# Step 3: remove the scheme + host prefix, leaving only the path.
# Raises IndexError when either pattern found nothing.
symbol_url = urls1[0].replace(urls[0], '')
最终结果: /en/spot/fil_usdt
2> xpath 获取图片src
# Fetch the CoinMarketCap Binance exchange page and print the image `src`
# attributes found in each row of the markets table.
# Names expected to be in scope already: `random`, `requests`, `etree`.
# NOTE(review): `etree.HTML` looks like lxml's API -- confirm the import.
cmc_url = 'https://coinmarketcap.com/exchanges/binance/'
header = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36',
}
proxy_ip = [
    '51.158.111.242:8811',
    '200.255.122.170:8080',
]
# Pick one proxy at random for this run.
# NOTE(review): only the 'http' scheme is mapped while cmc_url is https;
# requests selects proxies by URL scheme, so this request is not proxied
# unless an 'https' entry is added -- confirm the intent.
proxies = {
    'http': str(random.choice(proxy_ip)),
}
# NOTE(review): `headers` is not used below; the request sends `header`.
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36',
}

# Fetch the page text.
# verify=False turns off TLS certificate verification, so the matching
# InsecureRequestWarning is silenced first.
from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
response = requests.get(cmc_url, headers=header, proxies=proxies, verify=False)

import time
# NOTE(review): `self.browser` is not defined in this snippet -- presumably a
# Selenium WebDriver held by an enclosing class. These two lines scroll the
# 'load-more-btn' element into view and only work in that context.
target = self.browser.find_element_by_class_name('load-more-btn')
self.browser.execute_script("arguments[0].scrollIntoView();", target)

response.encoding = 'utf-8'
page_text = response.text
tree = etree.HTML(page_text)
# One entry per <tr> of the table inside the outer table wrapper.
div_list = tree.xpath('//div[@class="cmc-table__table-wrapper-outer"]/div/table/tbody/tr')
for div in div_list:
    # All <img src="..."> values inside this row.
    image_url = div.xpath('.//img/@src')
    print(image_url)