本文主要介绍如何判断一个链接是否为跳转链接,并判断出当前链接的域名以及跳转之后的域名。
import numpy as np
import pandas as pd
import requests
import re
import urllib
def __init__(self, url, text, content):
    """Initialise the instance by storing the three inputs unchanged.

    Args:
        url: Stored as ``self.url``.
        text: Stored as ``self.text``.
        content: Stored as ``self.content``.
    """
    # Right-hand side is evaluated first, then the attributes are bound
    # left to right (url, text, content).
    self.url, self.text, self.content = url, text, content
# Four independent, initially empty result lists.
# NOTE(review): indentation was lost in this file, so it is unclear whether
# these are module-level or class-level names; how they get filled is not
# visible in this section.
url_target, text_target, content_target, domain_target = [], [], [], []
def get_location_url(url, timeout=10):
    """Return the ``Location`` header of a HEAD request to *url*.

    The request is sent without following redirects, so for a redirecting
    link the header value is the address the link points to.

    Args:
        url: Address to send the HEAD request to.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout an unresponsive host would block the call forever.

    Returns:
        The value of the ``Location`` response header, ``None`` when the
        response carries no such header, or the caught exception object
        when the request fails. Nothing is raised, so callers must check
        the type of the result.
    """
    try:
        # Redirects must not be followed, otherwise the final response
        # would no longer carry the Location header we are after.
        res = requests.head(url, allow_redirects=False, timeout=timeout)
    except Exception as e:
        # Best-effort contract kept from the original: hand the error
        # back to the caller instead of propagating it.
        return e
    return res.headers.get('location')
def pick_url_element(text):
text = str(text)
REG_CN ="[A-Za-z0-9?=//.&%:_-]";
for i in text:
if re.match(REG_CN,i) != 'none':
text_target = ''.join(re.findall(REG_CN,text))