import requests
import re
import pandas as pd
# Workbook that is read, extended with a 'Number' column and then overwritten.
# Raw string: '\i' is not a valid escape sequence and newer Python versions
# warn about it; the resulting path is unchanged.
EXCEL_PATH = r'F:\if1.xlsx'

# Search endpoint; the keyword is sent as the 'keywords' query parameter.
SEARCH_URL = 'https://www.ablesci.com/journal/index'

# Browser-like User-Agent sent with every request.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'
}

# Seconds to wait for the server before giving up; without a timeout a single
# stalled connection would block the script forever.
REQUEST_TIMEOUT = 30

# Markers that surround the number in the result page: an opening <span>
# before it, and after it a closing </span> followed by a span whose title
# starts with this text.
NUMBER_PREFIX = "<span>"
NUMBER_SUFFIX = "</span><span title='比上一年度"


def find_number_between_strings(str1, str2, text=None):
    """Return the first number found between two marker strings.

    The match is: ``str1``, any run of non-digit characters, an integer or
    decimal number, any run of non-digit characters, then ``str2``.

    Args:
        str1: Literal text that must precede the number.
        str2: Literal text that must follow the number.
        text: Text to search. When omitted, the module-level ``html_text``
            is searched, which keeps the older two-argument call working.

    Returns:
        The number as a string (e.g. ``"12.345"``), or the integer ``0``
        when the pattern does not occur in the text.
    """
    if text is None:
        # Legacy fallback for callers that rely on the global page text.
        text = html_text
    match = re.search(
        rf"{re.escape(str1)}\D*(\d+(\.\d+)?)\D*{re.escape(str2)}",
        str(text),
    )
    if match:
        # Use the captured number itself. Stripping non-digits from the whole
        # match would also keep any '.' characters from the surrounding text.
        return match.group(1)
    return 0


if __name__ == "__main__":
    df = pd.read_excel(EXCEL_PATH)
    column_data = df['Journal+Info']
    numbers = []
    for cell in column_data:
        if pd.isna(cell):
            # An empty cell would otherwise be searched as the literal text
            # "nan"; record the "not found" value without a request.
            number = 0
        else:
            # Passing the keyword through `params` lets requests URL-encode
            # it, so characters such as '&', '#' or '+' cannot break the query.
            response = requests.get(
                SEARCH_URL,
                params={'keywords': str(cell)},
                headers=HEADERS,
                timeout=REQUEST_TIMEOUT,
            )
            html_text = response.text
            number = find_number_between_strings(
                NUMBER_PREFIX, NUMBER_SUFFIX, html_text
            )
        print(number)
        numbers.append(number)

    # One result per input row, written back to the same workbook.
    df['Number'] = numbers
    df.to_excel(EXCEL_PATH, index=False)