想查看CSCD的排序,官方没有,所以就根据所有期刊名称,在知网查影响因子,然后给出排序!
想发牛逼的期刊就从前往后找,想发水刊就从后往前找!
【CSCD 2021-2022 原始期刊列表】
http://sciencechina.cn/style/sourcelist21_22.pdf
中国科学引文数据库来源期刊列表(2021-2022 年度)
附上排序后的excel结果(仅针对中文期刊):
https://docs.qq.com/sheet/DV2NSSmdXWXRBdU1l
【操作步骤】
1、将CSCD期刊PDF转为excel
(在线工具:PDF转Excel——免费在线PDF转换成Excel转换器)
2、读取excel获取期刊名称
3、根据期刊名称去知网查影响因子
4、保存所有数据到excel
5、对excel排序
python 代码如下:
import requests
from bs4 import BeautifulSoup
from urllib.parse import quote
import pandas as pd
from time import sleep
def Delnone(string):
    """Remove ALL whitespace from *string* and return the result.

    The original only stripped ' ' and '\\n'; titles scraped from CNKI
    HTML can also contain '\\r', '\\t' and ideographic (full-width)
    spaces, which broke the exact-match comparison against the journal
    title. str.split() with no argument splits on any Unicode
    whitespace, so join/split removes them all in one pass.
    """
    return "".join(string.split())
def ContainEnglish(string):
    """Return True if *string* contains any ASCII letter.

    Used to skip English-language journals. The original pattern
    '[a-z]' only matched lowercase, so all-uppercase titles such as
    "IEEE ..." slipped through; match both cases.
    """
    import re
    return bool(re.search('[a-zA-Z]', string))
if __name__ == '__main__':
    # Read the CSCD source-journal list exported from the PDF.
    # Column layout (positional): [2]=title, [3]=ISSN, [4]=remark
    # ("核心库"/"扩展库") — assumed from the original indexing; verify
    # against the actual cscd.xlsx.
    df = pd.read_excel("cscd.xlsx")

    # Collected rows for the output sheet; first row is the header.
    all_table = []
    all_table.append(["期刊名称", "ISSN", "备注", "复合影响因子", "综合影响因子"])

    # Request settings are loop-invariant — build them once.
    url = "https://navi.cnki.net/knavi/journals/searchbaseinfo"
    headers = {
        "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1",
        "Content-Type": "application/x-www-form-urlencoded",
    }

    # Walk every journal row and look up its impact factors on CNKI.
    for i in range(df.shape[0]):
        # BUGFIX: df.ix was removed in pandas 1.0 — use positional iloc.
        row = df.iloc[i].values
        title = row[2]
        ISSN = row[3]
        remark = row[4]

        # Skip English-language journals.
        if ContainEnglish(str(title)):
            continue
        # Skip blank rows / rows that are not in the core or extended library.
        if str(remark) != "核心库" and str(remark) != "扩展库":
            continue
        # Normalize "主刊. 分辑" style titles to "主刊(分辑)" as CNKI lists them.
        if "." in title:
            title = title.replace(". ", "(")
            title += ")"

        # CNKI search POST payload (URL-encoded searchStateJson) with the
        # quoted title spliced into the TI (title) query field.
        post_data = (
            "searchStateJson=%7B%22StateID%22%3A%22%22%2C%22Platfrom%22%3A%22%22%2C%22QueryTime%22%3A%22%22%2C%22Account%22%3A%22knavi%22%2C%22ClientToken%22%3A%22%22%2C%22Language%22%3A%22%22%2C%22CNode%22%3A%7B%22PCode%22%3A%22JOURNAL%22%2C%22SMode%22%3A%22%22%2C%22OperateT%22%3A%22%22%7D%2C%22QNode%22%3A%7B%22SelectT%22%3A%22%22%2C%22Select_Fields%22%3A%22%22%2C%22S_DBCodes%22%3A%22%22%2C%22QGroup%22%3A%5B%7B%22Key%22%3A%22subject%22%2C%22Logic%22%3A1%2C%22Items%22%3A%5B%5D%2C%22ChildItems%22%3A%5B%7B%22Key%22%3A%22txt%22%2C%22Logic%22%3A1%2C%22Items%22%3A%5B%7B%22Key%22%3A%22txt_1%22%2C%22Title%22%3A%22%22%2C%22Logic%22%3A1%2C%22Name%22%3A%22TI%22%2C%22Operate%22%3A%22%25%22%2C%22Value%22%3A%22'"
            + quote(title)
            + "'%22%2C%22ExtendType%22%3A0%2C%22ExtendValue%22%3A%22%22%2C%22Value2%22%3A%22%22%7D%5D%2C%22ChildItems%22%3A%5B%5D%7D%5D%7D%5D%2C%22OrderBy%22%3A%22OTA%7CDESC%22%2C%22GroupBy%22%3A%22%22%2C%22Additon%22%3A%22%22%7D%7D&displaymode=1&pageindex=1&pagecount=21&index=subject&searchType=%E5%88%8A%E5%90%8D(%E6%9B%BE%E7%94%A8%E5%88%8A%E5%90%8D)&clickName=&switchdata=search&random=0.6063051741632248"
        )

        # Fetch the search-result page; skip the journal (instead of
        # crashing the whole run) if the request fails or times out.
        try:
            res = requests.post(url, data=post_data, headers=headers, timeout=30)
        except requests.RequestException as exc:
            print("知网无信息:", title)
            continue

        # Parse out the result cards ("detials" is CNKI's own class name).
        soup = BeautifulSoup(res.text, 'html.parser')
        detial_list = soup.find_all("div", {"class": "detials"})
        if len(detial_list) == 0:
            print("知网无信息:", title)
            continue

        # BUGFIX: the original reused stale IF1/IF2 from the previous
        # journal whenever the (single) search result didn't match the
        # title, because the "no data" flag was only set when more than
        # one result was returned. Reset per journal and check for a match
        # explicitly instead.
        IF1 = None
        IF2 = None
        for card in detial_list:
            card_title = Delnone(card.find("h1").text)
            if card_title == title:
                print("查找成功:", title)
                IF = card.find_all("p")
                IF1 = Delnone(IF[0].text)[7:]  # 复合影响因子 (value after the 7-char label)
                IF2 = Delnone(IF[1].text)[7:]  # 综合影响因子
                print(IF1)
                print(IF2)
                break
        if IF1 is None:
            # No result card matched the journal title exactly.
            print("查找error:", title)
            continue

        all_table.append([title, ISSN, remark, IF1, IF2])
        # sleep(1)  # optional politeness delay between requests

    # Save everything. BUGFIX: writer.save() is deprecated/removed and the
    # legacy .xls engine (xlwt) is no longer shipped with pandas — write
    # .xlsx via a context manager instead.
    with pd.ExcelWriter("./中文cscd影响因子.xlsx") as writer:
        pd.DataFrame(all_table).to_excel(writer, index=False)