【文献计量学】实际计算几乎所有已知的文献计量指标（Matlab代码实现）

Ps.729

于 2024-06-07 09:30:53 发布

阅读量1k

点赞数 28

文章标签：前端

本文链接：https://blog.csdn.net/weixin_67304359/article/details/139448446

版权

💥💥💞💞欢迎来到本博客❤️❤️💥💥

🏆博主优势：🌞🌞🌞为方便读者阅读，博客内容尽量做到思维缜密、逻辑清晰。

⛳️座右铭：行百里者，半于九十。

📋📋📋本文目录如下：🎁🎁🎁

目录

⛳️赠与读者

💥1 概述

📚2 运行结果

🎉3 参考文献

🌈4 Matlab代码实现

⛳️赠与读者

👨‍💻做科研，涉及到一个深在的思想系统，需要科研者逻辑缜密，踏实认真，但是不能只是努力，很多时候借力比努力更重要，然后还要有仰望星空的创新点和启发点。当哲学课上老师问你什么是科学，什么是电的时候，不要觉得这些问题搞笑。哲学是科学之母，哲学就是追究终极问题，寻找那些不言自明只有小孩子会问的但是你却回答不出来的问题。建议读者按目录次序逐一浏览，免得骤然跌入幽暗的迷宫找不到来时的路，它不足为你揭示全部问题的答案，但若能让人胸中升起一朵朵疑云，也未尝不会酿成晚霞斑斓的别一番景致，万一它居然给你带来了一场精神世界的苦雨，那就借机洗刷一下原来存放在那儿的“躺平”上的尘埃吧。

或许，雨过云收，神驰的天地更清朗.......🔎🔎🔎

💥1 概述

本程序面向文献计量学，可实际计算几乎所有已知的文献计量指标。事实上，每位研究者都希望了解自己的文献计量表现。目前已有许多软件可以计算这些指标，例如 Publish or Perish 或 Google Scholar。然而，这些软件都需要额外的人工处理，比如筛选论文以排除同名作者的成果。此外，Google Scholar 无法完整列出一篇论文的所有作者（它会截断作者列表），因此某些依赖作者数量的指标并不准确。

📚2 运行结果

部分代码：

% Report banner.
disp('BIBLIOMETRICS')
disp(tr)

% Descriptive statistics of the citation counts stored in C.
disp('Descriptive statistics')
disp(tr)
Ctot = sum(C);      % total number of citations (reused by later sections)
n = length(C);      % number of papers (reused by later sections)
fprintf('Total number of papers: %i\n', n)
fprintf('Total number of citations: %i\n', Ctot)
fprintf('Min: %i - Max: %i\n', min(C), max(C))
fprintf('Mode of citations per paper: %i\n', mode(C))
fprintf('Median of citations per paper: %0.1f\n', median(C))

% Mean, standard deviation and coefficient of variation (in percent).
M = mean(C);
D = std(C);
CV = D/M*100;
fprintf('Mean number of citations per paper: %0.1f\n', M)
fprintf('Variation coefficient (CV): %0.2f%%\n', CV)
% The adjusted CV multiplies CV by the factor 1 + 1/(4n).
fprintf('Adjusted Variation coefficient (CV''): %0.2f%%\n', CV*(1+1/(4*n)))
clear M D CV        % temporaries are not needed past this point
disp(' ')
%The Lorenz curve and Gini's coefficient of the citation distribution.
%Papers are sorted by increasing citations; F is the cumulative fraction of
%papers and L the cumulative fraction of citations.
[Csorted,idx]=sort(C);
x=1:1:n; cC=cumsum(Csorted);
F=x./max(x);
L=cC/Ctot;
%The Lorenz curve starts at the origin (0,0). F and L start at the first
%paper, so the origin must be prepended before integrating; otherwise the
%area between 0 and 1/n is lost and the coefficient is overestimated (it
%would not be 0 even when all the papers have the same citations).
Gcoeff=1-2*trapz([0 F],[0 L]);
fprintf('Gini''s coefficient: %0.2f\n',Gcoeff)
%First figure: left half of the screen.
scrsz = get(groot,'ScreenSize');
hfig1=figure; POS=scrsz; POS(3)=POS(3)/2;
set(hfig1,'Position',POS)
hold on
%Grey triangle under the diagonal, then the area under the Lorenz curve
%painted white: the grey that remains is the inequality area.
patch([0 1 1 0],[0 1 0 0],[192 192 192]./255)
patch([0 F 1 0],[0 L 0 0],'w')
Le1=plot([0 1],[0 0],'g','LineWidth',2);
plot([1 1],[0 1],'g','LineWidth',2)
Le2=plot([0 1],[0 1],'b--','LineWidth',2);
%Draw the curve from the origin, consistently with the integration above.
Le3=plot([0 F],[0 L],'r-','LineWidth',2);
hold off
title('Lorenz curve of citations'); xlabel('% of papers'); ylabel('% of citations')
legend([Le1 Le2 Le3],'Line of perfect inequality','Line of perfect equality','Lorenz curve','Location','NorthEastOutside')
axis square

disp(tr)
%Statistics on publication years (skipped when Y is empty).
if ~isempty(Y)
    %Age of each paper: current calendar year minus publication year.
    Ny=(str2double(datestr(now,'yyyy'))-Y);
    %Harzing's Publish or Perish adds 1 to Ny. That is deliberately not done
    %here: a paper published during the current year is only a few months
    %old, not one year old.
    %my is the largest age; cty holds the sorted number of papers per year.
    my=max(Ny); cty=sort(crosstab(Y)); lcty=length(cty);
    fprintf('Years: %i\n',my)
    fprintf('Years of publications\t first: %i \t last: %i\n',min(Y),max(Y))
    fprintf('Papers per year\t Min: %i \t Max: %i\n',min(cty),max(cty))
    fprintf('Mode of papers per year: %i\n',mode(cty))
    %The median of an even number of yearly counts can be fractional (e.g.
    %2.5); with %i MATLAB would switch to exponential notation, so it is
    %printed with %0.1f like the other medians of this report.
    fprintf('Median of papers per year: %0.1f\n',median(cty))
    fprintf('Mean number of papers per year: %0.1f\n',mean(cty))
    %Note: my is 0 when every paper was published in the current year; the
    %division below then yields Inf (or NaN when Ctot is 0 too).
    fprintf('Mean number of citations per year: %0.1f\n',Ctot/my)
    disp(tr)
end
% Statistics on the number of authors per paper (skipped when A is empty).
if ~isempty(A)
    fprintf('Authors\tMin: %i - Max: %i\n', min(A), max(A))
    fprintf('Mode of Authors per paper: %i\n', mode(A))
    fprintf('Median of Authors per paper: %0.1f\n', median(A))
    fprintf('Mean number of Authors per paper: %0.1f\n', mean(A))
    % Sum over the papers of the citations divided by the number of authors.
    fprintf('Citations per Author: %0.1f\n', sum(C./A))
    disp(tr)
end
disp(' ');

disp('Bibliometric indices'); disp(' ')
disp('Citations indices'); disp(tr)
%Hirsch, J.E. (2005) An index to quantify an individual's scientific
%research output, arXiv:physics/0508025 v5 29 Sep 2006.
%[...] The h-index is defined as follows:
%A scientist has index h if h of his/her Np papers have at least h
%citations each, and the other (Np-h) papers have no more than h citations
%each. [...] The relation between Ctot and h will depend on the detailed
%form of the particular distribution, and it is useful to define the
%proportionality constant a as Ctot=ah^2. I find empirically that a ranges
%between 3 and 5.
%From here on the papers are ranked by decreasing citations; idx is flipped
%too, so that it keeps mapping each rank to the original position.
Csorted=fliplr(Csorted); idx=fliplr(idx);
%Number of ranks whose paper has at least as many citations as its rank.
Hidx=sum(Csorted>=x); H2=Hidx^2;
%Fenner T. et al (2018) A novel bibliometric index with a simple geometric
%interpretation - https://doi.org/10.1371/journal.pone.0200098
%The chi-index is the square root of the largest rectangle that fits under
%the citation curve, i.e. sqrt(max over k of k*c(k)). It is the maximum
%product that must be taken, not the rank at which it occurs (which,
%moreover, is not unique when the maximum is tied).
z=Csorted.*x;
Chiidx=sqrt(max(z)); clear z
fprintf('Hirsch''s h-index: %i \t a: %0.2f\t',Hidx,Ctot/H2)
fprintf('Fenner''s chi-index: %0.4f',Chiidx)
if ~isempty(Y)
    %One way to facilitate comparisons between academics with different
    %lengths of academic careers is to divide the h-index by the number of
    %years the academic has been active (measured as the number of years
    %since the first published paper). Hirsch (2005) proposed this measure
    %and called it m.
    fprintf('\t m: %0.2f',Hidx/my)
else
    fprintf('\n')
end
%dH measures the minimum number of citations missing in order to increment
%the current h-index by 1.
if Hidx<n
    dH=Hidx+1-Csorted(Hidx+1);
    fprintf('\tDelta-h: %i\n',dH);
else
    %Every paper is already in the h-core: h cannot grow without new papers.
    fprintf('\tThis is the max possible h-index\n')
end

%Egghe, L. (2006) Theory and practice of the g-index, Scientometrics, vol.
%69, No 1, pp. 131-152.
%Given a set of articles ranked in decreasing order of the number of
%citations that they received, the g-index is the (unique) largest number
%such that the top g articles received (together) at least g^2 citations.
%Although the g-index has not yet attracted much attention or empirical
%verification, it would seem to be a very useful complement to the h-index.
cC = cumsum(Csorted);   % cumulative citations of the top-ranked papers
x2 = x.^2;              % squared ranks
Gidx = sum(cC >= x2);
fprintf('Egghe''s g-index: %i\t', Gidx)
%dG is the minimum number of citations missing in order to increment the
%current g-index by 1: the squared next rank minus the citations cumulated
%up to that rank.
if Gidx >= n
    fprintf('This is the max possible g-index\n')
else
    dG = x2(Gidx+1) - cC(Gidx+1);
    fprintf('Delta-g: %i\n', dG);
end

%Delta-h and delta-g are meant to measure how difficult it would be for the
%author at hand to increase his/her h-index and g-index. Note however that
%the range of delta-h is relatively small (in the worst case, delta-h=2h+1).
%Note also that a delta-h equal to, e.g., 2 does not mean that 2 more
%generic citations are needed to increase the h-index, but 2 more citations
%on *particular* papers (usually the one in position h+1, and some other
%paper in the positions right before h+1).

%Three indices derived from the ranked citations are computed first and
%printed afterwards.
%
%Jin, B. H. (2006). h-Index: An evaluation indicator proposed by scientist.
%Science Focus, 1(1), 8-9.
%The A-index is the "A"verage number of citations of the papers in the
%h-core.
Aidx = mean(Csorted(1:Hidx));
%Kosmulski, M. (2006). A new Hirsch-type index saves time and works equally
%well as the original h-index. ISSI Newsletter, 2(3), 4-6.
%A scientist's h(2) index is defined as the highest natural number such
%that his h(2) most-cited papers received each at least [h(2)]^2 citations.
H2idx = sum(Csorted >= x2);
%Zhang, C.T. (2009) The e-index, complementing the h-index for excess
%citations, PLoS ONE, e5429.
%The e-index is the square root of the surplus of citations in the h-set
%beyond h^2, i.e. beyond the theoretical minimum required to obtain a
%h-index of 'h'. The aim of the e-index is to differentiate between
%scientists with similar h-indices but different citation patterns.
Eidx = realsqrt(sum(Csorted(1:Hidx)) - H2);

fprintf('Jin''s A-index: %0.2f\n', Aidx);
fprintf('Kosmulski''s h2-index: %i\n', H2idx);
fprintf('Zhang''s e-index: %0.1f\n', Eidx);

%Sidiropoulos A., Katsaros D. and Manolopoulos Y., Generalized h-index for
%disclosing latent facts in citation networks, arXiv:cs.DL/0607066 v1 13
%Jul 2006.
%In that paper the authors proposed the normalized h-index, defined as
%follows: a researcher has normalized h-index hn = h/Np, if h of its Np
%articles have received at least h citations each, and the rest (Np - h)
%articles received no more than h citations.
fprintf('Sidiropoulos''es normalized h-index: %0.2f\n', Hidx/n)
disp(tr)
disp(' ');

if ~isempty(Y)
    disp('Years weighted indices')
    disp(tr)
    %The same paper introduced the Contemporary h-index. It adds an
    %age-related weighting to each cited article, giving (by default; this
    %depends on the parametrization) less weight to older articles. The
    %weighting is parametrized; gamma=4 and delta=1 are used here, like the
    %authors did for their experiments. This means that for an article
    %published during the current year, its citations account four times.
    %For an article published 4 years ago, its citations account only one
    %time. For an article published 6 years ago, its citations account 4/6
    %times, and so on.
    Ny = Ny + 1;                        % ages start from 1: no division by 0
    ageWeight = 4.*Ny.^-1;
    Sc = sort(ageWeight.*C, 'descend'); % ranked age-weighted citations
    Hcidx = sum(Sc >= x);
    Hc2 = Hcidx^2;
    fprintf('Sidiropoulos''es Contemporary h-index (hc-index): %i \t a: %0.2f\n', Hcidx, sum(Sc)/Hc2);

    %The age-weighted citation rate was inspired by Bihui Jin's note The
    %AR-index: complementing the h-index, ISSI Newsletter, 2007, 3(1), p. 6.
    %The AWCR measures the number of citations to an entire body of work,
    %adjusted for the age of each individual paper. It is an age-weighted
    %citation rate, where the number of citations to a given paper is
    %divided by the age of that paper. Jin defines the AR-index as the
    %square root of the sum of all age-weighted citation counts over all
    %papers that contribute to the h-index.
    Nys = Ny(idx);                      % ages in the same order as Csorted
    JAWCR = sum(Csorted(1:Hidx)./Nys(1:Hidx));
    fprintf('Jin''s Age-weighted citation rate (AWCR): %0.2f\n', JAWCR);
    %The square root of the AWCR allows a comparison with the h-index; it
    %approximates the h-index if the (average) citation rate remains more or
    %less constant over the years.
    fprintf('Jin''s AR-index (AR-index): %0.2f\n', realsqrt(JAWCR));

    %Harzing sums over all the papers instead, because she feels that this
    %represents the impact of the total body of work more accurately. (In
    %particular, it allows younger and as yet less cited papers to
    %contribute to the AWCR, even though they may not yet contribute to the
    %h-index.)
    HAWCR = sum(Csorted./Nys);
    fprintf('Harzing''s Age-weighted citation rate (AWCR): %0.2f\n', HAWCR);
    fprintf('Harzing''s AR-index (AR-index): %0.2f\n', realsqrt(HAWCR));
    disp(tr)
    disp(' ');
end

%Indices corrected for co-authorship (skipped when A is empty).
if ~isempty(A)
    disp('Authors weighted indices'); disp(tr)
    %The Individual h-index was proposed by Pablo D. Batista, Monica G.
    %Campiteli, Osame Kinouchi, and Alexandre S. Martinez in their paper Is
    %it possible to compare researchers with different scientific
    %interests?, Scientometrics, Vol 68, No. 1 (2006), pp. 179-189. It
    %divides the standard h-index by the average number of authors in the
    %articles that contribute to the h-index, in order to reduce the effects
    %of co-authorship; the resulting index is called hI.
    AS=A(idx); %authors per paper, in the same order as Csorted
    HIidx=Hidx/mean(AS(1:Hidx));
    fprintf('Batista''s Individual h-index (hI-index): %0.2f\n',HIidx)
    %Harzing also implements an alternative individual h-index, hI,norm,
    %that takes a different approach: instead of dividing the total h-index,
    %she first normalizes the number of citations for each paper by dividing
    %the number of citations by the number of authors for that paper, then
    %calculates hI,norm as the h-index of the normalized citation counts.
    %This approach is much more fine-grained than Batista et al.'s; she
    %believes that it more accurately accounts for any co-authorship effects
    %that might be present and that it is a better approximation of the
    %per-author impact, which is what the original h-index set out to
    %provide.
    SAH=sort(C./A,'descend');
    HHIidx=sum(SAH>=x);
    fprintf('Harzing''s Individual h-index (hI,norm-index): %0.2f\n',HHIidx)
    %The third variation is due to Michael Schreiber and first described in
    %his paper To share the fame in a fair way, hm modifies h for
    %multi-authored manuscripts, New Journal of Physics, Vol 10 (2008),
    %040201-1-8. Schreiber's method uses fractional paper counts instead of
    %reduced citation counts to account for shared authorship of papers, and
    %then determines the multi-authored hm index based on the resulting
    %effective rank of the papers using undiluted citation counts.
    %xS is the effective rank (cumulative sum of 1/authors); zS keeps the
    %effective ranks that do not exceed the citations of their paper.
    xS=cumsum(1./AS)'; yS=xS<=Csorted'; zS=xS(yS);
    if isempty(zS)
        %No paper satisfies the condition (e.g. all the papers have zero
        %citations): zS(end) would raise an indexing error, so hm is 0.
        Hmidx=0;
    else
        Hmidx=zS(end);
    end
    fprintf('Schreiber''s Multi-authored h-index (hm-index): %0.2f\n',Hmidx)
    disp(tr); disp(' ');
end

%Indices weighted by both paper age and number of authors (both Y and A
%must have been supplied).
if ~isempty(Y) && ~isempty(A)
    disp('Years and Authors weighted indices'); disp(tr)
    %The per-author age-weighted citation rate is similar to the plain
    %AWCR, but is normalized to the number of authors for each paper.
    %Csorted and Nys are in ranked order, so the author counts must be taken
    %in ranked order as well (AS), not in the original input order (A);
    %otherwise each paper is divided by the authors of a different paper.
    JAWCRN=sum(Csorted(1:Hidx)./Nys(1:Hidx)./AS(1:Hidx));
    fprintf('Jin''s Age-weighted citation rate (AWCR) normalized per authors: %0.2f\n',JAWCRN);
    fprintf('Jin''s AR-index (AR-index) normalized per authors: %0.2f\n',realsqrt(JAWCRN));
    %Harzing's variant sums over all the papers instead of the h-core only.
    HAWCRN=sum(Csorted./Nys./AS);
    fprintf('Harzing''s Age-weighted citation rate (AWCR) normalized per authors: %0.2f\n',HAWCRN);
    fprintf('Harzing''s AR-index (AR-index) normalized per authors: %0.2f\n',realsqrt(HAWCRN));
    disp(tr)
end
%Second figure: same size as the first one, shifted to the right by its own
%width (POS(3)). Each panel plots a ranked quantity (blue dots joined by a
%red line) against a black reference line; the plots are drawn in a 2x3 grid.
hfig2=figure; POS(1)=POS(1)+POS(3);
set(hfig2,'Position',POS)
%Panel 2: ranked citations against the squared rank; reference line y=x2.
subplot(2,3,2); plot(x2,Csorted,'b.',x2,Csorted,'r-',x2,x2,'k-'); axis square
title(sprintf('Kosmulski''s\nh2-index')); xlabel('Squared Paper Rank'); ylabel('Citations');
%Panel 3: cumulative citations against the squared rank; reference line y=x2.
subplot(2,3,3); plot(x2,cC,'b.',x2,cC,'r-',x2,x2,'k-'); axis square
title(sprintf('Egghe''s\ng-index')); xlabel('Squared Paper Rank'); ylabel('Cumulative sum of Citations');
%Panel 4: ranked age-weighted citations against the rank; reference line y=x.
%NOTE(review): Sc is assigned only inside the "if ~isempty(Y)" section above,
%so this line appears to require Y to be supplied - confirm.
subplot(2,3,4); plot(x,Sc,'b.',x,Sc,'r-',x,x,'k-'); axis square
title(sprintf('Sidiropoulos''s\nhc-index')); xlabel('Paper Rank'); ylabel('Age weighted citations');
%Panel 5: ranked per-author citations against the rank; reference line y=x.
%NOTE(review): SAH is assigned only inside the "if ~isempty(A)" section
%above, so this line appears to require A to be supplied - confirm.
subplot(2,3,5); plot(x,SAH,'b.',x,SAH,'r-',x,x,'k-'); axis square