# Cite as "Fedotenkov, Igor, 2018. "A review of more than one hundred Pareto-tail index estimators," MPRA Paper 90072, University Library of Munich, Germany." X<-(abs(rcauchy(n)))^(2) #tail index =0.5. Hill<-function(X,k){ #Hill estimator X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) (sum(log(X[(n-k+1):n]))/k-log(X[n-k]))^(-1) } Hill(X) dHR<-function(X,k){ #De Haan &Resnic (1981) estimator X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) log(k)/(log(X[n])-log(X[n-k+1])) } dHR(X) BB<-function(X,k,alpha){ #Bacro Brito (1995) estimator (page 2) X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) if(missing(alpha)) alpha<-0.9 f<-floor(alpha*k) alpha<-f/k # more exact alpha -log(alpha)/(log(X[n-f+1])-log(X[n-k+1])) } BB(X) Pickands<-function(X,k){ #Pickands (1975) estimator X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) k<-(k%/%4+1)*4 #k-devisible by 4 log(2)/(log((X[n-k/4]-X[n-k/2])/(X[n-k/2]-X[n-k]))) } Pickands(X) DEdH<-function(X,k){ #Dekkers, Einmahl, De Haan "moment" estimator (1989) X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) M1<-sum(log(X[(n-k+1):n])-log(X[n-k]))/k M2<-sum((log(X[(n-k+1):n])-log(X[n-k]))^2)/k (M1+1-0.5*(1-M1^2/M2)^{-1})^{-1} } DEdH(X) AB_Hill<-function(X,k){ #Aban & Meerschaert shifted Hill’s estimator (2001) eps<-0.0000001 X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) s_min<--10*X[n-k] s_max<-X[n-k] repeat{ s1<-(s_min+s_max)/2 alpha<-(sum(log(X[(n-k+1):n]-s1))/k-log(X[n-k]-s1))^(-1) Z<-sum((X[(n-k+1):n]-s1)^(-1)) RHS<-(alpha+1)*Z/k LHS<-alpha*(X[n-k]-s1)^(-1) if(RHS>LHS) s_min<-s1 else s_max<-s1 s_min s_max if ((s_max-s_min)0) rho<-log(2)^{-1}*log(M4) else rho<-1 #sometimes it gives negative values under the log g7<-g-(M-2*g^2)*(1-rho)/(2*g*rho) g7^{-1} } Peng_H(X) Peng_P<-function(X,k){ #Peng estimator (Pickands modification) X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) k1<-round(n/log(n)) k2<-round(n/(2*log(n))) k3<-round(n/(4*log(n))) g1<-Pickands(X,k1)^{-1} g2<-Pickands(X,k2)^{-1} g3<-Pickands(X,k3)^{-1} g<-Pickands(X,k)^{-1} M<-(g2-g3)/(g1-g2) if (M>0){rho<-log(M)/log(2) g7<-g-(g-g3)/(1-4^rho)} else g7<-g g7^{-1} } Peng_P(X) FJP<-function(X,N,alpha_0,delta){ #Fialova, Jureckova, Picek 2004 n<-length(X) if(missing(N)) N<-5 if(missing(alpha_0)) alpha_0<-Hill(X)*1.2 if(missing(delta)) delta<-0.9 r<-n%/%N r0<-n%%N M<-matrix(X[1:(r*n)],nrow=r,ncol=N) XX<-apply(M,1,mean) if (r0>0) XX<-c(XX,mean(X[n-r0:n])) a_N<-N^((1-delta)/alpha_0) F<-mean(XX<=a_N) if ((F>0)&(F<1)){ alpha<--log(1-F)/log(a_N)} else { alpha<-alpha_0 } alpha } FJP(X) vanZyl<-function(X,k){ #van Zyl estimator 2015 X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) h_gamma<-Hill(X,k) h_mu<-X[n-k] h_sigma<-h_mu*h_gamma TT<-mean(log(X[(n-k+1):n]*h_gamma/h_sigma+1-h_mu*h_gamma/h_sigma)) (TT)^(-1) } vanZyl(X) Nuyts<-function(X,k){ #Nuyts estimator 2015 X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) eps<-0.000000001 max_a<-20 min_a<-0 repeat{ a<-(max_a+min_a)/2 L<-mean(log(X[(n-k+1):n])) R<-1/a+(log(X[n-k+1])*X[n-k]^(-a)-log(X[n])*X[n]^(-a))/(X[n-k+1]^(-a)-X[n]^(-a)) if (L>R) max_a<-a else min_a<-a if (abs(max_a-min_a)eps)a1<-a else{ max_a<-20 min_a<-0 repeat{ a<-(max_a+min_a)/2 L<-mean(log(X[(n-k+1):n])) R<-1/a+(log(X[n-k+1])*X[n-k]^(-a)-log(X[n])*X[n]^(-a))/(X[n-k+1]^(-a)-X[n]^(-a)) if (L>R) min_a<-a else max_a<-a if (abs(max_a-min_a)=(X[n-m+1]-a1)) X3<-as.numeric(X>=(X[n-m+1]-a2)) M1<-sum(X1*X2) M2<-sum(X2*X3) (log(a2)-log(a1))/(log(M1)-log(M2)) } Muller(X) #library(logspline) #MRsP<-function(X,k){ # Muller and Rufibach 2009: smoothed Pickands estiamtor #X<-sort(X) #n<-length(X) #if(missing(k)) k<-round(sqrt(n)) #r<-k/4 #param<-logspline(X) #X1<-qlogspline(((n-r+1)/n), param) #X2<-qlogspline(((n-2*r+1)/n), param) #X4<-qlogspline(((n-4*r+1)/n), param) #log(2)/(log((X1-X2)/(X2-X4))) #} #MRsP(X) MS<-function(X){ #Meerschaert and Scheffer 1998 n<-length(X) m<-mean(X) S<-sum((X-m)^2) (2*log(n))/max(0,log(S)) } MS(X) Politis2<-function(X){ #Politis 2002 n<-length(X) I<-c(1:n) S<-cumsum(X^2)/I Y<-log(S) Yb<-mean(Y) Ib<-mean(log(I)) D1<- sum((Y-Yb)*(log(I)-Ib)) D2<- sum((log(I)-Ib)^2) 2/(D1/D2+1) } Politis<-function(X){ n<-length(X) m<- 1000 # the number of bootstraps alpha<-vector(length=m) for (i in 1:m){ Y<-sample(X,n, replace=FALSE) alpha[i]<-Politis2(Y) } median(alpha) } Politis(X) MEP_BAS<-function(X,r){ #McElroy and Politis 2006 BAS estimator #r shall be relatively large, such that 2*r-th moment does not exhist n<-length(X) if(missing(r)) r<-1 S<-sum((X)^(2*r)) (2*r*log(n))/log(S) } MEP_BAS(X) MEP_CEN<-function(X){ #McElroy and Politis 2006 CEN estimator n<-length(X) k<-round(sqrt(n)) S1<-sum(X^(2)) S2<-sum(X[1:k]^(2)) (2*log(k))/(log(S1)-log(S2)) } MEP_CEN(X) MEP_SCEN<-function(X){ #McElroy and Politis 2006 RCEN estimator n<-length(X) b<-round(n^(1/3)) M<-floor(n/(b^2))-1 XX<-matrix(X[1:(M*b^2)],nrow=b^2,ncol=M) x<-X[(M*b^2+1):n] #last group is formed out of b^2 last observations + remaining observations (n is not necessarily divisible by b^2) SS<-vector(length=M) for (i in 1:M){ SS[i]<-MEP_CEN(XX[,i]) } ss<-MEP_CEN(x) mean((c(SS,ss))^(-1))^(-1) #Avereging not alpha, but gamma: median performs better? } MEP_SCEN(X) MEP_RCEN<-function(X){ #McElroy and Politis 2006 RCEN estimator n<-length(X) b<-floor(sqrt(n)) B<-vector(length=b) for (i in 1:b){ S1<-sum(X^(2)) S2<-sum(X[((i-1)*b+1):(i*b)]^(2)) B[i]<-(log(S1)-log(S2))/(2*log(b)) } mean(B)^(-1) } MEP_RCEN(X) MEP_SRCEN<-function(X){ #McElroy and Politis 2006 SRCEN estimator n<-length(X) b<-round(n^(1/3)) M<-floor(n/(b^2))-1 XX<-matrix(X[1:(M*b^2)],nrow=b^2,ncol=M) x<-X[(M*b^2+1):n] #last group is formed out of b^2 last observations + remaining observations (n is not necessarily divisible by b^2) SS<-vector(length=M) for (i in 1:M){ SS[i]<-MEP_RCEN(XX[,i]) } ss<-MEP_RCEN(x) mean((c(SS,ss))^(-1))^(-1) #Avereging not alpha, but gamma: median performs better? } MEP_SRCEN(X) Falk<-function(X,k){ #Falk (1994) estimator X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) P1<-Pickands(X,(k/2)) P2<-Pickands(X,k) P1*5/13+P2*8/13 } Falk(X) Zipf<-function(X,k,w){ # Zipf (Kratz and Resnick (1996) quantile plots estimator) X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) if(missing(w)) w<-rep(1,k) x<--log(c(n:1)/(n+1))[(n-k+1):n] y<-log(X[(n-k+1):n]) f<-lm(y~x,weights=w) 1/f$coefficients[2] } Zipf(X) AM1<-function(X,k){ #Aban & Meerschaert Hill estimator if(missing(k)) k<-round(sqrt(n)) Hill(X,k)*k/(k-1) } AM1(X) AM2<-function(X,k){ #Aban & Meerschaert simplified estimator: ne poluchilos' X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) j<-c(n:1) a1<-rev(cumsum(j^(-1))) a<-a1[(n-k+1):n] x<-X[(n-k+1):n] b_a<-mean(a) s<-b_a*(a-b_a)/(sum((a-b_a)^2)) (sum(s*log(x)))^(-1) } AM2(X) AM3<-function(X,k){ # Aban & Meerschaert - smoothing regression X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) x<--log((c(n:1)-0.5)/n)[(n-k+1):n] y<-log(X[(n-k+1):n]) f<-lm(y~x) 1/f$coefficients[2] } AM3(X) GI1<-function(X,k){ # Gabaix & Ibragimov - reversed A&M X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) x<--log((c(n:1)-0.5)/n)[(n-k+1):n] y<-log(X[(n-k+1):n]) f<-lm(x~y) f$coefficients[2] } GI1(X) GI2<-function(X,k){ # Gabaix & Ibragimov Harmonic 1 X<-sort(X,decreasing=TRUE) n<-length(X) if(missing(k)) k<-round(sqrt(n)) I<-1/c(1:n) x1<--cumsum(I) H<-x1[1:(k-1)] y<-log(X[2:k]) f<-lm(y~H) 1/f$coefficients[2] } GI2(X) GI3<-function(X,k){ # Gabaix & Ibragimov Harmonic 2 X<-sort(X,decreasing=TRUE) n<-length(X) if(missing(k)) k<-round(sqrt(n)) I<-1/c(1:n) x1<--cumsum(I) H<-x1[1:(k-1)] y<-log(X[2:k]) f<-lm(H~y) f$coefficients[2] } GI3(X) BVT<-function(X,k){ # (Beirlant, Vynckier and Teugels Excess functions), X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) UH<-rep(0,k) for (j in 1:k){ UH[j]<-X[n-j]*(Hill(X,j)^(-1)) } x<--log(c(1:(k))/n) y<-log(UH[1:(k)]) f<-lm(y~x) 1/f$coefficients[2] } BVT(X) BDG<-function(X,k){ #Beirlant, Dierckx and Guillou 2005, X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) UH<-rep(0,k) for (j in 1:k){ UH[j]<-X[n-j]*(Hill(X,j)^(-1)) } I<- log((k+1)/c(1:k)) mI<-mean(I) (mean((I-mI)*log(UH)))^{-1} } BDG(X) BDG_H<-function(X,k){ #Beirlant, Dierckx and Guillou 2005, Hill modification X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) UH<-rep(0,k) for (j in 1:k){ UH[j]<-X[n-j]*(Hill(X,j)^(-1)) } Hill(UH,(k-1)) } BDG_H(X) SS<-function(X,k){ #Schultze and Steinebach 1996 no intercept X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) I<-n/c(1:k) Y1<-sum(log(I)^2) Y2<-sum(log(I)*log(X[n:(n-k+1)])) Y1/Y2 } SS(X) SS2<-function(X,k){ #Schultze and Steinebach 1996 reversed eplanotory and dependent variables X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) x<--log(c(n:1)/(n+1))[(n-k+1):n] y<-log(X[(n-k+1):n]) f<-lm(x~y) f$coefficients[2] } SS2(X) BVT2<-function(X,k,w){ #Beirlant, J., P. Vynckier, and J. L. Teugels (1996b). regression through the anchor point X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) if(missing(w)) w<-rep(1,k) x<--log(c(n:1)/(n+1))[(n-k+1):n] +log((k+1)/(n+1)) y<-log(X[(n-k+1):n])-log(X[n-k]) f<-lm(y~0+x,weights=w) 1/f$coefficients[1] } BVT2(X) BF1<-function(X,k){ #Brito and Freitas Limiting behaviour of a geometric-type estimator for tail indices (Geometric mean of two Schultze and Steinebach 1996 estimators) X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) g1<-Zipf(X,k) g2<-SS2(X,k) sqrt(g1*g2) } BF1(X) DJV<-function(X,k){ #Danielsson, Jansen, and De Vries 1996 X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) M1<-sum(log(X[(n-k+1):n])-log(X[n-k]))/k M2<-sum((log(X[(n-k+1):n])-log(X[n-k]))^2)/k 2*M1/M2 } DJV(X) DPR1<-function(X,m){ #DPR estimator n<-length(X) if(missing(m)) m<-round(sqrt(n)) r<-floor(n/m)-1 #last group is treated separately SS<-vector(length=(r+1)) # vector for M2/M1 for (i in 1:r){ x1<-sort(X[((i-1)*m+1):(i*m)]) SS[i]<-x1[(m-1)]/x1[m] } x<-sort(X[(m*r+1):n]) #last group is formed out of m last observations + remaining observations (n is not necessarily divisible by m) n1<-length(x) SS[r+1]<-x[(n1-1)]/x[n1] ss<-sum(SS) ss/(r-ss) } DPR1(X) DPR<-function(X,M){ #different permutations if(missing(M)) M<-round(sqrt(n)) #number of permutations V<-vector(length=M) n<-length(X) for (i in 1:M){ X1<-sample(X,n,replace=FALSE) V[i]<-DPR1(X1) } mean(V) } DPR(X) GDPR1<-function(X,m,r){ #GDPR estimator n<-length(X) if(missing(m)) m<-round(sqrt(n)) if(missing(r)) r<- -0.1 t<-floor(n/m)-1 #last group is treated separately SS<-vector(length=(t+1)) # vector for M2/M1 for (i in 1:t){ x1<-sort(X[((i-1)*m+1):(i*m)]) SS[i]<-x1[(m-1)]/x1[m] } x<-sort(X[(m*t+1):n]) #last group is formed out of m last observations + remaining observations (n is not necessarily divisible by m) n1<-length(x) SS[t+1]<-x[(n1-1)]/x[n1] if (r==0){ZZ<--log(SS)} else { ZZ<- (1-SS^r)/r} pr<-mean(ZZ) (1-r*pr)/pr } GDPR1(X) GDPR<-function(X,M){ #different permutations if(missing(M)) M<-round(sqrt(n)) #number of permutations V<-vector(length=M) n<-length(X) for (i in 1:M){ X1<-sample(X,n,replace=FALSE) V[i]<-GDPR1(X1) } mean(V) } GDPR(X) Qi1<-function(X,m){ #Qi 2010 n<-length(X) if(missing(m)) m<-round(sqrt(n)) k<-round(sqrt(m)) r<-floor(n/m)-1 #last group is treated separately HH<-vector(length=(r+1)) # vector for group-specific Hill estimates for (i in 1:r){ x1<-X[((i-1)*m+1):(i*m)] HH[i]<-Hill(x1,k) } x1<-X[(m*r+1):n] #last group is formed out of m last observations + remaining observations (n is not necessarily divisible by m) n1<-length(x1) k<-round(sqrt(n1)) HH[r+1]<-Hill(x1,k) mean(HH) } Qi1(X) Qi<-function(X,M){ #different permutations if(missing(M)) M<-round(sqrt(n)) #number of permutations V<-vector(length=M) n<-length(X) for (i in 1:M){ X1<-sample(X,n,replace=FALSE) V[i]<-Qi1(X1) } mean(V) } Qi(X) Vaiciulis<-function(X,m,l){ #vaiciulis 2012 ne poluchilos' n<-length(X) if(missing(m)) m<-round(sqrt(n)) if(missing(l)) l<-3 t<-floor(n/m)-1 #last group is treated separately FF<-matrix(nrow=(t+1),ncol=(l+1)) # vector for f(M2/M1) for (j in 1:(l+1)){ for (i in 1:t){ x1<-sort(X[((i-1)*m+1):(i*m)]) FF[i,j]<- log(x1[m]/x1[(m-1)])^j } x<-sort(X[(m*t+1):n]) #last group is formed out of m last observations + remaining observations (n is not necessarily divisible by m) n1<-length(x) FF[(t+1),j]<-log(x[n1]/x[(n1-1)])^j } ss<-rowMeans(FF) D1<-0 D2<-0 for (i in 1:l){ D1<-D1+(-1)^(i+1)*gamma(i+1)^(-1)*ss[i] } for (i in 2:(l+1)){ D2<-D2+(-1)^i*gamma(i+1)^(-1)*ss[i] } D1/D2 } Vaiciulis(X) #X<-runif(10000) #X<-(1/(1-X)) Vaiciulis2009<-function(X,m){ #Vaiciulis 2009 n<-length(X) if(missing(m)) m<-round(sqrt(n)) IR<-vector(length=(n-2*m)) for (i in 0:(n-2*m)){ R1<-sum(X[(i+1):(i+m)]^2) R2<-sum(X[(i+m+1):(i+2*m)]^2) IR[i]<-abs((R1-R2))/(R1+R2) } ir<-mean(IR) (2.0024-0.4527*ir+0.4246*ir^2-0.1386*ir^3)*cos(pi*ir/2) } Vaiciulis2009(X) VP_H<-function(X,k,r){ #Vaiciulis Paulauskas, 2013: Hill estimator modification X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) if(missing(r)) r<- -0.1 Y<-X[(n-k+1):n]/X[n-k] if (r==0){ZZ<-log(Y)} else { ZZ<- (Y^r-1)/r} lambda<-mean(ZZ) (1+r*lambda)/lambda } VP_H(X) VP_M<-function(X,k,r){ #Vaiciulis Paulauskas, 2013: Moment estimator modification X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) if(missing(r)) r<- -0.1 Y<-X[(n-k+1):n]/X[n-k] if (r==0){M1<-log(Y)} else { M1<- (Y^r-1)/r} if (r==0){M12<-log(Y)} else { M12<- (Y^(2*r)-1)/(2*r)} M2<-M1^2 H1<-mean(M1) H12<-mean(M12) H2<-mean(M2) (H1+1-0.5*(1-H1*H12/H2)^{-1})^(-1) } VP_M(X) VP_V<-function(X,k,r){ #Vaiciulis Paulauskas, 2013: De Vries estimator modification X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) if(missing(r)) r<- -0.1 Y<-X[(n-k+1):n]/X[n-k] if (r==0){M1<-log(Y)} else { M1<- (Y^r-1)/r} if (r==0){M12<-log(Y)} else { M12<- (Y^(2*r)-1)/(2*r)} M2<-M1^2 H1<-mean(M1) H12<-mean(M12) H2<-mean(M2) 2*H12/H2 } VP_V(X) G<-function(X,k,r,u){ #Vaiciulis Paulauskas, 2017: function G X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) if(missing(r)) r<- -0.1 if(missing(u)) u<- 0 Y<-X[(n-k+1):n]/X[n-k] g<-Y^r*log(Y)^u mean(g) } VP_2<-function(X,k,r){ #Vaiciulis Paulauskas, 2017: gamma_2 page 6 X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) if(missing(r)) r<- -0.1 G1<-G(X,k,r,1) D1<-2*G1 D2<-2*r*G1+1+(4*r*G1+1)^0.5 D2/D1 } VP_2(X) VP_3<-function(X,k,r){ #Vaiciulis Paulauskas, 2017: gamma_3 page 6 X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) if(missing(r)) r<- -0.1 G0<-G(X,k,r,0) G1<-G(X,k,r,1) D1<-r*G1-G0+1 D2<-(r^2)*G1 if (r==0) a<-DJV(X) else a<-D2/D1 a } VP_3(X) JP2004a<-function(X,m,delta){ #Jureckova Picek estimator n<-length(X) if(missing(m)) m<-round(sqrt(n)) if(missing(delta)) delta<-0.05 T<-floor(n/m)-1 #last group is treated separately SS<-vector(length=(T+1)) # vector for maxima for (i in 1:T){ x1<-sort(X[((i-1)*m+1):(i*m)]) SS[i]<-x1[m] } x<-sort(X[(m*T+1):n]) #last group is formed out of m last observations + remaining observations (n is not necessarily divisible by m) n1<-length(x) SS[T+1]<-x[n1] a1<-m*T^(1-delta) a2<-(1-T^(-(1-delta))) log(a1)/log(quantile(SS,probs=a2)) } JP2004a(X) JP2004<-function(X,M){ #different permutations if(missing(M)) M<-round(sqrt(n)) #number of permutations V<-vector(length=M) n<-length(X) for (i in 1:M){ X1<-sample(X,n,replace=FALSE) V[i]<-JP2004a(X1) } mean(V) } JP2004(X) MM<-function(X,k,alpha){ n<-length(X) sum((log(X[(n-k+1):n])-log(X[n-k]))^alpha)/k } rho<-function(X,k,tau){ #Caeiro et al. 2005 auxiliary functions X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) if(missing(tau)) tau<-0 M1<-MM(X,k,1) M2<-MM(X,k,2) M3<-MM(X,k,3) if (tau==0){ T<-(log(M1)-log(M2/2)/2)/(log(M2/2)/2-log(M3/6)/3) } else{ T<-(M1^tau-(M2/2)^(tau/2))/((M2/2)^(tau/2)-(M3/6)^(tau/3)) } min(-abs(3*(T-1)/(T-3) ),0) } beta<-function(X,k){ X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) I<-c(1:k)/k U<- c((n-1):1)*(log(X[2:n])-log(X[1:(n-1)])) rho1<-rho(X,k,0) M0<-(k/n)^rho1 M1<-mean(I^rho1) M2<-mean(U[1:k]) M3<-mean(U[1:k]*I^rho1) M4<-mean(U[1:k]*I^(2*rho1)) M0*(M1-M2)/(M1*M2-M3) } BCF<-function(X,k){ #Brito Cavalcante and Freitas 2016, equation 6 X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) ind<-1:k S1<-mean(log(n/ind)^2) S2<-mean(log(n/ind))^2 i<-S1-S2 M1<-MM(X,k,1) M2<-MM(X,k,2) 1/sqrt((M2-M1^2)/i) } BCF(X) BCF2<-function(X,k,k2){ #Brito Cavalcante and Freitas 2016, equation 12 X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) if(missing(k2)) k2<-round(max(0.9*n,k)) g<-BCF(X,k)^(-1) b<-beta(X,k2) r<-rho(X,k2) g2<-g*(1-(n/k)^r*b/(1-r)^2) g2^(-1) } BCF2(X) BCF3<-function(X,k,k2){ #Brito Cavalcante and Freitas 2016, equation 13 X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) if(missing(k2)) k2<-round(max(0.9*n,k)) g<-BCF(X,k)^(-1) b<-beta(X,k2) r<-rho(X,k2) g2<-g*exp(-(n/k)^r*b/(1-r)^2) g2^(-1) } BCF3(X) HKKP<-function(X,k){ #HUISMAN, Koedijk, Kool, Palm 2001 eqation 5 X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) HH<-vector(length=(k-3)) ind<-c(4:k) for (i in 4:k){ HH[i-3]<-Hill(X,i) } rg<-lm(HH~ind) rg$coefficients[1] } HKKP(X) dHP<-function(X,k){ #De Haan and Pereira (page 40 (2)) X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) D1<-k*X[n-k]^2 ind<-1:(n-k) D2<-sum(X[ind]^2) beta<-D1/D2 2/(beta+1) } dHP(X) Fan<-function(X,m){ #Fan (2004) page 19 (7) X<-sort(X) n<-length(X) if(missing(m)) m<-round(sqrt(n)) N<-10000 #number of subsamples Nm<-N*m XX<-matrix(sample(X,Nm, replace=TRUE),ncol=m,nrow=N) h<-log(rowSums(XX))/log(m) mean(h)^(-1) } Fan(X) DJV<-function(X,k){ #Danielsson, Jansen, and De Vries 1996 X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) M1<-sum(log(X[(n-k+1):n])-log(X[n-k]))/k M2<-sum((log(X[(n-k+1):n])-log(X[n-k]))^2)/k 2*M1/M2 } DJV(X) MM<-function(X,k,alpha){ n<-length(X) sum((log(X[(n-k+1):n])-log(X[n-k]))^alpha)/k } Segers2001<-function(X,k,p){ #Segers 2001, X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) if(missing(p)) p<-0.7 M<-sum((X[n-k]/X[(n-k+1):n])^p)/k ((M^(-1)-1)/p)^(-1) } Segers2001(X) GM1<-function(X,k,alpha){ #Gomes Martin 2001, 1 estimator X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) if(missing(alpha)) alpha<-0.5 M<-MM(X,k,alpha) H<-MM(X,k,1) (gamma(alpha+1)*H^(alpha-1))/M } GM1(X) GM2<-function(X,k,alpha){ #Gomes Martin 2001, 2 estimator X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) if(missing(alpha)) alpha<-0.5 M<-MM(X,k,alpha) (gamma(alpha+1)/M)^(1/alpha) } GM2(X) rho<-function(X,k,tau){ #Caeiro et al. 2005 auxiliary functions X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) if(missing(tau)) tau<-0 M1<-MM(X,k,1) M2<-MM(X,k,2) M3<-MM(X,k,3) if (tau==0){ T<-(log(M1)-log(M2/2)/2)/(log(M2/2)/2-log(M3/6)/3) } else{ T<-(M1^tau-(M2/2)^(tau/2))/((M2/2)^(tau/2)-(M3/6)^(tau/3)) } min(-abs(3*(T-1)/(T-3) ),0) } beta<-function(X,k){ X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) I<-c(1:k)/k U<- c((n-1):1)*(log(X[2:n])-log(X[1:(n-1)])) rho1<-rho(X,k,0) M0<-(k/n)^rho1 M1<-mean(I^rho1) M2<-mean(U[1:k]) M3<-mean(U[1:k]*I^rho1) M4<-mean(U[1:k]*I^(2*rho1)) M0*(M1-M2)/(M1*M2-M3) } CGP1<-function(X,k){ #Caeiro et al. 2005 first estimator X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) rho1<-rho(X,k,0) beta1<-beta(X,k) H<-MM(X,k,1) (H*(1-beta1/(1-rho1)*(n/k)^rho1))^(-1) } CGP1(X) CGP2<-function(X,k){ #Caeiro et al. 2005 second estimator X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) rho1<-rho(X,k,0) beta1<-beta(X,k) H<-MM(X,k,1) (H*exp(-beta1/(1-rho1)*(n/k)^rho1))^(-1) } CGP2(X) GMN1<-function(X,k){ #Gomes et al. 2000 Jackknife estimators: X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) g1<-(Hill(X,k))^(-1) g2<-(DJV(X,k))^(-1) (2*g2-g1)^(-1) } GMN1(X) GMN2<-function(X,k){ #Gomes et al. 2000 Jackknife estimators: X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) g1<-(Hill(X,k))^(-1) g3<-(GM2(X,k,2))^(-1) 4*g3-3*g1 } GMN2(X) GMN3<-function(X,k){ #Gomes et al. 2000 Jackknife estimators: X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) g2<-(DJV(X,k))^(-1) g3<-(GM2(X,k,2))^(-1) (3*g2-2*g3)^(-1) } GMN3(X) GMN4<-function(X,k){ #Gomes et al. 2000 Jackknife estimators: X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) g1<-(Hill(X,round(k/2)))^(-1) g2<-(Hill(X,k))^(-1) (2*g1-g2)^(-1) } GMN4(X) GM3<-function(X,k,tau){ #Gomes Martin 2002, 1 estimator X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) if(missing(tau)) tau<-0 rho1<-rho(X,k,0) g2<-(DJV(X,k))^(-1) g3<-(GM2(X,k,2))^(-1) rho1/(-(2-rho1)*g2+2*g3) } GM3(X) GM4<-function(X,k,tau){ #Gomes Martin 2002, 1 estimator X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) if(missing(tau)) tau<-0 rho1<-rho(X,k,0) g2<-(Hill(X,round(k/2)))^(-1) g3<-(Hill(X,k))^(-1) (1-2^(-rho1))/(g3-2^(-rho1)*g2) } GM4(X) GMNj1<-function(X,k){ #Gomes et al 2002, jackknife 1 estimator (page 10) X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) gamma_n1<-vector(length=n) for (i in 1:n){ ind<-rep(TRUE,n) ind[i]<-FALSE #i-th element is removed X1<-X[ind] gamma_n1[i]<-(Hill(X1,k))^(-1) } g<-(Hill(X,k))^(-1) bar_g<-mean(gamma_n1) #rho1<-rho(X,k,1) #generalized rabotaet ploxo. #D1<-g-(n/(n-1))^rho1*bar_g #D2<-1-(n/(n-1))^rho1 #D2/D1 (n*g-(n-1)*bar_g)^(-1) } GMNj1(X) GMNj2<-function(X,k){ #Gomes et al 2002, jackknife 2 estimator (page 11) X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)/2)*2 g2<-(Hill(X,round(k/2)))^(-1) g1<-(Hill(X,k))^(-1) w<-log(1-k/n)/log(1-k/(2*n)) (1-w)/(g1-g2*w) } GMNj2(X) GMNj3<-function(X,k){ #Gomes et al 2002, jackknife 2 estimator (page 12) X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)/2)*2 g2<-(Hill(X,round(k/2)))^(-1) g1<-(Hill(X,k))^(-1) (1+k/n)/(g2*(2+k/n)-g1) } GMNj3(X) GPM<-function(X,s,k){ #Gomes et al 2005, generalized estimator (page 3) X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) if(missing(s)) s<-2 #s>=1 ind<-c(1:k) (s^2*sum(ind^(s-1)*log(rev(X[(n-k+1):n]/X[(n-k)])))/k^s)^(-1) } GPM(X) GPMj<-function(X,s,k){ #Gomes et al 2005, jackknife estimator (page 3) X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) if(missing(s)) s<-2 #s>=1 g1<-GPM(X,1,k)^(-1) g2<-GPM(X,s,k)^(-1) rho1<-rho(X,k,1) #it is better to use -1 #rho1<--1 (-s*(1-rho1)/(rho1*(s-1))*(g1-(s-rho1)/(s*(1-rho1))*g2))^(-1) } GPMj(X) GFM<-function(X,k){ #Gomes et al. 2005, best linear estimator (rho=-1) (page 7) X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) H<-vector(length=k) for (i in 1:k){ H[i]<-Hill(X,(i))^(-1) } ind<-c(1:(k-1)) (sum(H[1:(k-1)]*ind)*6/(k^2-1) - H[k]*(2*k-1)/(k+1))^(-1) } GFM(X) GFM2<-function(X,k){ #Gomes et al. 2005, best linear estimator (general rho) (page 17) X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) a<-vector(length=(k+1)) # vector for weights ind<-c(1:k) r<-rho(X) a1<-((1-r)/r)^2/k a2<-k*(1-2*r)/(1-r) a[1:k]<-a1*(1-a2*((ind/k)^(1-r)-((ind-1)/k)^(1-r))) a[k+1]=-(1-r)/r 1/(sum(a*log(X[n:(n-k)]))) } GFM2(X) CG<-function(X,k,alpha){ #Caeiro and Gomes 2002 (Test) X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) if (missing(alpha)){ r<-rho(X,k) alpha<--log(1-r-((1-r)^2-1)^0.5)/log(1-r) alpha<-max(alpha,1) } D1<-gamma(alpha)/MM(X,k,(alpha-1)) D2<- MM(X,k,(2*alpha)/gamma(2*alpha+1)) 1/(D1*D2^0.5) } CG(X) GMV1<-function(X,k,alpha){ #Gomes Miranda & Viseu Statistica neerlandica page 3 (equation 5) X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) if (missing(alpha)) alpha<- 1.5 ind<-c(1:k) U<- ind*(log(X[n-ind+1])-log(X[n-ind])) (alpha/k*sum((ind/k)^(alpha-1)*U))^(-1) } GMV1(X) GMV2<-function(X,k,alpha){ #Gomes Miranda & Viseu Statistica neerlandica page 3 (equation 6) X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) if (missing(alpha)) alpha<- 1.5 ind<-c(1:k) U<- ind*(log(X[n-ind+1])-log(X[n-ind])) (-alpha^2/k*sum((ind/k)^(alpha-1)*log(ind/k)*U))^(-1) } GMV2(X) GMV3<-function(X,k){ #Gomes Miranda & Viseu Statistica neerlandica page 11 (Jackknife equation 26) X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) r<-rho(X,min((k*1.1),(0.9*n))) # k shall be higher for rho calculation g1<-GMV1(X,k,1)^(-1) g2<-GMV1(X,k,(1-r))^(-1) r^2/((1-r)^2*g1-(1-2*r)*g2) } GMV3(X) GMV4<-function(X,k){ #Gomes Miranda & Viseu Statistica neerlandica page 11 (Jackknife equation 27) X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) r<-rho(X,min((k*1.1),(0.9*n))) # k shall be higher for rho calculation #------------------------------------------------------------ calculation for optimal alpha eps<-0.00000001 # maximal error for alpha calculation a_min<-1 a_max<-100 # maximal value=100 repeat{ alpha<-(a_min+a_max)/2 T<-3*alpha^3-5*alpha^2+alpha*(r^2-r+3)-(2*r^2-2*r+1) if (T<0) a_min<-alpha else a_max<-alpha if (abs(a_max-a_min)(0.9*n)) k<-round(0.9*n) } if(missing(p)){ H<-1/Hill(X) r<-rho(X) phi<-1-r/2-0.5*(r^2-4*r+2)^0.5 phi<-min(max(0,phi),(2^0.5/2)) p<-phi/H } if (p==0) alpha<-Hill(X,k) else{ ind<-1:k U<-(X[n-ind+1]/X[n-k])^p gamma<-(1-1/mean(U))/p alpha<-1/gamma } alpha } BGP(X) CGBW<-function(X,k,p){ #caeiro et al. 2016 estimator X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) r<-rho(X) B<-beta(X) H<-1/Hill(X) if(missing(p)){ phi<-1-r/2-0.5*(r^2-4*r+2)^0.5 phi<-min(max(0,phi),(2^0.5/2)) p<-phi/H } else { phi<-p*H } if (p==0) gamma<-1/Hill(X,k) else{ ind<-1:k U<-(X[n-ind+1]/X[n-k])^p gamma<-(1-1/mean(U))/p } (gamma*(1-B*(1-phi)/(1-r-phi)*(n/k)^r))^(-1) } CGBW(X) GRM_port<-function(X,k,q){ #Gomes, Henriwues-Rodrigues 2016 REVSTAT X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) if(missing(q)) q<-0.001 kk<-round(n*q+1) Y<-X[(kk+1):n]-X[kk] BGP(Y,k) } GRM_port(X) GHR_port<-function(X,k,q){ #Gomes and Henriques-Rodrigues 2016 equation 2.4 (Port version of Caeiro et al. 2005) X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) if(missing(q)) q<-0.001 kk<-round(n*q+1) Y<-X[(kk+1):n]-X[kk] CGP1(Y,k) } GHR_port(X) Alves1995<-function(X,k,c){ #Alves 1995 estimator (eq. 1.5 - positive tail indexes) X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) if(missing(c)) c<-5 if ((c*k)>n) c<-2 log(c)/log(X[n-k+1]/X[n-c*k+1]) } Alves1995(X) Alves<-function(X,k,k0){ #Alves estimator 1995 (eq. 1.6 allows for negative tail indexes) X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) if(missing(k0)) k0<-round(k^(3/4)) TT<-(X[(n-k0+1):n]-X[n-k])/(X[(n-k0)]-X[n-k]) (sum(log(TT))/k0)^(-1) } Alves(X) Weis<-function(X,k){ #Weis estimator X<-sort(X) n<-length(X) if(missing(k)) k<-round(log(n))*2 D1<-log((X[k]-X[1])/(X[(k/2)]-X[1])) log(2)/D1 } Weis(X) FH_MLE1<-function(par,X,k){ #Feuerverger and Hall 1999 estimator MLE - ne poluchilos' X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) D<-par[1] beta<-par[2] U<-(1:(n-1))*(log(X[n:2])-log(X[(n-1):1])) ind<-c(1:(n-1))/n W<-U*exp(-D*ind^(-beta)) M1<-mean(ind[1:k]^(-beta)) M2<-mean(W[1:k]) D*M1+log(M2) } FH_MLE<-function(X,k){ X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) L<-optim(c(1,1), FH_MLE1, X=X,k=k, method="L-BFGS-B", lower=c(-Inf,0), upper=c(Inf,Inf)) D<-L$par[1] rho<-L$par[2] U<-(1:(n-1))*(log(X[n:2])-log(X[(n-1):1])) ind<-c(1:(n-1))/n mean(U[1:k]*exp(-D*ind[1:k])^(-rho))^(-1) } FH_MLE(X) FH_MLE2<-function(par,X,k){ #Feuerverger and Hall 1999 OLS estimator Ne poluchilos' X<-sort(X) n<-length(X) if(missing(k)) k<-round(log(n))*2 mu<-par[1] D<-par[2] beta<-par[3] U<-(1:(n-1))*(X[n:2]-X[(n-1):1]) ind<-c(1:(n-1))/n V<-log(U) sum((V[1:k]-mu-D*ind^(-beta))^2) } k<-round(sqrt(length(X))) P<-optim(c(0.5,1,1), FH_MLE1, X=X,k=k, method="L-BFGS-B", lower=c(-Inf,Inf,0), upper=c(Inf,Inf,Inf)) exp(-0.5772157-P$par[1]) GM_FH<-function(X,k){ #Gomes and Martin (2004) variant of Feuerverger and Hall MLE estimator X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) U<-(1:(n-1))*(log(X[n:2])-log(X[(n-1):1])) g1<-Hill(X,k)^(-1) g2<-mean(U[1:k]) g3<- sum((2*c(1:k)-k-1)*U[1:k]) g4<- sum(c(1:k)*(2*c(1:k)-k-1)*U[1:k]) (g1-g2*g3/g4)^(-1) } GM_FH(X) GM_FH2<-function(X,k){ #Gomes and Martin (2004) variant of Feuerverger and Hall OLS estimator X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) U<-(1:(n-1))*(log(X[n:2])-log(X[(n-1):1])) g1<-2*(2*k+1)/(k*(k-1))*sum(log(U[1:k])) g2<-6/(k*(k-1))*sum(c(1:k)*log(U[1:k])) exp(g1-g2+0.5772157)^(-1) } GM_FH2(X) SAG<-function(X,k,q){ #SAG estimator X<-sort(X) if(missing(q)) q<-0.1 m<-round(n*q+1) Y<-X[(m+1):n]-X[m] n<-length(Y) if(missing(k)) k<-round(sqrt(n)) Hill(Y,k) } SAG(X) Smith<-function(X,u){ #Smith estimator X<-sort(X) n<-length(X) if(missing(u)) { k<-round(sqrt(n)) u<-X[n-k] } Y<-(X[X>u]-u) Z<-log(Y/u+1) mean(Z)^(-1) } Smith(X) BP<-function(X,k){ #Baek Pipiras 2010 page 4 X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) ind<-c(1:k) y<-log((ind-0.5)/k) X1<--log(X[n-ind+1]/X[n-k]) X2<-X[n-k]/X[n-ind+1] f<-lm(y~X1+X2) f$coefficients[2] } BP(X) BP1<-function(X,k){ #Baek Pipiras 2010 page 5, equation 2.16 X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) ind<-c(1:k) X1<-log((ind-0.5)/k) y<--log(X[n-ind+1]/X[n-k]) X2<-X[n-k]/X[n-ind+1] f<-lm(y~X1+X2) (f$coefficients[2])^(-1) } BP1(X) BP2<-function(X,k){ #Baek Pipiras 2010 page 5, equation 2.17 X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) H<-Hill(X,k)^(-1) ind<-c(1:k) X1<-log((ind-0.5)/k) y<--log(X[n-ind+1]/X[n-k]) X2<-X[n-k]/X[n-ind+1] f<-lm(y~X1+X2) beta<-f$coefficients[3] XX<-beta*sum(X[n-ind+1]^(-1)-X[n-k+1]^(-1))/k (H-XX)^(-1) } BP2(X) BP3<-function(X,k){ #Baek Pipiras 2010 page 5, equation 2.18 X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) H<-Hill(X,k) M2<-MM(X,k,2) (2+1/H)*2/M2-2*H*(2/M2)^0.5 } BP3(X) BP4<-function(X,k){ #Baek Pipiras 2010 page 5, equation 2.19 X<-sort(X) n<-length(X) if(missing(k)) k<-round(sqrt(n)) H<-Hill(X,k) M2<-MM(X,k,2) -H^2+(1/H+1)*2/M2 } BP4(X) HWm<-function(X,k){ #Hosking_Wallis method of moments X<-sort(X) n<-length(X) if(missing(k)) k<-n #k<-round(sqrt(n)) x<-mean(X[(n-k+1):n]) s2<-var(X[(n-k+1):n]) (0.5*(1-x^2/s2))^(-1) } HWm(X)