Commit f967c26d authored by panos, committed by Jérome Perrin

Insert DistributionFitting object that can fit statistical distributions with...

Insert DistributionFitting object that can fit statistical distributions with Maximum Likelihood Estimation and Kolmogorov-Smirnov distribution fitting test in a given data sample
parent 95fe0958
# ===========================================================================
# Copyright 2013 University of Limerick
#
# This file is part of DREAM.
#
# DREAM is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# DREAM is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with DREAM. If not, see <http://www.gnu.org/licenses/>.
# ===========================================================================
'''
Created on 19 Feb 2014
@author: Panos
'''
import rpy2.robjects as robjects
from rpy2.robjects.packages import importr
import rpy2.rinterface
from rpy2.rinterface import RRuntimeError
MASS= importr('MASS')
#=============================================== Distribution Fitting ============================================#
#This script consists of two objects for distribution fitting:
#Distributions object: maximum-likelihood fitting of univariate distributions, without requiring an analytical expression for the likelihood.
#DistFittest object: Kolmogorov-Smirnov distribution fitting tests, used to find the best-fitting distribution for the given data points.
#The Distributions object
class Distributions:
    '''
    Maximum-likelihood fitting of univariate distributions through the R function fitdistr.

    Every public *_distrfit method takes a data sample, fits one distribution to it and
    returns a dictionary with the distribution name under 'type' plus one entry per fitted
    parameter. When R raises an error during the fit the method returns None instead.
    The raw R fit object is also kept on the instance (self.Normal, self.Lognormal,
    self.NegBinom, self.Exp, self.Poisson, self.Logist, self.Geom, self.Gam, self.Weib,
    self.Cauchy) after a successful fit.
    '''

    def _fit(self, data, densfun, slot, label, keys):
        #Shared implementation behind all the *_distrfit methods
        #  densfun: distribution name handed to fitdistr
        #  slot:    name of the instance attribute that receives the raw R fit
        #  label:   value stored under 'type' in the returned dictionary
        #  keys:    dictionary keys for the fitted parameters, in the order they appear in the fit
        sample = robjects.FloatVector(data)      #RPy2 needs the sample as an R float vector
        fitdistr = robjects.r['fitdistr']        #Look up the R function fitdistr
        try:
            fitted = fitdistr(sample, densfun)
        except RRuntimeError:
            return None                          #R could not fit this distribution to the sample
        setattr(self, slot, fitted)              #Keep the raw fit available on the instance
        summary = {'type': label}
        for position, key in enumerate(keys):
            summary[key] = fitted[0][position]   #First component of the fit holds the parameter values
        return summary

    def Normal_distrfit(self, data):
        #Normal distribution: returns {'type','mean','sd'} or None
        return self._fit(data, 'Normal', 'Normal', 'Normal', ('mean', 'sd'))

    def Lognormal_distrfit(self, data):
        #Lognormal distribution: returns {'type','logmean','logsd'} or None
        return self._fit(data, 'Lognormal', 'Lognormal', 'Lognormal', ('logmean', 'logsd'))

    def NegativeBinomial_distrfit(self, data):
        #Negative Binomial distribution: returns {'type','size','mu'} or None
        return self._fit(data, 'Negative Binomial', 'NegBinom', 'NegativeBinomial', ('size', 'mu'))

    def Exponential_distrfit(self, data):
        #Exponential distribution: returns {'type','rate'} or None
        return self._fit(data, 'Exponential', 'Exp', 'Exponential', ('rate',))

    def Poisson_distrfit(self, data):
        #Poisson distribution: returns {'type','lambda'} or None
        return self._fit(data, 'Poisson', 'Poisson', 'Poisson', ('lambda',))

    def Logistic_distrfit(self, data):
        #Logistic distribution: returns {'type','location','scale'} or None
        return self._fit(data, 'logistic', 'Logist', 'Logistic', ('location', 'scale'))

    def Geometric_distrfit(self, data):
        #Geometric distribution: returns {'type','probability'} or None
        return self._fit(data, 'Geometric', 'Geom', 'Geometric', ('probability',))

    def Gamma_distrfit(self, data):
        #Gamma distribution: returns {'type','shape','rate'} or None
        return self._fit(data, 'Gamma', 'Gam', 'Gamma', ('shape', 'rate'))

    def Weibull_distrfit(self, data):
        #Weibull distribution: returns {'type','shape','scale'} or None
        return self._fit(data, 'weibull', 'Weib', 'Weibull', ('shape', 'scale'))

    def Cauchy_distrfit(self, data):
        #Cauchy distribution: returns {'type','location','scale'} or None
        return self._fit(data, 'Cauchy', 'Cauchy', 'Cauchy', ('location', 'scale'))
#The Distribution Fitting test object
class DistFittest:
    '''
    Kolmogorov-Smirnov distribution fitting tests built on the R functions fitdistr and ks.test.

    Every *_kstest method fits one distribution to the data sample and then runs ks.test of
    the sample against that fitted distribution. It returns the R ks.test result, or None
    when R raises an error during the fit. After a successful call the raw fit and the test
    result are also kept on the instance (e.g. self.Normal and self.Normtest).

    ks_test runs the tests of all candidate distributions and reports the best fitting one.
    '''

    #Candidates compared by ks_test:
    #(name reported under 'type', test method, attribute holding the fit, number of parameters)
    _KS_CANDIDATES = (
        ('Normal', 'Norm_kstest', 'Normal', 2),
        ('Lognormal', 'Lognorm_kstest', 'Lognormal', 2),
        ('Exponential', 'Exp_kstest', 'Exp', 1),
        ('Poisson', 'Pois_kstest', 'Poisson', 1),
        ('Geometric', 'Geom_kstest', 'Geom', 1),
        ('Logistic', 'Logis_kstest', 'Logist', 2),
        ('Gamma', 'Gam_kstest', 'Gam', 2),
        ('Weibull', 'Weib_kstest', 'Weib', 2),
        ('Cauchy', 'Cauchy_kstest', 'Cauchy', 2),
    )

    def _fit_and_test(self, data, densfun, fit_attr, test_attr, cdf, cdf_arg_names):
        #Shared implementation behind all the *_kstest methods
        #  densfun:       distribution name handed to fitdistr
        #  fit_attr:      instance attribute that receives the raw R fit
        #  test_attr:     instance attribute that receives the ks.test result
        #  cdf:           name of the R cumulative distribution function given to ks.test
        #  cdf_arg_names: R argument names of cdf matching the fitted parameters, in fit order
        sample = robjects.FloatVector(data)      #RPy2 needs the sample as an R float vector
        rkstest = robjects.r['ks.test']          #Look up the R function ks.test
        rFitDistr = robjects.r['fitdistr']       #Look up the R function fitdistr
        try:
            fit = rFitDistr(sample, densfun)
        except RRuntimeError:
            return None                          #R could not fit this distribution to the sample
        setattr(self, fit_attr, fit)
        #The fitted parameters are handed to the cdf by name so that each estimate reaches the
        #argument it was fitted for, whatever the positional order of the R function is
        cdf_args = {}
        for position, arg_name in enumerate(cdf_arg_names):
            cdf_args[arg_name] = fit[0][position]    #First component of the fit holds the estimates
        outcome = rkstest(sample, cdf, **cdf_args)
        setattr(self, test_attr, outcome)
        return outcome

    def Norm_kstest(self, data):
        #Kolmogorov-Smirnov test of the sample against the fitted Normal distribution
        return self._fit_and_test(data, 'Normal', 'Normal', 'Normtest', 'pnorm', ('mean', 'sd'))

    def Lognorm_kstest(self, data):
        #Kolmogorov-Smirnov test of the sample against the fitted Lognormal distribution
        return self._fit_and_test(data, 'Lognormal', 'Lognormal', 'Lognormtest', 'plnorm', ('meanlog', 'sdlog'))

    def NegBinom_kstest(self, data):
        #Kolmogorov-Smirnov test of the sample against the fitted Negative Binomial distribution
        #The fit reports (size, mu); both come from the estimates (first component of the fit)
        #and are passed by name, since the second positional argument of pnbinom is prob, not mu
        return self._fit_and_test(data, 'Negative Binomial', 'NegBinom', 'NegBinomtest', 'pnbinom', ('size', 'mu'))

    def Exp_kstest(self, data):
        #Kolmogorov-Smirnov test of the sample against the fitted Exponential distribution
        return self._fit_and_test(data, 'Exponential', 'Exp', 'Exptest', 'pexp', ('rate',))

    def Pois_kstest(self, data):
        #Kolmogorov-Smirnov test of the sample against the fitted Poisson distribution
        return self._fit_and_test(data, 'Poisson', 'Poisson', 'Poistest', 'ppois', ('lambda',))

    def Geom_kstest(self, data):
        #Kolmogorov-Smirnov test of the sample against the fitted Geometric distribution
        return self._fit_and_test(data, 'Geometric', 'Geom', 'Geomtest', 'pgeom', ('prob',))

    def Logis_kstest(self, data):
        #Kolmogorov-Smirnov test of the sample against the fitted Logistic distribution
        return self._fit_and_test(data, 'logistic', 'Logist', 'Logistest', 'plogis', ('location', 'scale'))

    def Gam_kstest(self, data):
        #Kolmogorov-Smirnov test of the sample against the fitted Gamma distribution
        #The second gamma estimate of fitdistr is a rate, so it is passed to pgamma as rate (not scale)
        return self._fit_and_test(data, 'Gamma', 'Gam', 'Gamtest', 'pgamma', ('shape', 'rate'))

    def Weib_kstest(self, data):
        #Kolmogorov-Smirnov test of the sample against the fitted Weibull distribution
        return self._fit_and_test(data, 'weibull', 'Weib', 'Weibtest', 'pweibull', ('shape', 'scale'))

    def Cauchy_kstest(self, data):
        #Kolmogorov-Smirnov test of the sample against the fitted Cauchy distribution
        return self._fit_and_test(data, 'Cauchy', 'Cauchy', 'Cauchytest', 'pcauchy', ('location', 'scale'))

    def ks_test(self, data):
        '''
        Conduct the Kolmogorov-Smirnov test for every candidate distribution and return the best one.

        The candidates are Normal, Lognormal, Exponential, Poisson, Geometric, Logistic, Gamma,
        Weibull and Cauchy. The best fitting distribution is the one whose test gives the
        smallest D statistic; on a tie the candidate listed first wins.

        Returns a dictionary {'type': <distribution name>, 'parameters': [<fitted parameters>]},
        or None when none of the candidates could be fitted to the data sample.
        '''
        sample = list(data)                      #Materialise once so every test sees the same values
        best = None                              #(D, name, fit attribute, number of parameters) of the best candidate so far
        for name, test_method, fit_attr, n_params in self._KS_CANDIDATES:
            outcome = getattr(self, test_method)(sample)
            if outcome is None:
                continue                         #This distribution could not be fitted, leave it out
            statistic = outcome[0][0]            #D statistic of the Kolmogorov-Smirnov test
            if best is None or statistic < best[0]:
                best = (statistic, name, fit_attr, n_params)
        if best is None:
            return None
        name, fit_attr, n_params = best[1], best[2], best[3]
        fit = getattr(self, fit_attr)            #Fit stored by the winning test, made on this same sample
        return {'type': name, 'parameters': [fit[0][i] for i in range(n_params)]}
\ No newline at end of file
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment