#!/usr/bin/env python
# Demonstrate use of variance reduction to exclude poor data
# Use data example from Bevington & Robinson with 1 outlier

from numpy.random import normal
import numpy
from kapteyn import kmpfit
from matplotlib.pyplot import figure, show, rc

def model(p, x):
   a, b = p
   y = a+b*x
   return y

def residuals(p, data):
   a, b = p
   x, y, err = data
   return (y-model(p,x))/err

# Artificial data
# x = numpy.array([2, 4, 6, 8, 10, 12])
# y = numpy.array([3.5, 7.2, 9.5, 17.1, 20.0, 25.5])
# err = numpy.array([0.55, 0.65, 0.74, 0.5, 0.85, 0.6])

# With outlier
x = numpy.array([25, 2, 4, 6, 8, 10, 12])
y = numpy.array([15.0, 3.5, 7.2, 9.5, 17.1, 20.0, 25.5])
#err = numpy.array([0.6, 0.55, 0.65, 0.74, 0.5, 0.85, 0.6])
err = numpy.ones(len(y))

# Prepare plot
fig = figure()
rc('legend', fontsize=8)
frame = fig.add_subplot(1,1,1)
frame.plot(x, y, 'go', label="data")
frame.set_xlabel("x")
frame.set_ylabel("y")
frame.set_title("Exclude poor data with variance reduction")

# Do the fit and find the Variance Reduction
params0 = (1,1)
fitter = kmpfit.Fitter(residuals=residuals, data=(x,y,err))
fitter.fit(params0=params0)

print "======== Fit results all data included =========="
print "Params:                      ", fitter.params
print "Uncertainties:               ", fitter.xerror
print "Errors assuming red.chi^2=1: ", fitter.stderr
print "Iterations:                  ", fitter.niter 
print "Function ev:                 ", fitter.nfev
print "dof:                         ", fitter.dof
print "chi^2, rchi2:                ", fitter.chi2_min, fitter.rchi2_min
print "Status:                      ", fitter.status
a, b = fitter.params

alpha = 0.01
from scipy.stats import chi2
rv = chi2(fitter.dof)
pval = 1-rv.cdf(fitter.chi2_min)
print "If H0 was correct, then" 
print "the probability to find a chi-squared higher than this:  ", pval
if pval < alpha:
   print "pval=%g. If we set the threshold to alpha=%f, we REJECT H0."%(pval, alpha)
else:
   print "pval=%g. If we set the threshold to alpha=%f, we ACCEPT H0."%(pval, alpha)

N = len(y)
varmod = (y-model(fitter.params,x))**2.0
y_av = y.sum()/N
vardat = (y-y_av)**2.0
vr0 = 100.0*(1-(varmod.sum()/vardat.sum()))
print "Unfiltered sample: variance reduction(%):", vr0

# Prepare loop where we exclude one point in each run
xf = numpy.zeros(N-1)
yf = numpy.zeros(N-1)
errf = numpy.zeros(N-1)
vr = []

fitter = kmpfit.Fitter(residuals=residuals, data=(xf,yf,errf))
N = len(yf)
for i in range(N):
   xf[:] = numpy.delete(x,i)      # Delete one point
   yf[:] = numpy.delete(y,i)
   errf[:] = numpy.delete(err,i)
   print "\nWe deleted from the sample: (%g,%g)"%(x[i],y[i])
   fitter.fit(params0=params0)
   print "chi^2, rchi2: ", fitter.chi2_min, fitter.rchi2_min
   varmod = (yf-model(fitter.params,xf))**2.0
   yf_av = yf.sum()/N
   vardat = (yf-yf_av)**2.0
   vr1 = 100.0*(1-(varmod.sum()/vardat.sum()))
   # A vr of 100% implies that the model is perfect
   # A bad model gives much lower values (sometimes negative)
   print "Variance reduction%:", vr1
   print "Improvement: %g%%"%(vr1-vr0)
   vr.append([vr1,i])

vr.sort()
print vr
i = vr[-1][1]
xf[:] = numpy.delete(x,i)      # Delete one point
yf[:] = numpy.delete(y,i)
errf[:] = numpy.delete(err,i)
fitter.fit(params0=params0)
print "Filtered sample: chi^2, rchi2: ", fitter.chi2_min, fitter.rchi2_min
pval = 1-rv.cdf(fitter.chi2_min)
print "If H0 was correct, then"
print "the probability to find a chi-squared higher than this:  ", pval
if pval < alpha:
   print "pval=%g. If we set the threshold to alpha=%f, we REJECT H0."%(pval, alpha)
else:
   print "pval=%g. If we set the threshold to alpha=%f, we ACCEPT H0."%(pval, alpha)

frame.set_ylim(0, 1.1*y.max())
frame.set_xlim(0, 1.1*x.max())
frame.errorbar(x, y, yerr=err, fmt='bo')
frame.plot(x, a+b*x, 'g', label="Fit unfiltered data")
frame.plot(x, model(fitter.params,x), 'm', label="Fit filtered data")
frame.plot((x[i],), (y[i],), marker="o", ms=20, color="r", alpha=0.3)
leg = frame.legend(loc=2)
show()