#!/usr/bin/env python
# Demonstrate use of variance reduction to exclude poor data.
# Uses the data example from Bevington & Robinson with 1 outlier.
import numpy
from scipy.stats import chi2
from kapteyn import kmpfit
from matplotlib.pyplot import figure, show, rc


def model(p, x):
    # Straight line y = a + b*x
    a, b = p
    return a + b*x


def residuals(p, data):
    # Weighted residuals for kmpfit
    x, y, err = data
    return (y - model(p, x))/err


# Artificial data (without outlier):
# x = numpy.array([2, 4, 6, 8, 10, 12])
# y = numpy.array([3.5, 7.2, 9.5, 17.1, 20.0, 25.5])
# err = numpy.array([0.55, 0.65, 0.74, 0.5, 0.85, 0.6])

# With outlier (25, 15.0):
x = numpy.array([25, 2, 4, 6, 8, 10, 12])
y = numpy.array([15.0, 3.5, 7.2, 9.5, 17.1, 20.0, 25.5])
# err = numpy.array([0.6, 0.55, 0.65, 0.74, 0.5, 0.85, 0.6])
err = numpy.ones(len(y))            # Unit weights

# Do the fit and find the variance reduction
params0 = (1, 1)
fitter = kmpfit.Fitter(residuals=residuals, data=(x, y, err))
fitter.fit(params0=params0)
print("======== Fit results, all data included ==========")
print("Params:                      ", fitter.params)
print("Uncertainties:               ", fitter.xerror)
print("Errors assuming red.chi^2=1: ", fitter.stderr)
print("Iterations:                  ", fitter.niter)
print("Function evaluations:        ", fitter.nfev)
print("dof:                         ", fitter.dof)
print("chi^2, rchi2:                ", fitter.chi2_min, fitter.rchi2_min)
print("Status:                      ", fitter.status)
a, b = fitter.params                # Keep for the 'unfiltered' plot

# Chi-squared goodness-of-fit test for the unfiltered sample
alpha = 0.01
rv = chi2(fitter.dof)
pval = 1 - rv.cdf(fitter.chi2_min)
print("If H0 was correct, then")
print("the probability to find a chi-squared higher than this:", pval)
if pval < alpha:
    print("pval=%g. If we set the threshold to alpha=%f, we REJECT H0." % (pval, alpha))
else:
    print("pval=%g. If we set the threshold to alpha=%f, we ACCEPT H0." % (pval, alpha))

# Variance reduction of the unfiltered sample
N = len(y)
varmod = (y - model(fitter.params, x))**2.0
y_av = y.sum()/N
vardat = (y - y_av)**2.0
vr0 = 100.0*(1 - varmod.sum()/vardat.sum())
print("Unfiltered sample: variance reduction (%):", vr0)

# Leave-one-out loop: exclude one point in each run. The filtered
# arrays have N-1 elements, but we loop over all N points so that
# every point is excluded exactly once.
Nf = N - 1
xf = numpy.zeros(Nf)
yf = numpy.zeros(Nf)
errf = numpy.zeros(Nf)
vr = []
fitter = kmpfit.Fitter(residuals=residuals, data=(xf, yf, errf))
for i in range(N):
    xf[:] = numpy.delete(x, i)      # Delete one point
    yf[:] = numpy.delete(y, i)
    errf[:] = numpy.delete(err, i)
    print("\nWe deleted from the sample: (%g,%g)" % (x[i], y[i]))
    fitter.fit(params0=params0)
    print("chi^2, rchi2:", fitter.chi2_min, fitter.rchi2_min)
    varmod = (yf - model(fitter.params, xf))**2.0
    yf_av = yf.sum()/Nf
    vardat = (yf - yf_av)**2.0
    vr1 = 100.0*(1 - varmod.sum()/vardat.sum())
    # A variance reduction of 100% implies that the model is perfect.
    # A bad model gives much lower values (sometimes even negative).
    print("Variance reduction (%):", vr1)
    print("Improvement: %g%%" % (vr1 - vr0))
    vr.append((vr1, i))

# The exclusion with the highest variance reduction identifies
# the poorest data point.
vr.sort()
print(vr)
i = vr[-1][1]
xf[:] = numpy.delete(x, i)          # Exclude that point and refit
yf[:] = numpy.delete(y, i)
errf[:] = numpy.delete(err, i)
fitter.fit(params0=params0)
print("\nFiltered sample: chi^2, rchi2:", fitter.chi2_min, fitter.rchi2_min)

# Repeat the goodness-of-fit test, now with the dof of the filtered fit
rv = chi2(fitter.dof)
pval = 1 - rv.cdf(fitter.chi2_min)
print("If H0 was correct, then")
print("the probability to find a chi-squared higher than this:", pval)
if pval < alpha:
    print("pval=%g. If we set the threshold to alpha=%f, we REJECT H0." % (pval, alpha))
else:
    print("pval=%g. If we set the threshold to alpha=%f, we ACCEPT H0." % (pval, alpha))
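# ------------------------------------------------------------------
# A minimal sketch (not part of kmpfit or of the original recipe):
# the variance-reduction statistic used above, packaged as a reusable
# helper. The name 'variance_reduction' is our own choice:
#   VR = 100 * (1 - sum((y - y_mod)^2) / sum((y - <y>)^2))
# ------------------------------------------------------------------
def variance_reduction(y, y_mod):
    """Return the variance reduction (%) of model values y_mod w.r.t. data y."""
    y = numpy.asarray(y, dtype='d')
    varmod = ((y - y_mod)**2).sum()       # Residual variance around the model
    vardat = ((y - y.mean())**2).sum()    # Variance of the data around its mean
    return 100.0*(1.0 - varmod/vardat)

# Sanity check: should match the best value printed in the loop above
print("Check, variance reduction (%):", variance_reduction(yf, model(fitter.params, xf)))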
# Prepare plot
fig = figure()
frame = fig.add_subplot(1, 1, 1)
rc('legend', fontsize=8)
frame.set_xlabel("x")
frame.set_ylabel("y")
frame.set_title("Exclude poor data with variance reduction")
frame.set_ylim(0, 1.1*y.max())
frame.set_xlim(0, 1.1*x.max())
frame.errorbar(x, y, yerr=err, fmt='bo', ms=3, label="data")
frame.plot(x, a + b*x, 'g', label="Fit unfiltered data")
frame.plot(x, model(fitter.params, x), 'm', label="Fit filtered data")
# Mark the excluded data point
frame.plot((x[i],), (y[i],), marker="o", ms=20, color="r", alpha=0.3)
leg = frame.legend(loc=2)
show()
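# ------------------------------------------------------------------
# Usage note (a sketch, not from the original example): if more than
# one point is suspect, the same leave-one-out procedure can be
# repeated on the filtered arrays, each time excluding the point whose
# removal gives the largest variance reduction, until the chi-squared
# test no longer rejects H0 at the chosen alpha.
# ------------------------------------------------------------------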