#!/usr/bin/env python
#------------------------------------------------------------
# Purpose: Program to best fit straight line parameters
#          to data given by Pearson, 1901
# Vog, 12 Dec, 2011
#
# The data for x and y are from Pearson:
# Pearson, K. 1901. On lines and planes of closest fit to systems
# of points in space. Philosophical Magazine 2:559-572
# A copy of this article can be found at:
# stat.smmu.edu.cn/history/pearson1901.pdf
#
# Pearson's best fit through (3.82, 3.70) ->
# a=5.784  b=-0.54556
#------------------------------------------------------------
import numpy
from matplotlib.pyplot import figure, show, rc
from kapteyn import kmpfit


def model(p, x):
    # Model: y = a + tan(theta)*x, with the slope parametrized as an angle
    a, theta = p
    return a + numpy.tan(theta)*x


def residuals(p, data):
    # Residuals for data with errors in both coordinates (effective variance):
    # each residual is weighted by 1/sqrt(1 + B^2), the perpendicular projection
    a, theta = p
    x, y = data
    B = numpy.tan(theta)
    wi = 1/numpy.sqrt(1.0 + B*B)
    d = wi*(y - model(p, x))
    return d


def residuals2(p, data):
    # Residuals for data with errors in y only (ordinary least squares)
    x, y = data
    d = y - model(p, x)
    return d


# Pearson's data
x = numpy.array([0.0, 0.9, 1.8, 2.6, 3.3, 4.4, 5.2, 6.1, 6.5, 7.4])
y = numpy.array([5.9, 5.4, 4.4, 4.6, 3.5, 3.7, 2.8, 2.8, 2.4, 1.5])
N = len(x)
beta0 = [5.0, 0.0]          # Initial estimates (a, theta)

# Analytical solution following Pearson's formulas
print("\nAnalytical solution")
print("===================")
x_av = x.mean()
y_av = y.mean()
sx = x - x_av
sy = y - y_av
Sx = sx.sum()
Sy = sy.sum()
Sxx = (sx*sx).sum()
Syy = (sy*sy).sum()
Sxy = (sx*sy).sum()
tan2theta = 2*Sxy/(Sxx - Syy)
twotheta = numpy.arctan(tan2theta)
b_pearson = numpy.tan(twotheta/2)
a_pearson = y_av - b_pearson*x_av
print("Best fit parameters: a=%.10f  b=%.10f" % (a_pearson, b_pearson))
rxy = Sxy/numpy.sqrt(Sxx*Syy)
print("Pearson's Corr. coef: ", rxy)
tan2theta = 2*rxy*numpy.sqrt(Sxx*Syy)/(Sxx - Syy)
twotheta = numpy.arctan(tan2theta)
print("Pearson's best tan2theta, theta, slope: ",
      tan2theta, 0.5*twotheta, numpy.tan(0.5*twotheta))
b1 = rxy*numpy.sqrt(Syy)/numpy.sqrt(Sxx)
print("b1 (Y on X), slope: ", b1, b1)          # for Y on X the slope is b1 itself
b2 = rxy*numpy.sqrt(Sxx)/numpy.sqrt(Syy)
print("b2 (X on Y), slope: ", b2, 1/b2)        # slope in the XY plane is 1/b2

# Prepare fit routine: errors in both coordinates (effective variance)
fitobj = kmpfit.Fitter(residuals=residuals, data=(x, y))
fitobj.fit(params0=beta0)
print("\n======== Results kmpfit: effective variance =========")
print("Params:            ", fitobj.params[0], numpy.tan(fitobj.params[1]))
print("Covariance errors: ", fitobj.xerror)
print("Standard errors:   ", fitobj.stderr)
print("Chi^2 min:         ", fitobj.chi2_min)
print("Reduced Chi^2:     ", fitobj.rchi2_min)

# Prepare fit routine: ordinary regression of Y on X
fitobj2 = kmpfit.Fitter(residuals=residuals2, data=(x, y))
fitobj2.fit(params0=beta0)
print("\n======== Results kmpfit Y on X =========")
print("Params:            ", fitobj2.params)
print("Covariance errors: ", fitobj2.xerror)
print("Standard errors:   ", fitobj2.stderr)
print("Chi^2 min:         ", fitobj2.chi2_min)
print("Reduced Chi^2:     ", fitobj2.rchi2_min)
a1, b1 = fitobj2.params[0], numpy.tan(fitobj2.params[1])

# Regression of X on Y: swap the data arrays
fitobj3 = kmpfit.Fitter(residuals=residuals2, data=(y, x))
fitobj3.fit(params0=(0, 5))
print("\n======== Results kmpfit X on Y =========")
print("Params:            ", fitobj3.params)
print("Covariance errors: ", fitobj3.xerror)
print("Standard errors:   ", fitobj3.stderr)
print("Chi^2 min:         ", fitobj3.chi2_min)
print("Reduced Chi^2:     ", fitobj3.rchi2_min)
a2, b2 = fitobj3.params[0], numpy.tan(fitobj3.params[1])
A2 = -a2/b2; B2 = 1/b2    # Convert X-on-Y result to intercept/slope in the XY plane

print("\nLeast squares solution")
print("======================")
print("a1, b1 (Y on X)", a1, b1)
print("a2, b2 (X on Y)", A2, B2)
tan2theta = 2*b1*b2/(b2 - b1)
twotheta = numpy.arctan(tan2theta)
best_slope = numpy.tan(0.5*twotheta)
print("Best fit tan2theta, Theta, slope: ", tan2theta, 0.5*twotheta, best_slope)
best_offs = y_av - best_slope*x_av
print("Best fit parameters: a=%.10f  b=%.10f" % (best_offs, best_slope))

# Bisector of the two regression lines through the centroid
bislope = (b1*B2 - 1 + numpy.sqrt((1 + b1*b1)*(1 + B2*B2)))/(b1 + B2)
abi = y_av - bislope*x_av
print("Bisector through centroid a, b: ", abi, bislope)
bbi = numpy.arctan(bislope)        # Back to angle again
B2_angle = numpy.arctan(B2)        # Back to angle again

# Some plotting: full data range and a zoom near the intercepts
rc('font', size=9)
rc('legend', fontsize=7)
fig = figure(1)
d = (x.max() - x.min())/10
for i in [0, 1]:
    if i == 0:
        X = numpy.linspace(x.min()-d, x.max()+d, 50)
        frame = fig.add_subplot(2, 1, i+1, aspect=1, adjustable='datalim')
    else:
        X = numpy.linspace(-0.9, -0.3, 50)
        frame = fig.add_subplot(2, 1, i+1, aspect=1)
    frame.plot(x, y, 'oy')
    frame.plot(X, model(fitobj.params, X), 'c', ls='--', lw=4,
               label="kmpfit effective variance")
    frame.plot(X, model(fitobj2.params, X), 'g', label="kmpfit regression Y on X")
    frame.plot(X, model((A2, B2_angle), X), 'r', label="kmpfit regression X on Y")
    frame.plot(X, model((abi, bbi), X), 'y', label="Bisector")
    frame.plot(X, model((a_pearson, numpy.arctan(b_pearson)), X), 'm', lw=2,
               label="Pearson's values")
    frame.plot((x_av,), (y_av,), '+k', markersize=14)   # Mark the centroid
    frame.set_ylabel("Y")
    frame.grid(True)
    if i == 1:
        frame.set_xlabel("X")
        frame.set_xlim(-0.9, -0.3)
        frame.set_ylim(6, 6.38)
    else:
        frame.set_title(r"$\mathrm{Pearson's\ data\ and\ model:\ } y = a+b*x$")
    leg = frame.legend(loc=1)
show()
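
# ----------------------------------------------------------------
# Optional cross-check (not part of Pearson's paper or the script
# above): assuming SciPy is installed, the same orthogonal
# straight-line fit can be reproduced with scipy.odr and should
# agree with the kmpfit effective-variance result to within
# rounding. The function name 'odr_line' and the start values are
# illustrative choices for this sketch only. Runs after the plot
# window is closed.
try:
    from scipy.odr import Model, RealData, ODR

    def odr_line(beta, xx):
        # Straight line y = a + b*x with beta = (a, b)
        return beta[0] + beta[1]*xx

    odr_fit = ODR(RealData(x, y), Model(odr_line), beta0=[5.0, -0.5])
    odr_out = odr_fit.run()
    print("\n======== Cross-check with scipy.odr (orthogonal fit) =========")
    print("Params:          ", odr_out.beta)
    print("Standard errors: ", odr_out.sd_beta)
except ImportError:
    # SciPy is not required for the script itself
    pass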