“Every wall is a door.” [Ralph Waldo Emerson]
Abstract
Real life data sometimes includes extreme (outlier) values which you might want to ignore or to exclude:
Appendix sbORB Code
Please read my Disclaimer.
Option Explicit
Function sbORB(rY As Range, rX As Range, _
Optional dSigmaFactor As Double = 3#, _
Optional dMaxOutlierPercentage As Double = 0.5) As Variant
'sbORB() = outlier resistant beta returns a beta and
'an alpha where y = beta * x + alpha is most accurate
'for (almost) all given x in rX and y in rY.
'"Almost" means that we successively (one by one) throw out outliers
'which have a distance of > dSigmaFactor * STDEV_of_all_Distances
'from the least square (LS) proxy.
'Source (EN): http://www.sulprobil.de/sborb_en/
'Source (DE): http://www.berndplumhoff.de/sborb_de/
'(C) (P) by Bernd Plumhoff 24-Jun-2007 PB V0.9
Dim vLinEst As Variant 'store LinEst() result of recent LS proxy during iterations
Dim dm2 As Double 'ortogonal slope to recent LS proxy
Dim dc As Double 'Constant c in: y2=m2*x2+c which is ortogonal to LS proxy through a given point
Dim dx2 As Double 'x2 in: y2 = m2 * x2 + c which is ortogonal to LS proxy through a given point
Dim dy2 As Double 'y2 in: y2 = m2 * x2 + c which is ortogonal to LS proxy through a given point
Dim i As Long, j As Long
Dim lcount As Long 'holds current number of live points
Dim lcount_orig As Long 'original (starting) number of points
Dim lcount_old As Long 'holds number of live points of previous iteration
Dim daverage As Double 'average of distances to LS proxy of current iterations' live points
Dim dstdev As Double 'Stdev of distances to LS proxy of current iterations' live points
Dim dDistMax As Double
Dim lDistMaxIdx As Long
lcount = rX.Rows.Count
If rX.Columns.Count > lcount Then
lcount = rX.Columns.Count
End If
lcount_orig = lcount
lcount_old = lcount + 1
ReDim dDist(1 To lcount) As Double 'store distances of live points to recent LS proxy (line)
ReDim dX(1 To lcount) As Double
ReDim dY(1 To lcount) As Double 'store coordinates of "live" points during iterations
'read data row-wise or column-wise
If rX.Rows.Count > rX.Columns.Count Then
For i = 1 To lcount
dX(i) = rX.Cells(i, 1)
dY(i) = rY.Cells(i, 1)
Next i
Else
For i = 1 To lcount
dX(i) = rX.Cells(1, i)
dY(i) = rY.Cells(1, i)
Next i
End If
Do
lcount_old = lcount
ReDim Preserve dDist(1 To lcount) As Double 'Store distances of live points to last LS proxy
ReDim Preserve dX(1 To lcount) As Double
ReDim Preserve dY(1 To lcount) As Double 'Store coordinates of "live" points during iterations
vLinEst = Application.WorksheetFunction.LinEst(dY, dX, True, True)
dDistMax = 0#
lDistMaxIdx = 1
For i = 1 To lcount
'Calculate distances of live points to recent LS proxy
dm2 = -1# / vLinEst(1, 1)
dc = dY(i) - dX(i) * dm2
dx2 = (dc - vLinEst(1, 2)) / (vLinEst(1, 1) - dm2)
dy2 = dm2 * dx2 + dc
dDist(i) = Sqr((dX(i) - dx2) * (dX(i) - dx2) + (dY(i) - dy2) * (dY(i) - dy2))
'remember largest distance and its index
If dDist(i) > dDistMax Then
dDistMax = dDist(i)
lDistMaxIdx = i
End If
Next i
'calculate average and standard deviation of live points' distances to LS proxy
daverage = Application.WorksheetFunction.Average(dDist)
dstdev = Application.WorksheetFunction.StDev(dDist)
' 'kill points with distance > dSigmaFactor * dstdev 'Attention: might erase too many points
' j = 1
' For i = 1 To lcount
' If dDist(i) <= dstdev * dSigmaFactor Then
' dX(j) = dX(i)
' dY(j) = dY(i)
' j = j + 1
' Else
' Debug.Print "Lcount: " & lcount & ". Throwing out (" & dX(i) & ";" & dY(i) & ")"
' End If
' Next i
' lcount = j - 1
'kill point with largest distance > dSigmaFactor * dstdev
If dDist(lDistMaxIdx) >= dstdev * dSigmaFactor Then
Debug.Print "Lcount: " & lcount & ". Throwing out (" & dX(lDistMaxIdx) & _
";" & dY(lDistMaxIdx) & ")"
dX(lDistMaxIdx) = dX(lcount)
dY(lDistMaxIdx) = dY(lcount)
lcount = lcount - 1
End If
Loop While lcount_old > lcount And lcount / lcount_orig > 1# - dMaxOutlierPercentage
If lcount < lcount_old Then
vLinEst = Application.WorksheetFunction.LinEst(dY, dX, True, True)
End If
sbORB = vLinEst
End Function
Download
Please read my Disclaimer.
sbORB.xlsm [24 KB Excel file, open and use at your own risk]