updated stats lib to include quartiles method

Change-Id: Ie949feca3066ef639959a49ced57e6ddcb46a6fa
Reviewed-on: https://gerrit.instructure.com/8159
Tested-by: Hudson <hudson@instructure.com>
Reviewed-by: Jacob Fugal <jacob@instructure.com>
This commit is contained in:
John Harrison 2012-01-18 15:59:24 -07:00 committed by Jacob Fugal
parent 1b42bcd1f5
commit 4c3602cf9d
2 changed files with 56 additions and 1 deletions

View File

@ -70,6 +70,45 @@ module Stats
# Standard deviation of the items: the square root of whatever `variance`
# returns. Yields nil when there are no items.
# Also available as `standard_deviation`.
def stddev
  return nil if @items.empty?
  Math.sqrt(variance)
end
alias :standard_deviation :stddev
# Returns [first quartile, second quartile (median), third quartile] for
# the items, as floats.
#
# Quartile definitions are not universally agreed upon. This one places
# each quartile at the (1-based) position (n + 1) * p in the sorted data
# and linearly interpolates between the two neighbouring items when that
# position is fractional. It gives sensible answers for small samples;
# for large samples every common method converges to about the same value.
# The method is summarized at
# http://www.stat.yale.edu/Courses/1997-98/101/numsum.htm
#
# Edge cases:
# * no items   -> [nil, nil, nil] (same "no data" signal as stddev's nil)
# * 1-2 items  -> positions that would fall before the first or after the
#                 last item are clamped to that item instead of
#                 extrapolating past the data.
def quartiles
  sorted_items = @items.sort
  return [nil, nil, nil] if sorted_items.empty?

  last_index = (sorted_items.length - 1).to_f
  [0.25, 0.5, 0.75].map do |fraction|
    # zero-based, possibly fractional, index of this quartile
    position = (sorted_items.length + 1) * fraction - 1
    # keep the index inside the array; without this, tiny samples read
    # past the ends of the data
    position = [[position, 0.0].max, last_index].min
    n = position.to_i
    # share of the result taken from item n; the rest comes from item n+1
    weight = 1.0 - (position - n)
    get_weighted_nth(sorted_items, n, weight)
  end
end
def histogram(bin_width=1.0,bin_base=0.0)
# returns a hash representing a histogram
# divides @items into bin_width sized bins
@ -93,5 +132,15 @@ module Stats
ret_val[:data] = bins
ret_val
end
private
# Linearly interpolates between two adjacent entries of a sorted list.
#
# sorted_items - array of numerics, already sorted
# n            - zero-based index of the lower entry
# weight       - share of the result taken from entry n (1.0 means "exactly
#                entry n"); entry n+1 contributes the remaining 1.0 - weight
#
# Returns a Float. When there is no entry at n+1 (n is the last index) the
# lower entry stands in for it, so the result stays at the last real value
# rather than being pulled towards 0.0 by nil.to_f.
def get_weighted_nth(sorted_items, n, weight)
  lower = sorted_items[n].to_f
  upper_item = sorted_items[n + 1]
  upper = upper_item.nil? ? lower : upper_item.to_f
  lower * weight + upper * (1.0 - weight)
end
end
end

View File

@ -115,5 +115,11 @@ describe Stats do
h.should == {:bin_width=>2.5, :bin_base=>1.5, :data=>{1.5=>2, 4.0=>3, -3.5=>1, -8.5=>1, -1.0=>2}}
end
# Ten unsorted values; sorted they are -7,-3,0,1,2,3,4,5,6,20, so the
# quartile positions fall between items and exercise the interpolation:
# Q1 between -3 and 0, Q2 between 2 and 3, Q3 between 5 and 6.
it "should return quartiles properly" do
c = Stats::Counter.new([6,4,2,-7,0,1,3,5,-3,20])
q = c.quartiles
q.should == [-0.75, 2.5, 5.25]
end
end
end