2011-02-01 09:57:29 +08:00
|
|
|
#
|
|
|
|
# Copyright (C) 2011 Instructure, Inc.
|
|
|
|
#
|
|
|
|
# This file is part of Canvas.
|
|
|
|
#
|
|
|
|
# Canvas is free software: you can redistribute it and/or modify it under
|
|
|
|
# the terms of the GNU Affero General Public License as published by the Free
|
|
|
|
# Software Foundation, version 3 of the License.
|
|
|
|
#
|
|
|
|
# Canvas is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
|
|
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
|
|
|
|
# A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
|
|
|
|
# details.
|
|
|
|
#
|
|
|
|
# You should have received a copy of the GNU Affero General Public License along
|
|
|
|
# with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
#
|
|
|
|
|
|
|
|
module Stats
  # Accumulates numeric samples and reports descriptive statistics about them.
  #
  # Running totals (max, min, sum, sum of squares) are updated as each sample
  # is added, so those readers and the values derived from them (mean,
  # variance, standard deviation) do not re-scan the samples. Quartiles and
  # histograms are computed from the stored samples on demand.
  #
  # Samples must respond to >, <, +, ** and to_f; nil is rejected.
  class Counter
    # max / min are nil until the first sample is added; sum and
    # sum_of_squares start at 0.
    attr_reader :max, :min, :sum, :sum_of_squares
    alias :total :sum

    # enumerable - optional collection of initial samples; each element is
    #              added through #<<, so a nil element raises.
    def initialize(enumerable=[])
      @items = []
      # @cache is reset on every insert but is not read anywhere in this
      # class; it is kept so the object's instance state is unchanged.
      @cache = {}
      @max = nil
      @min = nil
      @sum = 0
      @sum_of_squares = 0
      enumerable.each { |item| self << item }
    end

    # Yields each sample in insertion order. Without a block an Enumerator
    # is returned instead of failing on the yield.
    def each
      return enum_for(:each) unless block_given?
      @items.each { |item| yield item }
    end

    # Adds one sample and updates the running totals.
    #
    # Raises RuntimeError ("invalid value") when item is nil.
    # Returns the updated sum of squares (not self), so calls to << should
    # not be chained.
    def <<(item)
      raise "invalid value" if item.nil?
      @cache = {}
      @items << item
      if @max.nil? || @min.nil?
        # first sample: it is both the largest and the smallest seen
        @max = @min = item
      elsif item > @max
        @max = item
      elsif item < @min
        @min = item
      end
      @sum += item
      @sum_of_squares += item**2
    end
    alias :push :<<

    # Number of samples added so far.
    def size; @items.size; end
    alias :count :size

    # True when no samples have been added.
    def empty?; @items.size == 0; end

    # Arithmetic mean as a Float, or nil when there are no samples.
    def mean; @items.empty? ? nil : (sum.to_f / @items.size); end
    alias :avg :mean

    # population variance
    #
    # Computed as E[x^2] - mean^2 from the running totals. Returns nil when
    # there are no samples.
    def var
      return nil if @items.empty?
      raw = (sum_of_squares.to_f / @items.size) - (mean**2)
      # Floating-point rounding can leave a tiny negative result for
      # (near-)constant data; a variance is never negative, and a negative
      # value would make Math.sqrt raise in #stddev.
      raw < 0 ? 0.0 : raw
    end
    alias :variance :var

    # population standard deviation
    #
    # Returns nil when there are no samples.
    def stddev; @items.empty? ? nil : Math.sqrt(variance); end
    alias :standard_deviation :stddev

    # Returns [1st quartile, 2nd quartile (median), 3rd quartile] as Floats.
    #
    # Note that the methodology for determining quartiles is not universally
    # agreed upon (oddly enough). This method picks medians and gets results
    # that are universally agreed upon. The method also gives good results
    # for quartiles when the sample size is small. When it is large, any old
    # method will be close enough, but this one is very good.
    # The method is summarized well here:
    # http://www.stat.yale.edu/Courses/1997-98/101/numsum.htm
    #
    # Each quartile sits at a fractional zero-based position in the sorted
    # samples and is linearly interpolated between its two neighbours. With
    # fewer than three samples the outer positions fall outside the data, so
    # they are clamped to the first / last sample rather than extrapolated.
    #
    # An empty counter yields [0.0, 0.0, 0.0].
    def quartiles
      sorted_items = @items.sort
      return [0.0, 0.0, 0.0] if sorted_items.empty?

      total = sorted_items.length
      last_index = total - 1
      positions = [
        (total + 1) / 4.0 - 1,       # 1st Q
        (total + 1) / 2.0 - 1,       # 2nd Q
        (total + 1) * 3.0 / 4.0 - 1  # 3rd Q
      ]

      positions.map do |position|
        # keep the position inside the data (only matters for 1 or 2 samples)
        position = [[position, 0.0].max, last_index].min
        index = position.to_i
        weight = 1.0 - (position - index)
        get_weighted_nth(sorted_items, index, weight)
      end
    end

    # Returns a hash representing a histogram of the samples:
    #
    #   { :bin_width => Float, :bin_base => Float, :data => { bin => count } }
    #
    # Divides the samples into bin_width sized bins and counts how many fall
    # into each one; every key of :data is the lower edge of a bin. Set
    # bin_base to center the bins off something other than zero (this would
    # usually be the median for a bell curve).
    #
    # bin_width and bin_base are converted with Float(), which raises for
    # values that cannot be converted.
    def histogram(bin_width=1.0,bin_base=0.0)
      # need floats for the math to work
      bin_width = Float(bin_width)
      bin_base = Float(bin_base)
      bins = {}
      @items.each do |item|
        bin = ((item-bin_base)/bin_width).floor * bin_width + bin_base
        bins[bin] = bins.fetch(bin, 0) + 1
      end
      {:bin_width => bin_width, :bin_base => bin_base, :data => bins}
    end

    private

    # Linear interpolation between sorted_items[n] and sorted_items[n+1]:
    # weight is the share given to element n, the remainder goes to n+1.
    # A missing element converts to 0.0 via nil.to_f, which is harmless when
    # its share is zero (weight == 1.0 on the last element).
    def get_weighted_nth(sorted_items, n, weight)
      lower = sorted_items[n].to_f
      upper = sorted_items[n+1].to_f
      lower*weight + upper*(1.0-weight)
    end
  end
end
|