quiz item analysis generation, closes CNVS-3421

report generator to do quiz item analysis

usage:

quiz = Quiz.find(<id>)
summary = QuizStatistics::ItemAnalysis::Summary.new(quiz)
summary.each do |item|
  puts item.question_text
  puts "respondents: #{item.num_respondents}"
  puts "correct: #{item.num_respondents(:correct)}"
  puts "standard deviation: #{item.standard_deviation}"
  ...
  puts "----------------"
end

test plan:
N/A, see specs

Change-Id: Id36e4ef404af089f526cf9243a14d42eb52064a6
Reviewed-on: https://gerrit.instructure.com/18494
Tested-by: Jenkins <jenkins@instructure.com>
Reviewed-by: Joe Tanner <joe@instructure.com>
Product-Review: Jon Jensen <jon@instructure.com>
QA-Review: Jon Jensen <jon@instructure.com>
This commit is contained in:
Jon Jensen 2013-03-11 12:19:00 -06:00
parent a0905959b3
commit b420729e61
7 changed files with 467 additions and 0 deletions

View File

@ -0,0 +1,2 @@
# namespace for the quiz item analysis report classes
class QuizStatistics::ItemAnalysis
end

View File

@ -0,0 +1,137 @@
# per-question statistics for a quiz item analysis report. an item collects
# the answer each respondent chose for one question and derives counts,
# variance, difficulty and point-biserial values from them.
class QuizStatistics::ItemAnalysis::Item
  include TextHelper

  # build an item for the given question hash, or return nil if its
  # :question_type is not one of allowed_types
  def self.from(summary, question)
    return unless allowed_types.include?(question[:question_type])
    new summary, question
  end

  # question types that item analysis knows how to handle
  def self.allowed_types
    ["multiple_choice_question", "true_false_question"]
  end

  # summary  - the owning summary; consulted for score buckets, mean
  #            scores and the score standard deviation
  # question - question hash; reads :answers (each with :weight and :id),
  #            :points_possible, :question_text, :position and :id
  def initialize(summary, question)
    @summary = summary
    @question = question
    # put the correct answer first
    @answers = question[:answers].
      each_with_index.
      sort_by{ |answer, i| [-answer[:weight], i] }.
      map{ |answer, i| answer[:id] }
    @respondent_ids = []
    @respondent_map = Hash.new{ |hash, key| hash[key] = [] }
    @scores = []
  end

  # record one respondent's answer to this question. a missing answer
  # record or one without an :answer_id is ignored.
  def add_response(answer, respondent_id)
    # blanks don't count for item stats
    return unless answer && answer[:answer_id]
    # anything memoized so far was computed without this response
    reset_memoized_stats
    answer_id = answer[:answer_id]
    @scores << (answer_id == @answers.first ? question[:points_possible] : 0)
    @respondent_ids << respondent_id
    @respondent_map[answer_id] << respondent_id
  end

  attr_reader :question

  def question_text
    strip_tags @question[:question_text]
  end

  # get number of respondents that match the specified filter(s). if no
  # filters are given, just return the respondent count.
  # filters may be:
  #   :correct
  #   :incorrect
  #   <summary bucket symbol> (e.g. :top)
  #   <answer id>
  #
  # e.g. num_respondents(:correct, :top) => # of students in the top 27%
  #                                         # who got it right
  def num_respondents(*filters)
    respondents = all_respondents
    filters.each do |filter|
      respondents &= respondents_for(filter)
    end
    respondents.size
  end

  # population variance, not sample variance, since we have all datapoints
  def variance
    @variance ||= SimpleStats.variance(@scores)
  end

  # population sd, not sample sd, since we have all datapoints
  def standard_deviation
    @sd ||= Math.sqrt(variance)
  end

  # fraction of respondents that chose the correct answer
  def difficulty_index
    ratio_for(:correct)
  end

  # one point-biserial value per answer, correct answer first. an entry is
  # nil when the value is undefined for that answer (see
  # compute_point_biserial)
  def point_biserials
    @answers.map { |answer|
      point_biserial_for(answer)
    }
  end

  # fraction of respondents matching the given filter/answer. note that
  # this is a float division, so it yields NaN when nobody has responded
  def ratio_for(answer)
    respondents_for(answer).size.to_f / all_respondents.size
  end

  def <=>(other)
    sort_key <=> other.sort_key
  end

  # items without a :position sort after positioned ones (10000 stands in
  # for the missing position)
  def sort_key
    [question[:position] || 10000, question_text, question[:id], -all_respondents.size]
  end

  private

  def correct_answer
    @answers.first
  end

  def all_respondents
    @respondent_ids
  end

  # drop every memoized value derived from the recorded responses
  def reset_memoized_stats
    @variance = nil
    @sd = nil
    @respondents_for = nil
    @point_biserials = nil
  end

  # respondent ids matching a single filter (see num_respondents for the
  # accepted filters). results are memoized per filter until the next
  # add_response.
  def respondents_for(filter)
    @respondents_for ||= {}
    @respondents_for[filter] ||=
      if filter == :correct
        respondents_for(correct_answer)
      elsif filter == :incorrect
        all_respondents - respondents_for(correct_answer)
      elsif @summary.buckets[filter]
        all_respondents & @summary.buckets[filter]
      else # filter is an answer
        # fetch bypasses the map's default proc, so merely asking about an
        # answer nobody chose doesn't add a key for it
        @respondent_map.fetch(filter) { [] }
      end
  end

  def point_biserial_for(answer)
    @point_biserials ||= {}
    @point_biserials[answer] ||= compute_point_biserial(answer)
  end

  # returns nil when the value is undefined: nobody chose the answer,
  # everybody chose it (there is no "other" group to compare against), or
  # the respondents' scores have no spread
  def compute_point_biserial(answer)
    mean, mean_other = mean_score_split(answer)
    return nil unless mean && mean_other
    sd = @summary.standard_deviation(all_respondents)
    return nil if sd.zero?
    ratio = ratio_for(answer)
    (mean - mean_other) / sd * Math.sqrt(ratio * (1 - ratio))
  end

  # calculate:
  # 1. the mean score of those who chose the given answer
  # 2. the mean score of those who chose any other answer
  def mean_score_split(answer)
    these_respondents = respondents_for(answer)
    other_respondents = all_respondents - these_respondents
    [
      @summary.mean_score_for(these_respondents),
      @summary.mean_score_for(other_respondents)
    ]
  end
end

View File

@ -0,0 +1,86 @@
# quiz-wide item analysis. gathers one attempt per quiz submission, builds
# an Item for every supported question, and exposes the items (sorted) via
# Enumerable along with overall score statistics.
class QuizStatistics::ItemAnalysis::Summary
  include Enumerable
  extend Forwardable

  def_delegators :sorted_items, :size, :length, :each

  # quiz    - the quiz to analyze; its quiz_submissions are read
  # options - optional hash. :buckets is a list of [name, cutoff] pairs in
  #           ascending order, where cutoff is the cumulative fraction of
  #           respondents (ranked by score) at which the bucket ends
  def initialize(quiz, options = {})
    @quiz = quiz
    # a submission whose submitted_versions is empty yields nil here; such
    # entries have nothing to analyze, so drop them
    @attempts = quiz.quiz_submissions.map { |qs| qs.submitted_versions.first }.compact
    # copy so that filling in the default doesn't modify the caller's hash
    @options = options.dup
    # bottom 27%, middle 46%, top 27%
    @options[:buckets] ||= [
      [:bottom, 0.27],
      [:middle, 0.73],
      [:top, 1]
    ]
    # set up front so the readers below work even when there are no attempts
    @items = {}
    @respondent_scores = {}
    aggregate_data
  end

  # feed every attempt's score and per-question answers into the summary
  def aggregate_data
    @attempts.each do |attempt|
      add_respondent attempt.user_id, attempt.score
      attempt.quiz_data.each_with_index do |question, i|
        add_response question, attempt.submission_data[i], attempt.user_id
      end
    end
  end

  # record one answer. questions of an unsupported type (Item.from returns
  # nil) are remembered as nil so the lookup isn't repeated, and are
  # otherwise ignored.
  def add_response(question, answer, respondent_id)
    @items ||= {}
    unless @items.key?(question)
      @items[question] = QuizStatistics::ItemAnalysis::Item.from(self, question)
    end
    item = @items[question]
    return unless item
    item.add_response(answer, respondent_id)
  end

  def add_respondent(respondent_id, score)
    @respondent_scores ||= {}
    @respondent_scores[respondent_id] = score
  end

  # group the student ids into buckets according to score (e.g. bottom
  # 27%, middle 46%, top 27%)
  def buckets
    @buckets ||= begin
      buckets = @options[:buckets]
      ranked_respondent_ids = @respondent_scores.sort_by(&:last).map(&:first)
      Hash[buckets.each_with_index.map { |(name, cutoff), i|
        # each bucket starts where the previous one ended
        floor = i > 0 ? (buckets[i - 1][1] * ranked_respondent_ids.length).round : 0
        ceiling = (cutoff * ranked_respondent_ids.length).round
        [name, ranked_respondent_ids[floor...ceiling]]
      }]
    end
  end

  # mean overall score of the given respondents, or nil if there are none
  def mean_score_for(respondent_ids)
    return nil if respondent_ids.empty?
    @respondent_scores.slice(*respondent_ids).values.sum * 1.0 / respondent_ids.size
  end

  def sorted_items
    @sorted_items ||= analyzed_items.sort
  end

  # population variance, not sample variance, since we have all datapoints
  def variance(respondent_ids = :all)
    @variance ||= {}
    @variance[respondent_ids] ||= begin
      scores = (respondent_ids == :all ? @respondent_scores : @respondent_scores.slice(*respondent_ids)).values
      SimpleStats.variance(scores)
    end
  end

  # population sd, not sample sd, since we have all datapoints
  def standard_deviation(respondent_ids = :all)
    @sd ||= {}
    @sd[respondent_ids] ||= Math.sqrt(variance(respondent_ids))
  end

  # cronbach's alpha across all items. returns nil when it is undefined,
  # i.e. with fewer than two items or when the overall scores have no
  # variance (either would mean dividing by zero)
  def alpha
    @alpha ||= begin
      items = analyzed_items
      total_variance = variance
      if items.size > 1 && total_variance != 0
        variance_sum = items.map(&:variance).sum
        items.size / (items.size - 1.0) * (1 - variance_sum / total_variance.to_f)
      end
    end
  end

  private

  # the items actually being analyzed, i.e. without the nil placeholders
  # kept for unsupported question types
  def analyzed_items
    (@items || {}).values.compact
  end
end

10
lib/simple_stats.rb Normal file
View File

@ -0,0 +1,10 @@
# small statistics helpers, callable as SimpleStats.variance(...)
module SimpleStats
  # variance of a list of numbers.
  #
  # items - array of numbers
  # type  - :population (the default) divides by the number of items; any
  #         other value divides by one less (sample variance)
  #
  # returns a Float; 0.0 when there are fewer than two items
  def variance(items, type = :population)
    count = items.length
    # every other path returns a Float, so do the same here; an Integer 0
    # invites integer division by zero in callers that divide by the result
    return 0.0 if count < 2
    divisor = type == :population ? count : count - 1
    mean = items.sum / count.to_f
    squared_deviations = items.map{ |item| (item - mean) ** 2 }.sum
    (squared_deviations / divisor).to_f
  end
  module_function :variance
end

View File

@ -0,0 +1,92 @@
# like be_within, but also works on arrays/hashes that have reals
# matcher object: like be_within, but also works on arrays/hashes that
# have reals. two values approximate each other when they are equal, when
# both are numbers no further apart than the tolerance, or when they are
# arrays/hashes of the same class and shape whose elements approximate
# each other.
class BeApproximately
  def initialize(expected, tolerance)
    @expected = expected
    @tolerance = tolerance
  end

  def matches?(target)
    @target = target
    approximates?(@target, @expected)
  end

  def approximates?(target, expected)
    return true if target == expected
    # compare numbers before the class check so that an Integer can
    # approximate a Float (e.g. 1 vs 1.004)
    return real_approximates(target, expected) if real?(target) && real?(expected)
    return false unless target.class == expected.class
    case target
    when Array then array_approximates(target, expected)
    when Hash then hash_approximates(target, expected)
    else false
    end
  end

  def array_approximates(target, expected)
    target.size == expected.size &&
      target.each_index.all? { |index|
        approximates?(target[index], expected[index])
      }
  end

  # same key set (checked without sorting, so keys needn't be comparable
  # with each other) and approximately equal values
  def hash_approximates(target, expected)
    target.size == expected.size &&
      target.all? { |key, value|
        expected.key?(key) && approximates?(value, expected[key])
      }
  end

  def real_approximates(target, expected)
    (target - expected).abs <= @tolerance
  end

  def failure_message_for_should
    "expected #{@target.inspect} to be approximately #{@expected.inspect}"
  end

  def failure_message_for_should_not
    "expected #{@target.inspect} not to be approximately #{@expected.inspect}"
  end

  private

  # Integer covers what used to be Fixnum and Bignum
  def real?(value)
    value.is_a?(Integer) || value.is_a?(Float)
  end
end
# build a BeApproximately matcher for the expected value; the tolerance
# defaults to 0.01 when not given
def be_approximately(expected, tolerance = 0.01)
BeApproximately.new(expected, tolerance)
end
# build a quiz made of T/F and multiple choice questions, then create and
# grade one submission per entry in submissions.
#
# * answer_key lists the correct answer for each question (one of
#   [A-DFT]); use a pair such as ["A", 2] to also set the question's point
#   value (otherwise 1)
# * every submission is an array holding one student's answers, in
#   question order
#
# a correct answer of T or F makes the question true/false; anything else
# makes it multiple choice with the four options A-D
#
# the quiz is created through assignment_quiz and the reloaded @quiz is
# returned
def simple_quiz_with_submissions(answer_key, *submissions)
  questions = []
  answer_key.each_with_index do |key_entry, question_index|
    correct, points = key_entry.is_a?(Array) ? key_entry : [key_entry, 1]
    is_true_false = ['T', 'F'].include?(correct)
    question_type = is_true_false ? 'true_false_question' : 'multiple_choice_question'
    choices = is_true_false ? ['T', 'F'] : ('A'..'D').to_a

    answers = {}
    choices.each_with_index do |choice, choice_index|
      answers["answer_#{choice_index}"] = {
        :answer_text => choice,
        :answer_weight => (choice == correct ? 100 : 0),
        :id => (4 * question_index + choice_index)
      }
    end

    questions << {
      :question_data => {
        :name => "question #{question_index + 1}",
        :points_possible => points,
        :question_type => question_type,
        :answers => answers
      }
    }
  end

  assignment_quiz(questions)

  # T/F answers are looked up by their full text
  full_text = {"T" => "True", "F" => "False"}
  submissions.each do |data|
    sub = @quiz.generate_submission(user())
    sub.mark_completed

    submission_data = {}
    data.each_with_index do |letter, i|
      answer_text = full_text[letter] || letter
      quiz_question = @questions[i]
      chosen = quiz_question.question_data[:answers].detect { |a| a[:text] == answer_text }
      submission_data["question_#{quiz_question.id}"] = chosen ? chosen[:id].to_s : nil
    end
    sub.submission_data = submission_data

    sub.grade_submission
  end

  @quiz.reload
end

View File

@ -0,0 +1,86 @@
require File.expand_path(File.dirname(__FILE__) + '/../../../spec_helper.rb')
require File.expand_path(File.dirname(__FILE__) + '/common.rb')
describe QuizStatistics::ItemAnalysis::Item do
  describe ".from" do
    it "should create an item for a supported question type" do
      qq = {:question_type => "true_false_question", :answers => []}
      QuizStatistics::ItemAnalysis::Item.from(nil, qq).should_not be_nil
    end

    it "should not create an item for an unsupported question type" do
      qq = {:question_type => "essay_question"}
      QuizStatistics::ItemAnalysis::Item.from(nil, qq).should be_nil
    end
  end

  # answer key is T T A, followed by four submissions. the examples below
  # look at the last sorted item, for which they expect three respondents:
  # two who answered A (correct) and one who answered B, the fourth
  # submission having left it blank
  let(:item) {
    simple_quiz_with_submissions %w{T T A}, %w{T T A}, %w{T F A}, %w{T T B}, %w{T T}
    @summary = QuizStatistics::ItemAnalysis::Summary.new(@quiz)
    @summary.sorted_items.last
  }

  describe "#num_respondents" do
    it "should return all respondents" do
      item.num_respondents.should == 3 # one guy didn't answer
    end

    it "should return correct respondents" do
      item.num_respondents(:correct).should == 2
    end

    it "should return incorrect respondents" do
      item.num_respondents(:incorrect).should == 1
    end

    it "should return respondents in a certain bucket" do
      item.num_respondents(:top).should == 1
      item.num_respondents(:middle).should == 2
      item.num_respondents(:bottom).should == 0 # there is a guy, but he didn't answer this question
    end

    it "should correctly evaluate multiple filters" do
      item.num_respondents(:top, :correct).should == 1
      item.num_respondents(:top, :incorrect).should == 0
      item.num_respondents(:middle, :correct).should == 1
      item.num_respondents(:middle, :incorrect).should == 1
    end
  end

  describe "#variance" do
    it "should match R's output" do
      # population variance, not sample variance (thus the adjustment)
      # > v <- c(1, 1, 0)
      # > var(v)*2/3
      # [1] 0.2222222
      item.variance.should be_approximately 0.2222222
    end
  end

  describe "#standard_deviation" do
    it "should match R's output" do
      # population sd, not sample sd (thus the adjustment)
      # > v <- c(1, 1, 0)
      # > sqrt(var(v)/3*2)
      # [1] 0.4714045
      item.standard_deviation.should be_approximately 0.4714045
    end
  end

  describe "#difficulty_index" do
    # 2 of the 3 respondents answered correctly
    it "should return the ratio of correct respondents to all respondents" do
      item.difficulty_index.should be_approximately 0.6666667
    end
  end

  describe "#point_biserials" do
    # x holds the overall scores of the three respondents; each 0/1 vector
    # marks who chose the answer in question
    # > x<-c(3,2,2)
    # > cor(x,c(1,1,0))
    # [1] 0.5
    # > cor(x,c(0,0,1))
    # [1] -0.5
    it "should match R's output" do
      # the two answers nobody chose have no point biserial
      item.point_biserials.should be_approximately [0.5, -0.5, nil, nil]
    end
  end
end

View File

@ -0,0 +1,54 @@
require File.expand_path(File.dirname(__FILE__) + '/../../../spec_helper.rb')
require File.expand_path(File.dirname(__FILE__) + '/common.rb')
describe QuizStatistics::ItemAnalysis::Summary do
# answer key is T T A, followed by four submissions; against that key they
# score 3, 2, 1 and 1, which is the vector used in the R snippets below
let(:summary) {
simple_quiz_with_submissions %w{T T A}, %w{T T A}, %w{T T B}, %w{T F B}, %w{T F B}
QuizStatistics::ItemAnalysis::Summary.new(@quiz)
}
describe "#each" do
it "should yield each item" do
count = 0
summary.each do |item|
item.should be_a QuizStatistics::ItemAnalysis::Item
count += 1
end
# one item per question in the answer key
count.should == 3
end
end
describe "#alpha" do
it "should match R's output" do
# each column of the matrix is one question, each row one submission
# (1 = correct, 0 = incorrect)
# > mdat <- matrix(c(1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0), nrow=4)
# > cronbach.alpha(mdat)
#
# Cronbach's alpha for the 'mdat' data-set
#
# Items: 3
# Sample units: 4
# alpha: 0.545
summary.alpha.should be_approximately 0.545
end
end
describe "#variance" do
it "should match R's output" do
# population variance, not sample variance (thus the adjustment)
# > v <- c(3, 2, 1, 1)
# > var(v)*3/4
# [1] 0.6875
summary.variance.should be_approximately 0.6875
end
end
describe "#standard_deviation" do
it "should match R's output" do
# population sd, not sample sd (thus the adjustment)
# > v <- c(3, 2, 1, 1)
# > sqrt(var(v)*3/4)
# [1] 0.8291562
summary.standard_deviation.should be_approximately 0.8291562
end
end
end