# canvas-lms/app/models/page_view.rb
#
# Copyright (C) 2011 - present Instructure, Inc.
#
# This file is part of Canvas.
#
# Canvas is free software: you can redistribute it and/or modify it under
# the terms of the GNU Affero General Public License as published by the Free
# Software Foundation, version 3 of the License.
#
# Canvas is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
# details.
#
# You should have received a copy of the GNU Affero General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
#
class PageView < ActiveRecord::Base
  self.primary_key = 'request_id'

  belongs_to :developer_key
  belongs_to :user
  belongs_to :account
  belongs_to :real_user, :class_name => 'User'
  belongs_to :asset_user_access
  belongs_to :context, polymorphic: [:course, :account, :group, :user, :user_profile], polymorphic_prefix: true

  before_save :ensure_account
  before_save :cap_interaction_seconds

  CONTEXT_TYPES = %w{Course Account Group User UserProfile}.freeze

  attr_accessor :generated_by_hand
  attr_accessor :is_update

  # note that currently we never query page views from the perspective of the course;
  # we simply don't record them for non-logged-in users in a public course
  # if we ever do either of the above, we'll need to remove this, and figure out
  # where such page views should belong (currently page views end up on the user's
  # shard)
  validates_presence_of :user_id
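
  # builds (but does not save) a PageView from a controller request, stamping
  # timestamps and the request id and normalizing exported string columns to UTF-8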
  def self.generate(request, attributes={})
    self.new(attributes).tap do |p|
      p.url = LoggingFilter.filter_uri(request.url)[0,255]
      p.http_method = request.request_method.downcase
      p.controller = request.path_parameters[:controller]
      p.action = request.path_parameters[:action]
      p.session_id = request.session_options[:id].to_s.force_encoding(Encoding::UTF_8).presence
      p.user_agent = request.user_agent
      p.remote_ip = request.remote_ip
      p.interaction_seconds = 5
      p.created_at = Time.now
      p.updated_at = Time.now
      p.id = RequestContextGenerator.request_id

      p.export_columns.each do |c|
        v = p.send(c)
        if !v.nil? && v.respond_to?(:force_encoding)
          p.send("#{c}=", v.force_encoding(Encoding::UTF_8))
        end
      end
    end
  end

  def self.find_for_update(request_id)
    if PageView.updates_enabled? && (self.db? || self.cassandra?)
      begin
        # not using find_by_id or where(..).first because the cassandra
        # codepath doesn't support it
        find(request_id)
      rescue ActiveRecord::RecordNotFound
        nil
      end
    else
      new { |p| p.request_id = request_id }
    end
  end
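
  # #token and .decode_token round-trip a page view's identifying fields through a
  # signed JWT, e.g. (illustrative only):
  #   PageView.decode_token(page_view.token)
  #   # => { request_id: ..., user_id: ..., created_at: ... }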
  def token
    Canvas::Security.create_jwt({
      i: request_id,
      u: Shard.global_id_for(user_id),
      c: created_at.try(:utc).try(:iso8601, 2)
    })
  end

  def self.decode_token(token)
    data = Canvas::Security.decode_jwt(token)
    return nil unless data
    return {
      request_id: data[:i],
      user_id: data[:u],
      created_at: data[:c]
    }
  end

  def url
    url = read_attribute(:url)
    url && LoggingFilter.filter_uri(url)
  end

  def ensure_account
    self.account_id ||= (self.context_type == 'Account' ? self.context_id : self.context.account_id) rescue nil
    self.account_id ||= (self.context.is_a?(Account) ? self.context : self.context.account) if self.context
  end

  def cap_interaction_seconds
    self.interaction_seconds = [self.interaction_seconds || 5, 10.minutes.to_i].min
  end

  # the list of columns we display to users, export to csv, etc
  EXPORTED_COLUMNS = %w(request_id user_id url context_id context_type asset_id asset_type controller action interaction_seconds created_at user_request render_time user_agent participated account_id real_user_id http_method remote_ip)

  def self.page_views_enabled?
    !!page_view_method
  end
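
  # reads the 'enable_page_views' Setting; besides 'false', the values this class
  # understands are 'db', 'cassandra', 'pv4', and 'log' ('true'/'cache' are treated
  # as 'db' for backwards compatibility)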
  def self.page_view_method
    enable_page_views = Setting.get('enable_page_views', 'false')
    return false if enable_page_views == 'false'
    enable_page_views = 'db' if %w[true cache].include?(enable_page_views) # backwards compat
    enable_page_views.to_sym
  end

  after_initialize :initialize_shard

  def initialize_shard
    # remember the page view method selected at the time of creation, so that
    # we use the right method when saving
    if PageView.cassandra? && new_record?
      self.shard = Shard.birth
    end
  end

  def self.db?
    self.page_view_method == :db
  end

  def self.cassandra?
    page_view_method == :cassandra
  end

  def self.pv4?
    page_view_method == :pv4 || Setting.get('read_from_pv4', 'false') == 'true'
  end

  def self.global_storage_namespace?
    cassandra? || pv4?
  end
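
  # cassandra-backed event stream for page views, with a secondary index of each
  # user's views keyed by the user's global asset string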
  EventStream = EventStream::Stream.new do
    database -> { Canvas::Cassandra::DatabaseBuilder.from_config(:page_views) }
    table :page_views
    id_column :request_id
    record_type PageView
    read_consistency_level -> { Canvas::Cassandra::DatabaseBuilder.read_consistency_setting(:page_views) }

    add_index :user do
      table :page_views_history_by_context
      id_column :request_id
      key_column :context_and_time_bucket
      scrollback_limit -> { Setting.get('page_views_scrollback_limit:users', 52.weeks) }
      # index by the page view's user, but use the user's global_asset_string
      # when writing the index
      entry_proc lambda{ |page_view| page_view.user }
      key_proc lambda{ |user| user.global_asset_string }
    end

    self.raise_on_error = Rails.env.test?

    on_error do |operation, record, exception|
      Canvas::EventStreamLogger.error('PAGEVIEW', identifier, operation, record.to_json, exception.message.to_s)
    end
  end

  def self.find(ids)
    return super unless PageView.cassandra?

    case ids
    when Array
      result = PageView::EventStream.fetch(ids)
      raise ActiveRecord::RecordNotFound, "Couldn't find all PageViews with IDs (#{ids.join(',')}) (found #{result.length} results, but was looking for #{ids.length})" unless ids.length == result.length
      result
    else
      find([ids]).first
    end
  end

  def self.find_all_by_id(ids)
    if PageView.cassandra?
      PageView::EventStream.fetch(ids)
    elsif PageView.pv4?
      []
    else
      where(request_id: ids).to_a
    end
  end

  def self.find_by_id(id)
    if PageView.cassandra?
      PageView::EventStream.fetch([id]).first
    elsif PageView.pv4?
      nil
    else
      where(request_id: id).first
    end
  end
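
  # builds a PageView from a raw attributes hash (e.g. rows read directly from the
  # database) without going through normal AR instantiation; when a global storage
  # namespace (cassandra/pv4) is in use, the record is built on the birth shard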
  def self.from_attributes(attrs, new_record=false)
    @blank_template ||= columns.inject({}) { |h,c| h[c.name] = nil; h }
    attrs = attrs.slice(*@blank_template.keys)
    shard = PageView.global_storage_namespace? ? Shard.birth : Shard.current
    page_view = shard.activate do
      if new_record
        new{ |pv| pv.assign_attributes(attrs) }
      else
        instantiate(@blank_template.merge(attrs))
      end
    end
    page_view
  end

  def self.updates_enabled?
    Setting.get('skip_pageview_updates', 'false') != 'true'
  end
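
  # persists the page view using the configured backend (:log only logs it, :db and
  # :cassandra save the record) and records the active-user count; returns false when
  # there is no user, or when this is an update and updates are disabled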
  def store
    self.created_at ||= Time.zone.now
    return false unless user
    return false if self.is_update && !PageView.updates_enabled?

    result = case PageView.page_view_method
             when :log
               Rails.logger.info "PAGE VIEW: #{self.attributes.to_json}"
             when :db, :cassandra
               self.save
             end

    self.store_page_view_to_user_counts

    result
  end
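
  # folds additional interaction time from `params` into interaction_seconds; when no
  # explicit 'interaction_seconds' is given, adds the elapsed time since the last
  # update, capped at 5 seconds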
  def do_update(params = {})
    # nothing currently in the block is shard-sensitive, but to prevent
    # accidents in the future, we'll add the correct shard activation now
    shard = PageView.db? ? Shard.current : Shard.default
    shard.activate do
      updated_at = params['updated_at'] || self.updated_at || Time.now
      updated_at = Time.parse(updated_at) if updated_at.is_a?(String)
      seconds = self.interaction_seconds || 0
      if params['interaction_seconds'].to_i > 0
        seconds += params['interaction_seconds'].to_i
      else
        seconds += [5, (Time.now - updated_at)].min
        seconds = [seconds, Time.now - created_at].min if created_at
      end
      self.updated_at = Time.now
      self.interaction_seconds = seconds
      self.is_update = true
    end
  end
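
  # when cassandra is the backend, creates and updates are written to the EventStream
  # (on the user's shard) instead of the ActiveRecord table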
  def _create_record(*args)
    return super unless PageView.cassandra?

    self.created_at ||= Time.zone.now
    user.shard.activate do
      run_callbacks(:create) do
        PageView::EventStream.insert(self)
        @new_record = false
        self.id
      end
    end
  end

  def _update_record(*args)
    return super unless PageView.cassandra?

    user.shard.activate do
      run_callbacks(:update) do
        PageView::EventStream.update(self)
        true
      end
    end
  end

  scope :for_context, proc { |ctx| where(:context_type => ctx.class.name, :context_id => ctx) }
  scope :for_users, lambda { |users| where(:user_id => users) }

  def self.pv4_client
    ConfigFile.cache_object('pv4') do |config|
      Pv4Client.new(config['uri'], config['access_token'])
    end
  end

  # returns a collection with very limited functionality
  # basically, it responds to #paginate and returns a
  # WillPaginate::Collection-like object
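  #
  # a rough usage sketch (option keys as used below; :viewer is stripped before
  # querying, and `current_user` is just an illustrative variable):
  #   PageView.for_user(user, oldest: 1.week.ago, viewer: current_user).paginate(page: 1, per_page: 50)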
  def self.for_user(user, options={})
    viewer = options.delete(:viewer)
    viewer = nil if viewer == user
    viewer = nil if viewer && Account.site_admin.grants_any_right?(viewer, :view_statistics, :manage_students)

    user.shard.activate do
      if PageView.pv4?
        result = pv4_client.for_user(user.global_id, **options)
        result = AccountFilter.filter(result, viewer) if viewer
        result
      elsif PageView.cassandra?
        result = PageView::EventStream.for_user(user, options)
        result = AccountFilter.filter(result, viewer) if viewer
        result
      else
        scope = self.where(:user_id => user).order('created_at desc')
        scope = scope.where("created_at >= ?", options[:oldest]) if options[:oldest]
        scope = scope.where("created_at <= ?", options[:newest]) if options[:newest]
        if viewer
          accounts = user.associated_accounts.shard(user).select { |a| a.grants_any_right?(viewer, :view_statistics, :manage_students) }
          accounts << nil
          scope = scope.where(account_id: accounts)
        end
        scope
      end
    end
  end

  class << self
    def transaction(*args)
      if PageView.cassandra?
        # Rails 3 autosave associations re-assign the attributes;
        # for sharding to work, the page view's shard has to be
        # active at that point, but it's not, because it's normally
        # done by the transaction, which we're skipping. so
        # manually do that here
        if current_scope
          current_scope.activate do
            yield
          end
        else
          yield
        end
      else
        super
      end
    end
  end

  def add_to_transaction
    super unless PageView.cassandra?
  end
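
  # returns the redis set key for the 5-minute bucket containing `time`, roughly
  # "active_users:2021-01-01T03:40:00Z" (the exact timestamp format comes from Time#as_json)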
  def self.user_count_bucket_for_time(time)
    utc = time.in_time_zone('UTC')
    # round down to the last 5 minute mark -- so 03:43:28 turns into 03:40:00
    utc = utc - ((utc.min % 5) * 60) - utc.sec
    "active_users:#{utc.as_json}"
  end

  # this is not intended to be called often; only from console as a debugging measure
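  # e.g. (from a console; accepts a single Time or a Range of Times):
  #   PageView.active_user_counts_by_shard(10.minutes.ago..Time.now)
  #   # => { shard_id => active_user_count, ... }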
  def self.active_user_counts_by_shard(time = Time.now)
    members = Set.new
    time = time..time unless time.is_a?(Range)
    bucket_time = time.begin
    while (time.cover?(bucket_time))
      bucket = user_count_bucket_for_time(bucket_time)
      members.merge(Canvas.redis.smembers(bucket))
      bucket_time += 5.minutes
    end

    result = {}
    members.each do |uid|
      shard = Shard.shard_for(uid)
      next unless shard
      result[shard.id] ||= 0
      result[shard.id] += 1
    end
    result
  end
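
  # adds this page view's user to the redis active-user bucket for created_at, when
  # the 'page_views_store_active_user_counts' Setting is set to 'redis'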
  def store_page_view_to_user_counts
    return unless Setting.get('page_views_store_active_user_counts', 'false') == 'redis' && Canvas.redis_enabled?
    return unless self.created_at.present? && self.user.present?

    exptime = Setting.get('page_views_active_user_exptime', 1.day.to_s).to_i
    bucket = PageView.user_count_bucket_for_time(self.created_at)
    Canvas.redis.sadd(bucket, self.user.global_id)
    Canvas.redis.expire(bucket, exptime)
  end

  # to_csv uses these methods, see lib/ext/array.rb
  def export_columns(format = nil)
    PageView::EXPORTED_COLUMNS
  end

  def to_row(format = nil)
    export_columns(format).map { |c| self.send(c).presence }
  end

  def app_name
    DeveloperKey.find_cached(developer_key_id).try(:name) if developer_key_id
  end

  # utility class to migrate a postgresql/sqlite3 page_views table to cassandra
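  #
  # a rough console sketch (assumes the cassandra page_views backend is configured):
  #   migrator = PageView::CassandraMigrator.new
  #   migrator.run   # calls run_once until it stops making progress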
  class CassandraMigrator < Struct.new(:start_at, :logger, :migration_data)
    # if you interrupt and re-start the migrator, start_at cannot be changed,
    # since it's saved in cassandra to persist the migration state
    def initialize(skip_deleted_accounts = true, start_at = nil)
      self.start_at = start_at || 52.weeks.ago
      self.logger = Rails.logger

      if skip_deleted_accounts
        account_ids = Set.new(Account.root_accounts.active.pluck(:id))
      else
        account_ids = Set.new(Account.root_accounts.pluck(:id))
      end

      load_migration_data(account_ids)
    end

    def load_migration_data(account_ids)
      self.migration_data = {}
      account_ids.each do |account_id|
        data = self.migration_data[account_id] = {}
        data.merge!(cassandra.execute("SELECT last_created_at FROM page_views_migration_metadata_per_account WHERE shard_id = ? AND account_id = ?", Shard.current.id.to_s, account_id).fetch.try(:to_hash) || {})

        if !(data['last_created_at'])
          data['last_created_at'] = self.start_at
        end

        # cassandra returns Time not TimeWithZone objects
        data['last_created_at'] = data['last_created_at'].in_time_zone
      end
    end

    # this is the batch size per account, not the overall batch size
    # returns true if any progress was made (if it makes sense to run_once again)
    def run_once(batch_size = 3000)
      self.migration_data.inject(false) do |progress, (account_id,_)|
        run_once_for_account(account_id, batch_size) || progress
      end
    end

    def run_once_for_account(account_id, batch_size)
      data = self.migration_data[account_id]
      raise("not configured for account id: #{account_id}") unless data

      last_created_at = data['last_created_at']

      # this could run into problems if one account gets more than
      # batch_size page views created in the second on this boundary
      finder_sql = PageView.where("account_id = ? AND created_at >= ?", account_id, last_created_at).
        order(:created_at => :asc).limit(batch_size).to_sql

      # query just the raw attributes, don't instantiate AR objects
      rows = PageView.connection.select_all(finder_sql).to_a
      return false if rows.empty?

      inserted = rows.count do |attrs|
        begin
          created_at = attrs['created_at']
          created_at = Time.zone.parse(created_at) unless created_at.is_a?(Time)

          # if the created_at is the same as the last_created_at,
          # we may have already inserted this page view
          # use to_i here to avoid sub-second precision problems
          if created_at.to_i == last_created_at.to_i
            exists = !!cassandra.select_value("SELECT request_id FROM page_views WHERE request_id = ?", attrs['request_id'])
          end

          # now instantiate the AR object here, as a brand new record, so
          # it's saved to cassandra as if it was just created (though
          # created_at comes from the queried db attributes)
          # we're bypassing the redis queue here, just saving directly to cassandra
          if exists
            false
          else
            # assumes PageView.cassandra? is true at this point
            page_view = PageView.from_attributes(attrs, true)
            page_view.save!
            true
          end
        rescue
          logger.error "failed migrating request id to cassandra: #{attrs['request_id']} : #{$!}"
          false
        end
      end

      logger.info "account #{Shard.current.id}~#{account_id}: added #{inserted} page views starting at #{last_created_at}"

      last_created_at = rows.last['created_at']
      last_created_at = Time.zone.parse(last_created_at) unless last_created_at.is_a?(Time)
      cassandra.execute("UPDATE page_views_migration_metadata_per_account SET last_created_at = ? WHERE shard_id = ? AND account_id = ?", last_created_at, Shard.current.id.to_s, account_id)
      data['last_created_at'] = last_created_at
      return inserted > 0
    end

    def cassandra
      PageView::EventStream.database
    end

    def run
      while run_once
      end
    end
  end
end