give GA an illegible user id

fixes KNO-391
flag  = none

Page views sent to GA are now identified with a value that is unique to
the user but is also not their Canvas ID (to preserve anonymity.) To
avoid further bloat in the User model, I chose to compute this id from
the user id directly by means of a digest as opposed to introducing
another field like User#lti_id.

  ---------
\ TEST PLAN /
 /         \
  \-/---\-/

- without being logged in, visit canvas and inspect the page source for
  the GA snippet:
  * verify "userId"[1] is not being set

- now log in and inspect the source again:

  * verify the "userId" is being set to a value that is _not_ the Canvas
    user id
  * reload the page and verify the value does not change

- (optional) log in as a different user and verify the userId value is
  different from the one given to the first user

[1]: https://developers.google.com/analytics/devguides/collection/analyticsjs/field-reference#userId

Change-Id: Iaec342c3631e689d754ab35663f4b9c6ac397257
Reviewed-on: https://gerrit.instructure.com/c/canvas-lms/+/233416
Tested-by: Service Cloud Jenkins <svc.cloudjenkins@instructure.com>
Reviewed-by: Davis Hyer <dhyer@instructure.com>
QA-Review: Davis Hyer <dhyer@instructure.com>
Product-Review: Davis Hyer <dhyer@instructure.com>
This commit is contained in:
Ahmad Amireh 2020-04-08 10:44:12 -06:00
parent 1dc92afb99
commit 3578d5b053
5 changed files with 80 additions and 36 deletions

View File

@ -11,6 +11,9 @@
<% if @google_analytics_page_title %>
ga('set', 'title', <%= raw(@google_analytics_page_title.to_json) %>);
<% end %>
<% if dimensions[:user_id] %>
ga('set', 'userId', <%= dimensions.fetch(:user_id).to_json.html_safe %>);
<% end %>
ga('set', 'dimension1', <%= dimensions.fetch(:enrollments).to_json.html_safe %>);
ga('set', 'dimension2', <%= dimensions.fetch(:admin).to_json.html_safe %>);
ga('set', 'dimension3', <%= dimensions.fetch(:masquerading).to_json.html_safe %>);

View File

@ -118,6 +118,12 @@ module Canvas::Security
)
end
def self.hmac_sha512(str, encryption_key = nil)
OpenSSL::HMAC.hexdigest(
OpenSSL::Digest.new('sha512'), (encryption_key || self.encryption_key), str
)
end
def self.verify_hmac_sha1(hmac, str, options = {})
keys = options[:keys] || []
keys += [options[:key]] if options[:key]

View File

@ -44,9 +44,20 @@ module GoogleAnalyticsDimensions
admin: _encode_admin_status(roles: user_roles),
enrollments: _encode_enrollments(roles: user_roles),
masquerading: _encode_masquerading_status(user: user, real_user: real_user),
user_id: _compute_non_compromising_user_id(user: user),
}
end
# we only need some identifier that GA can utilize to track users across
# different devices but we don't want it to know who the users are (e.g. their
# canvas id)
#
# see https://support.google.com/analytics/answer/2992042?hl=en
# see https://developers.google.com/analytics/devguides/collection/analyticsjs/cookies-user-id
def self._compute_non_compromising_user_id(user:)
user ? Canvas::Security.hmac_sha512(user.id.to_s)[0,32] : nil
end
def self._encode_admin_status(roles:)
# again, look at User#user_roles for the definition
%w[ admin root_admin ].map do |enrollment_type|

View File

@ -18,36 +18,32 @@
require_relative '../spec_helper'
describe GoogleAnalyticsDimensions do
subject { described_class.method(:calculate) }
subject do
->(account: Account.default, real_user: nil, user: nil) do
described_class.calculate(
domain_root_account: account,
real_user: real_user,
user: user
)
end
end
it 'tells when someone is a student' do
dims = subject[
domain_root_account: Account.default,
real_user: nil,
user: student_in_course(active_all: true).user,
]
dims = subject[user: student_in_course(active_all: true).user]
expect(dims).to have_key(:enrollments)
expect(dims[:enrollments]).to eq('100')
end
it 'tells when someone is a teacher' do
dims = subject[
domain_root_account: Account.default,
real_user: nil,
user: teacher_in_course(active_all: true).user,
]
dims = subject[user: teacher_in_course(active_all: true).user]
expect(dims).to have_key(:enrollments)
expect(dims[:enrollments]).to eq('010')
end
it 'tells when someone is an observer' do
dims = subject[
domain_root_account: Account.default,
real_user: nil,
user: observer_in_course(active_all: true).user,
]
dims = subject[user: observer_in_course(active_all: true).user]
expect(dims).to have_key(:enrollments)
expect(dims[:enrollments]).to eq('001')
@ -55,8 +51,6 @@ describe GoogleAnalyticsDimensions do
it 'tells when someone is both a teacher and an observer' do
dims = subject[
domain_root_account: Account.default,
real_user: nil,
user: teacher_in_course(active_all: true).user.tap do |user|
observer_in_course(user: user)
end
@ -68,8 +62,6 @@ describe GoogleAnalyticsDimensions do
it "it tells when someone is superman (or woman, lest i get fired)" do
dims = subject[
domain_root_account: Account.default,
real_user: nil,
user: teacher_in_course(active_all: true).user.tap do |user|
student_in_course(user: user, active_all: true)
observer_in_course(user: user, active_all: true)
@ -81,22 +73,14 @@ describe GoogleAnalyticsDimensions do
end
it 'yields an empty set of enrollments for an anonymous session' do
dims = subject[
domain_root_account: Account.default,
real_user: nil,
user: nil,
]
dims = subject[]
expect(dims).to have_key(:enrollments)
expect(dims[:enrollments]).to eq('000')
end
it "tells when someone is an admin" do
dims = subject[
domain_root_account: Account.default,
real_user: nil,
user: account_admin_user
]
dims = subject[user: account_admin_user]
expect(dims).to have_key(:admin)
expect(dims[:admin]).to eq '11'
@ -105,7 +89,6 @@ describe GoogleAnalyticsDimensions do
it 'tells when someone is masquerading' do
dims = subject[
domain_root_account: Account.default,
real_user: account_admin_user,
user: student_in_course(active_all: true).user
]
@ -116,13 +99,40 @@ describe GoogleAnalyticsDimensions do
it 'tells when someone is not masquerading' do
user = user_with_pseudonym(active_all: true)
dims = subject[
domain_root_account: Account.default,
real_user: user,
user: user
]
dims = subject[real_user: user, user: user]
expect(dims).to have_key(:masquerading)
expect(dims[:masquerading]).to eq '0'
end
describe 'identification' do
it 'provides a consistent, predictable user identifier' do
user = user_with_pseudonym(active_all: true)
dims = subject[user: user]
dims_again = subject[user: user]
expect(dims).to have_key(:user_id)
expect(dims_again).to have_key(:user_id)
expect(dims[:user_id]).to eq(dims_again[:user_id])
end
it 'provides an identifier that is unique for the user' do
emperor_tamarin = user_with_pseudonym(active_all: true)
maryjane = user_with_pseudonym(active_all: true)
expect(
subject[user: emperor_tamarin][:user_id]
).not_to eq(
subject[user: maryjane][:user_id]
)
end
it 'provides an identifier that is NOT the canvas ID' do
user = user_with_pseudonym(active_all: true)
dims = subject[user: user]
expect(dims[:user_id]).to be_present
expect(dims[:user_id]).not_to eq(user.id)
end
end
end

View File

@ -119,5 +119,19 @@ describe "google analytics" do
masquerading: '1',
)
end
it "should identify the user" do
start_with { course_with_student_logged_in }
alternative_user_id = GoogleAnalyticsDimensions.calculate(
domain_root_account: Account.default,
real_user: nil,
user: @student
).fetch(:user_id)
expect(ga_script).to include(
"ga('set', 'userId', #{alternative_user_id.to_json})"
)
end
end
end