recreate the pg_collkey based indexes to use collation level 3

closes CNVS-38143

this commit aligns the collation schemes for postgres, ruby and
javascript to the same level.  this means that when sorting an
array of strings in any of the three environments, the result
will be identical.

this also stops relying on default collation strengths in ruby
and postgres and explicitly specifies the default collation
strengths to avoid surprises later.

test plan:
* Verify migrations pass with multiple shards
* Verify migrations rollback properly individually

Change-Id: I6023b364c260c35b85a8d6364ccc346e2ef7d159
Reviewed-on: https://gerrit.instructure.com/118675
Tested-by: Jenkins
Reviewed-by: Derek Bender <djbender@instructure.com>
Reviewed-by: Cody Cutrer <cody@instructure.com>
Product-Review: Keith T. Garner <kgarner@instructure.com>
QA-Review: Keith T. Garner <kgarner@instructure.com>
This commit is contained in:
Shahbaz Javeed 2017-07-12 12:33:55 -04:00
parent feeef481fe
commit c2cacc5b23
7 changed files with 149 additions and 5 deletions

View File

@ -23,8 +23,8 @@ export default {
const locale_map = {zh_Hant: 'zh-Hant'}
locale = locale_map[locale] || locale
return x.localeCompare(y, locale, {
sensitivity: 'accent',
ignorePunctuation: true,
sensitivity: 'variant',
ignorePunctuation: false,
numeric: true,
})
},

View File

@ -398,7 +398,10 @@ class ActiveRecord::Base
@collkey = connection.extension_installed?(:pg_collkey)
end
if @collkey
"#{@collkey}.collkey(#{col}, '#{Canvas::ICU.locale_for_collation}', false, 0, true)"
# The collation level of 3 is the default, but is explicitly specified here and means that
# case, accents and base characters are all taken into account when creating a collation key
# for a string - more at https://pgxn.org/dist/pg_collkey/0.5.1/
"#{@collkey}.collkey(#{col}, '#{Canvas::ICU.locale_for_collation}', false, 3, true)"
else
"CAST(LOWER(replace(#{col}, '\\', '\\\\')) AS bytea)"
end

View File

@ -0,0 +1,39 @@
#
# Copyright (C) 2017 - present Instructure, Inc.
#
# This file is part of Canvas.
#
# Canvas is free software: you can redistribute it and/or modify it under
# the terms of the GNU Affero General Public License as published by the Free
# Software Foundation, version 3 of the License.
#
# Canvas is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
# details.
#
# You should have received a copy of the GNU Affero General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Pre-deploy migration that rebuilds the pg_collkey expression indexes on
# users.sortable_name and attachments.display_name with collation level 3.
# The existing indexes are renamed out of the way first so that the new
# indexes can take over the original names.
class UpdateCollationKeyIndexes < ActiveRecord::Migration[5.0]
  tag :predeploy
  # The new indexes are built with algorithm: :concurrently, so the
  # migration is run outside of a DDL transaction.
  disable_ddl_transaction!

  def change
    # Nothing to do when the pg_collkey extension is not installed. The
    # value returned here is interpolated as the qualifier in front of
    # collkey() below (presumably the schema that holds the extension).
    collkey = connection.extension_installed?(:pg_collkey)
    return unless collkey

    sortable_name_key = "#{collkey}.collkey(sortable_name, 'root', false, 3, true)"
    display_name_key = "folder_id, file_state, #{collkey}.collkey(display_name, 'root', false, 3, true)"

    # Step 1: move the current indexes aside under temporary names.
    rename_index(:users, :index_users_on_sortable_name, :index_users_on_sortable_name_old)
    rename_index(
      :attachments,
      :index_attachments_on_folder_id_and_file_state_and_display_name,
      :index_attachments_on_folder_id_and_file_state_and_display_name1
    )

    # Step 2: create the level-3 replacements under the original names.
    add_index(
      :users,
      sortable_name_key,
      algorithm: :concurrently,
      name: :index_users_on_sortable_name
    )
    add_index(
      :attachments,
      display_name_key,
      algorithm: :concurrently,
      name: :index_attachments_on_folder_id_and_file_state_and_display_name,
      where: 'folder_id IS NOT NULL'
    )
  end
end

View File

@ -0,0 +1,42 @@
#
# Copyright (C) 2017 - present Instructure, Inc.
#
# This file is part of Canvas.
#
# Canvas is free software: you can redistribute it and/or modify it under
# the terms of the GNU Affero General Public License as published by the Free
# Software Foundation, version 3 of the License.
#
# Canvas is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
# details.
#
# You should have received a copy of the GNU Affero General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Post-deploy migration that drops the renamed ("_old" / "...1") pg_collkey
# indexes on users and attachments. Rolling back recreates those indexes
# using collation level 0.
class DropOldCollationKeyIndexes < ActiveRecord::Migration[5.0]
  tag :postdeploy
  # The rollback path builds indexes with algorithm: :concurrently, so the
  # migration is run outside of a DDL transaction.
  disable_ddl_transaction!

  # Removes the two renamed indexes; does nothing when pg_collkey is not
  # installed.
  def up
    return unless connection.extension_installed?(:pg_collkey)

    remove_index(:users, name: :index_users_on_sortable_name_old)
    remove_index(:attachments, name: :index_attachments_on_folder_id_and_file_state_and_display_name1)
  end

  # Recreates the level-0 indexes under their renamed identifiers; does
  # nothing when pg_collkey is not installed.
  def down
    # The value returned here is interpolated as the qualifier in front of
    # collkey() below (presumably the schema that holds the extension).
    collkey = connection.extension_installed?(:pg_collkey)
    return unless collkey

    display_name_key = "folder_id, file_state, #{collkey}.collkey(display_name, 'root', false, 0, true)"
    sortable_name_key = "#{collkey}.collkey(sortable_name, 'root', false, 0, true)"

    add_index(
      :attachments,
      display_name_key,
      algorithm: :concurrently,
      name: :index_attachments_on_folder_id_and_file_state_and_display_name1,
      where: 'folder_id IS NOT NULL'
    )
    add_index(
      :users,
      sortable_name_key,
      algorithm: :concurrently,
      name: :index_users_on_sortable_name_old
    )
  end
end

View File

@ -121,8 +121,12 @@ module Canvas::ICU
@collations ||= {}
@collations[I18n.locale] ||= begin
collator = ICU::Collation::Collator.new(I18n.locale.to_s)
# Reference documentation (some option names differ in ruby-space) for these options is at
# http://userguide.icu-project.org/collation/customization#TOC-Default-Options
collator.normalization_mode = true
collator.numeric_collation = true
collator.strength = :tertiary
collator
end
end

View File

@ -1505,10 +1505,16 @@ test('returns 1 if the name field comes later alphabetically in the first record
strictEqual(this.gradebook.compareAssignmentNames(this.secondRecord, this.firstRecord), 1);
});
test('comparison is case-insensitive', function () {
test('comparison is case-sensitive between alpha and Alpha', function () {
const thirdRecord = this.getRecord('Alpha');
strictEqual(this.gradebook.compareAssignmentNames(thirdRecord, this.firstRecord), 0);
strictEqual(this.gradebook.compareAssignmentNames(thirdRecord, this.firstRecord), 1);
});
test('comparison does not group uppercase letters together', function () {
const thirdRecord = this.getRecord('Omega');
strictEqual(this.gradebook.compareAssignmentNames(thirdRecord, this.secondRecord), 1);
});
QUnit.module('Gradebook#compareAssignmentPointsPossible', {

View File

@ -0,0 +1,50 @@
/*
* Copyright (C) 2017 - present Instructure, Inc.
*
* This file is part of Canvas.
*
* Canvas is free software: you can redistribute it and/or modify it under
* the terms of the GNU Affero General Public License as published by the Free
* Software Foundation, version 3 of the License.
*
* Canvas is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
* details.
*
* You should have received a copy of the GNU Affero General Public License along
* with this program. If not, see <http://www.gnu.org/licenses/>.
*/
import natcompare from 'compiled/util/natcompare';
// Fixture records whose names differ only by case, accent or punctuation,
// plus one exact duplicate ('abel'), listed in deliberately unsorted order.
const valuesToSort = [
  { id: 1, name: 'john, john' },
  { id: 2, name: 'abel' },
  { id: 3, name: 'John, John' },
  { id: 4, name: 'abel' },
  { id: 5, name: 'johnson, john' },
  { id: 6, name: 'âbel' },
  { id: 7, name: 'Abel' },
  { id: 8, name: 'joh, jonny' },
  { id: 9, name: 'joh jonny' },
];

QUnit.module('sorts values properly when used to compare strings');

test('puts remaining words in the right order since there are no collisions possible', function () {
  // Expected order: lowercase before uppercase before accented for the same
  // base letters, and the name without the comma ahead of the one with it.
  const expectedSortedStrings = [
    'abel',
    'abel',
    'Abel',
    'âbel',
    'joh jonny',
    'joh, jonny',
    'john, john',
    'John, John',
    'johnson, john',
  ];

  // Array#sort reorders its receiver in place, so sort a shallow copy to keep
  // the module-level fixture in its original order for any other test.
  const sortedValueNames = [...valuesToSort]
    .sort(natcompare.byKey('name'))
    .map(item => item.name);

  deepEqual(sortedValueNames, expectedSortedStrings);
});