improve name inference

* don't treat suffix-like-names as suffixes when they're more likely
   the given name
 * even more so when it should already be in last-name-first context

test plan:
 * have a user with the full name "Vi Duing", which should translate
   to a sortable name of "Duing, Vi"
 * do an LTI launch
 * first name should come through as Vi, last name as duing

Change-Id: I2d728614af8ff421a5e14e8c2d116d9f0d0a2499
Reviewed-on: https://gerrit.instructure.com/63139
Tested-by: Jenkins
QA-Review: August Thornton <august@instructure.com>
Reviewed-by: Rob Orton <rob@instructure.com>
Product-Review: Cody Cutrer <cody@instructure.com>
This commit is contained in:
Cody Cutrer 2015-09-11 15:31:50 -06:00
parent 72d8de6429
commit 5fdfe00b88
3 changed files with 37 additions and 13 deletions

View File

@ -650,18 +650,18 @@ class User < ActiveRecord::Base
end
def first_name
User.name_parts(self.sortable_name)[0] || ''
User.name_parts(self.sortable_name, likely_already_surname_first: true)[0] || ''
end
def last_name
User.name_parts(self.sortable_name)[1] || ''
User.name_parts(self.sortable_name, likely_already_surname_first: true)[1] || ''
end
# Feel free to add, but the "authoritative" list (http://en.wikipedia.org/wiki/Title_(name)) is quite large
SUFFIXES = /^(Sn?r\.?|Senior|Jn?r\.?|Junior|II|III|IV|V|VI|Esq\.?|Esquire)$/i
# see also user_sortable_name.js
def self.name_parts(name, prior_surname = nil)
def self.name_parts(name, prior_surname: nil, likely_already_surname_first: false)
return [nil, nil, nil] unless name
surname, given, suffix = name.strip.split(/\s*,\s*/, 3)
@ -674,7 +674,7 @@ class User < ActiveRecord::Base
if given
# John Doe, Sr.
if !suffix && given =~ SUFFIXES
if !likely_already_surname_first && !suffix && surname =~ /\s/ && given =~ SUFFIXES
suffix = given
given = surname
surname = nil
@ -699,8 +699,9 @@ class User < ActiveRecord::Base
[ given_parts.empty? ? nil : given_parts.join(' '), surname, suffix ]
end
def self.last_name_first(name, name_was = nil)
given, surname, suffix = name_parts(name, name_parts(name_was)[1])
def self.last_name_first(name, name_was = nil, likely_already_surname_first:)
previous_surname = name_parts(name_was, likely_already_surname_first: likely_already_surname_first)[1]
given, surname, suffix = name_parts(name, prior_surname: previous_surname)
given = [given, suffix].compact.join(' ')
surname ? "#{surname}, #{given}".strip : given
end
@ -722,8 +723,12 @@ class User < ActiveRecord::Base
self.short_name ||= self.name
self.sortable_name = nil if self.sortable_name == ""
# recalculate the sortable name if the name changed, but the sortable name didn't, and the sortable_name matches the old name
self.sortable_name = nil if !self.sortable_name_changed? && self.name_changed? && User.name_parts(self.sortable_name).compact.join(' ') == self.name_was
self.sortable_name = User.last_name_first(self.name, self.sortable_name_was) unless read_attribute(:sortable_name)
self.sortable_name = nil if !self.sortable_name_changed? &&
self.name_changed? &&
User.name_parts(self.sortable_name, likely_already_surname_first: true).compact.join(' ') == self.name_was
unless read_attribute(:sortable_name)
self.sortable_name = User.last_name_first(self.name, self.sortable_name_was, likely_already_surname_first: true)
end
self.reminder_time_for_due_dates ||= 48.hours.to_i
self.reminder_time_for_grading ||= 0
self.initial_enrollment_type = nil unless ['student', 'teacher', 'ta', 'observer'].include?(initial_enrollment_type)
@ -732,7 +737,8 @@ class User < ActiveRecord::Base
end
def sortable_name
self.sortable_name = read_attribute(:sortable_name) || User.last_name_first(self.name)
self.sortable_name = read_attribute(:sortable_name) ||
User.last_name_first(self.name, likely_already_surname_first: false)
end
def primary_pseudonym

View File

@ -124,7 +124,7 @@ module AccountReports
row << u.sis_user_id
row << u.unique_id
row << nil if @sis_format
name_parts = User.name_parts(u.sortable_name)
name_parts = User.name_parts(u.sortable_name, likely_already_surname_first: true)
row << name_parts[0] || '' # first name
row << name_parts[1] || '' # last name
row << u.name

View File

@ -1198,27 +1198,45 @@ describe User do
expect(User.name_parts('Cody Cutrer')).to eq ['Cody', 'Cutrer', nil]
expect(User.name_parts(' Cody Cutrer ')).to eq ['Cody', 'Cutrer', nil]
expect(User.name_parts('Cutrer, Cody')).to eq ['Cody', 'Cutrer', nil]
expect(User.name_parts('Cutrer, Cody',
likely_already_surname_first: true)).to eq ['Cody', 'Cutrer', nil]
expect(User.name_parts('Cutrer, Cody Houston')).to eq ['Cody Houston', 'Cutrer', nil]
expect(User.name_parts('Cutrer, Cody Houston',
likely_already_surname_first: true)).to eq ['Cody Houston', 'Cutrer', nil]
expect(User.name_parts('St. Clair, John')).to eq ['John', 'St. Clair', nil]
expect(User.name_parts('St. Clair, John',
likely_already_surname_first: true)).to eq ['John', 'St. Clair', nil]
# sorry, can't figure this out
expect(User.name_parts('John St. Clair')).to eq ['John St.', 'Clair', nil]
expect(User.name_parts('Jefferson Thomas Cutrer IV')).to eq ['Jefferson Thomas', 'Cutrer', 'IV']
expect(User.name_parts('Jefferson Thomas Cutrer, IV')).to eq ['Jefferson Thomas', 'Cutrer', 'IV']
expect(User.name_parts('Cutrer, Jefferson, IV')).to eq ['Jefferson', 'Cutrer', 'IV']
expect(User.name_parts('Cutrer, Jefferson, IV',
likely_already_surname_first: true)).to eq ['Jefferson', 'Cutrer', 'IV']
expect(User.name_parts('Cutrer, Jefferson IV')).to eq ['Jefferson', 'Cutrer', 'IV']
expect(User.name_parts('Cutrer, Jefferson IV',
likely_already_surname_first: true)).to eq ['Jefferson', 'Cutrer', 'IV']
expect(User.name_parts(nil)).to eq [nil, nil, nil]
expect(User.name_parts('Bob')).to eq ['Bob', nil, nil]
expect(User.name_parts('Ho, Chi, Min')).to eq ['Chi Min', 'Ho', nil]
expect(User.name_parts('Ho, Chi, Min')).to eq ['Chi Min', 'Ho', nil]
# sorry, don't understand cultures that put the surname first
# they should just manually specify their sort name
expect(User.name_parts('Ho Chi Min')).to eq ['Ho Chi', 'Min', nil]
expect(User.name_parts('')).to eq [nil, nil, nil]
expect(User.name_parts('John Doe')).to eq ['John', 'Doe', nil]
expect(User.name_parts('Junior')).to eq ['Junior', nil, nil]
expect(User.name_parts('John St. Clair', 'St. Clair')).to eq ['John', 'St. Clair', nil]
expect(User.name_parts('John St. Clair', 'Cutrer')).to eq ['John St.', 'Clair', nil]
expect(User.name_parts('St. Clair', 'St. Clair')).to eq [nil, 'St. Clair', nil]
expect(User.name_parts('John St. Clair', prior_surname: 'St. Clair')).to eq ['John', 'St. Clair', nil]
expect(User.name_parts('John St. Clair', prior_surname: 'Cutrer')).to eq ['John St.', 'Clair', nil]
expect(User.name_parts('St. Clair', prior_surname: 'St. Clair')).to eq [nil, 'St. Clair', nil]
expect(User.name_parts('St. Clair,')).to eq [nil, 'St. Clair', nil]
# don't get confused by given names that look like suffixes
expect(User.name_parts('Duing, Vi')).to eq ['Vi', 'Duing', nil]
# we can't be perfect. don't know what to do with this
expect(User.name_parts('Duing Chi Min, Vi')).to eq ['Duing Chi', 'Min', 'Vi']
# unless we thought it was already last name first
expect(User.name_parts('Duing Chi Min, Vi',
likely_already_surname_first: true)).to eq ['Vi', 'Duing Chi Min', nil]
end
it "should keep the sortable_name up to date if all that changed is the name" do