Fix search outcomes

* Fix similarity order when searching outcomes
* Add support to remove stop words based on the Account language

closes OUT-4704, OUT-4705
flag=improved_outcomes_management

Test plan:
- Search for outcomes in the management screen and find outcomes modal
- Assert the outcomes appear "in the expected order"
- Also test adding stop words to the query and assert they don't influence
  the result

Change-Id: Iecbf37551e40f7e0024a15e90b2d283e56e82cff
Reviewed-on: https://gerrit.instructure.com/c/canvas-lms/+/271715
Tested-by: Service Cloud Jenkins <svc.cloudjenkins@instructure.com>
Reviewed-by: Brian Watson <bwatson@instructure.com>
Reviewed-by: Martin Yosifov <martin.yosifov@instructure.com>
Reviewed-by: Marcus Pompeu <marcus.pompeu@instructure.com>
QA-Review: Brian Watson <bwatson@instructure.com>
Product-Review: Ben Friedman <ben.friedman@instructure.com>
This commit is contained in:
Manoel Quirino Neto 2021-08-18 11:15:52 -03:00 committed by Manoel Quirino
parent f25f3df70f
commit e5568b0ed0
2 changed files with 195 additions and 31 deletions

View File

@ -24,9 +24,38 @@ module Outcomes
SHORT_DESCRIPTION = "coalesce(learning_outcomes.short_description, '')"
# rubocop:disable Layout/LineLength
# E'<[^>]+>' -> removes html tags
# E'&\\w+;' -> removes html entities
DESCRIPTION = "regexp_replace(regexp_replace(coalesce(learning_outcomes.description, ''), E'<[^>]+>', '', 'gi'), E'&\\w+;', ' ', 'gi')"
# rubocop:enable Layout/LineLength
# Maps a Canvas locale code (the account's default_locale) to the name of the
# PostgreSQL text search configuration handed to to_tsvector when a search
# query is tokenized. A locale missing from this table makes `lang` resolve to
# nil, in which case to_tsvector is called without an explicit configuration.
MAP_CANVAS_POSTGRES_LOCALES = {
"ar" => "arabic", # العربية
# NOTE(review): Catalan is mapped to the Spanish configuration — presumably
# because no Catalan configuration is available; confirm.
"ca" => "spanish", # Català
"da" => "danish", # Dansk
"da-x-k12" => "danish", # Dansk GR/GY
"de" => "german", # Deutsch
"en-AU" => "english", # English (Australia)
"en-CA" => "english", # English (Canada)
"en-GB" => "english", # English (United Kingdom)
"en" => "english", # English (US)
"es" => "spanish", # Español
"fr" => "french", # Français
"fr-CA" => "french", # Français (Canada)
"it" => "italian", # Italiano
"hu" => "hungarian", # Magyar
"nl" => "dutch", # Nederlands
"nb" => "norwegian", # Norsk (Bokmål)
"nb-x-k12" => "norwegian", # Norsk (Bokmål) GS/VGS
"pt" => "portuguese", # Português
"pt-BR" => "portuguese", # Português do Brasil
"ru" => "russian", # Русский
"fi" => "finnish", # Suomi
"sv" => "swedish", # Svenska
"sv-x-k12" => "swedish", # Svenska GR/GY
"tr" => "turkish" # Türkçe
}.freeze
def initialize(context = nil)
@context = context
@ -45,10 +74,10 @@ module Outcomes
def suboutcomes_by_group_id(learning_outcome_group_id, args={})
learning_outcome_groups_ids = children_ids(learning_outcome_group_id) << learning_outcome_group_id
relation = ContentTag.active.learning_outcome_links.
where(associated_asset_id: learning_outcome_groups_ids).
joins(:learning_outcome_content).
joins("INNER JOIN #{LearningOutcomeGroup.quoted_table_name} AS logs
relation = ContentTag.active.learning_outcome_links
.where(associated_asset_id: learning_outcome_groups_ids)
.joins(:learning_outcome_content)
.joins("INNER JOIN #{LearningOutcomeGroup.quoted_table_name} AS logs
ON logs.id = content_tags.associated_asset_id")
if args[:search_query]
@ -73,13 +102,20 @@ module Outcomes
Rails.cache.delete(context_timestamp_cache_key) if improved_outcomes_management?
end
# Returns the names (cfgname) of every text search configuration listed in
# pg_ts_config. The list is memoized on the class, so the catalog is queried
# only the first time this is called.
def self.supported_languages
  @supported_languages ||= begin
    rows = ContentTag.connection.execute('SELECT cfgname FROM pg_ts_config').to_a
    rows.map { |row| row['cfgname'] }
  end
end
private
def total_outcomes_for(learning_outcome_group_id, args={})
learning_outcome_groups_ids = children_ids(learning_outcome_group_id) << learning_outcome_group_id
relation = ContentTag.active.learning_outcome_links.
where(associated_asset_id: learning_outcome_groups_ids)
relation = ContentTag.active.learning_outcome_links
.where(associated_asset_id: learning_outcome_groups_ids)
if args[:search_query]
relation = relation.joins(:learning_outcome_content)
@ -90,18 +126,33 @@ module Outcomes
end
def add_search_query(relation, search_query)
search_query_tokens = search_query.split(' ')
# Checking whether the language is supported inside the same query (with a
# CASE WHEN) does not work: PostgreSQL still parses to_tsvector with the
# unsupported language and raises an error.
short_description_query = ContentTag.sanitize_sql_array(["#{SHORT_DESCRIPTION} ~* ANY(array[?])", search_query_tokens])
sql = if self.class.supported_languages.include?(lang)
ContentTag.sanitize_sql_array([<<~SQL.squish, lang, search_query])
SELECT unnest(tsvector_to_array(to_tsvector(?, ?))) as token
SQL
else
ContentTag.sanitize_sql_array([<<~SQL.squish, search_query])
SELECT unnest(tsvector_to_array(to_tsvector(?))) as token
SQL
end
search_query_tokens = ContentTag.connection.execute(sql).to_a.map {|r| r['token']}.uniq
short_description_query = ContentTag.sanitize_sql_array(["#{SHORT_DESCRIPTION} ~* ANY(array[?])",
search_query_tokens])
description_query = ContentTag.sanitize_sql_array(["#{DESCRIPTION} ~* ANY(array[?])", search_query_tokens])
relation.where("#{short_description_query} OR #{description_query}")
end
def add_search_order(relation, search_query)
select_query = ContentTag.sanitize_sql_array([<<-SQL, search_query, search_query])
select_query = ContentTag.sanitize_sql_array([<<-SQL.squish, search_query, search_query])
"content_tags".*,
GREATEST(public.word_similarity(#{SHORT_DESCRIPTION}, ?), public.word_similarity(#{DESCRIPTION}, ?)) as sim
GREATEST(public.word_similarity(?, #{SHORT_DESCRIPTION}), public.word_similarity(?, #{DESCRIPTION})) as sim
SQL
relation.select(select_query).order(
@ -127,7 +178,7 @@ module Outcomes
end
def learning_outcome_group_descendants_query
<<-SQL
<<-SQL.squish
WITH RECURSIVE levels AS (
SELECT id, learning_outcome_group_id AS parent_id
FROM (#{LearningOutcomeGroup.active.where(context: context).to_sql}) AS data
@ -163,15 +214,25 @@ module Outcomes
end
def context_asset_string
@_context_asset_string ||= (context || LearningOutcomeGroup.global_root_outcome_group).global_asset_string
@context_asset_string ||= (context || LearningOutcomeGroup.global_root_outcome_group).global_asset_string
end
def improved_outcomes_management?
@improved_outcomes_management ||= begin
return context.root_account.feature_enabled?(:improved_outcomes_management) if context
@improved_outcomes_management ||= if context
context.root_account.feature_enabled?(:improved_outcomes_management)
else
LoadAccount.default_domain_root_account.feature_enabled?(:improved_outcomes_management)
end
end
# Resolves the PostgreSQL text search configuration name for the root account
# of the current context, or of the default domain root account when there is
# no context. A missing default_locale is treated as "en". The result may
# legitimately be nil (locale not in MAP_CANVAS_POSTGRES_LOCALES), so the
# memoization checks whether @lang has been assigned rather than its
# truthiness.
def lang
  return @lang if defined?(@lang)

  account = context&.root_account || LoadAccount.default_domain_root_account
  locale = account.default_locale || "en"
  @lang = MAP_CANVAS_POSTGRES_LOCALES[locale]
end
end
end

View File

@ -23,7 +23,7 @@ require 'spec_helper'
describe Outcomes::LearningOutcomeGroupChildren do
subject { described_class.new(context) }
# rubocop:disable RSpec/LetSetup
# rubocop:disable RSpec/LetSetup, Layout/LineLength
let!(:context) { Account.default }
let!(:global_group) { LearningOutcomeGroup.create(title: 'global') }
let!(:global_group_subgroup) { global_group.child_outcome_groups.build(title: 'global subgroup') }
@ -48,7 +48,7 @@ describe Outcomes::LearningOutcomeGroupChildren do
let!(:o9) { outcome_model(context: context, outcome_group: g6, title:'Outcome 7.1', short_description: 'Outcome 7.1') }
let!(:o10) { outcome_model(context: context, outcome_group: g6, title:'Outcome 7.2', short_description: 'Outcome 7.2') }
let!(:o11) { outcome_model(context: context, outcome_group: g6, title:'Outcome 7.3 mathematic', short_description: 'Outcome 7.3 mathematic') }
# rubocop:enable RSpec/LetSetup
# rubocop:enable RSpec/LetSetup, Layout/LineLength
# Outcome Structure for visual reference
# Global
@ -151,7 +151,8 @@ describe Outcomes::LearningOutcomeGroupChildren do
describe '#suboutcomes_by_group_id' do
it 'returns the outcomes ordered by parent group title then outcome short_description' do
g_outcomes = subject.suboutcomes_by_group_id(global_group.id).map(&:learning_outcome_content).map(&:short_description)
g_outcomes = subject.suboutcomes_by_group_id(global_group.id)
.map(&:learning_outcome_content).map(&:short_description)
expect(g_outcomes).to match_array(['G Outcome 1', 'G Outcome 2'])
r_outcomes = subject.suboutcomes_by_group_id(g0.id).map(&:learning_outcome_content).map(&:short_description)
expect(r_outcomes).to match_array(
@ -226,7 +227,8 @@ describe Outcomes::LearningOutcomeGroupChildren do
subject { described_class.new }
it 'returns global outcomes' do
outcomes = subject.suboutcomes_by_group_id(global_group.id).map(&:learning_outcome_content).map(&:short_description)
outcomes = subject.suboutcomes_by_group_id(global_group.id).map(&:learning_outcome_content)
.map(&:short_description)
expect(outcomes).to match_array(['G Outcome 1', 'G Outcome 2'])
end
end
@ -258,13 +260,15 @@ describe Outcomes::LearningOutcomeGroupChildren do
outcome_group: g1,
title: "FO.3",
description: 'apply their growing knowledge of root words, prefixes and suffixes (etymology and morphology)'\
' as listed in English Appendix 1, both to read aloud and to understand the meaning of new words they meet'
' as listed in English Appendix 1, both to read aloud and to understand the meaning of new wor'\
'ds they meet'
)
outcome_model(
context: context,
outcome_group: g1,
title: "HT.ML.1.1",
description: '<p>Pellentesque&nbsp;habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas.</p>'
description: '<p>Pellentesque&nbsp;habitant morbi tristique senectus et netus et malesuada fames ac turpis e'\
'gestas.</p>'
)
outcome_model(
context: context,
@ -275,42 +279,141 @@ describe Outcomes::LearningOutcomeGroupChildren do
end
it "filters title with non-alphanumerical chars" do
outcomes = subject.suboutcomes_by_group_id(g1.id, {search_query: "LA.1"}).map(&:learning_outcome_content).map(&:short_description)
outcomes = subject.suboutcomes_by_group_id(g1.id, {search_query: "LA.1"})
.map(&:learning_outcome_content).map(&:short_description)
expect(outcomes).to eql([
"LA.1.1.1", "LA.1.1.1.1"
])
end
it "filters description with text content" do
outcomes = subject.suboutcomes_by_group_id(g1.id, {search_query: "knowledge"}).map(&:learning_outcome_content).map(&:short_description)
outcomes = subject.suboutcomes_by_group_id(g1.id, {search_query: "knowledge"})
.map(&:learning_outcome_content).map(&:short_description)
expect(outcomes).to eql([
'LA.1.1.1', 'FO.3'
'FO.3', 'LA.1.1.1'
])
end
it "filters description with html content" do
outcomes = subject.suboutcomes_by_group_id(g1.id, {search_query: "Pellentesque"}).map(&:learning_outcome_content).map(&:short_description)
outcomes = subject.suboutcomes_by_group_id(g1.id, {search_query: "Pellentesque"})
.map(&:learning_outcome_content).map(&:short_description)
expect(outcomes).to eql([
'HT.ML.1.1'
])
end
it "filters more than 1 word" do
outcomes = subject.suboutcomes_by_group_id(g1.id, {search_query: "LA.1.1 Pellentesque"}).map(&:learning_outcome_content).map(&:short_description)
outcomes = subject.suboutcomes_by_group_id(g1.id, {search_query: "LA.1.1 Pellentesque"})
.map(&:learning_outcome_content).map(&:short_description)
expect(outcomes).to eql([
"HT.ML.1.1",
"LA.1.1.1",
"LA.1.1.1.1",
"HT.ML.1.1"
"LA.1.1.1.1"
])
end
it "filters when words aren't all completed" do
outcomes = subject.suboutcomes_by_group_id(g1.id, {search_query: "recog awe"}).map(&:learning_outcome_content).map(&:short_description)
outcomes = subject.suboutcomes_by_group_id(g1.id, {search_query: "recog awe"})
.map(&:learning_outcome_content).map(&:short_description)
expect(outcomes).to eql([
"HT.ML.1.2",
"LA.2.2.1.2"
"LA.2.2.1.2",
"HT.ML.1.2"
])
end
context 'when lang is portuguese' do
it "filters outcomes removing portuguese stop words" do
account = context.root_account
account.default_locale = "pt-BR"
account.save!
outcome_model(
context: context,
outcome_group: g1,
title: "will bring",
description: '<p>Um texto <b>portugues</b>.</p>'
)
outcome_model(
context: context,
outcome_group: g1,
title: "won't bring",
description: '<p>Um animal bonito.</p>'
)
outcomes = subject.suboutcomes_by_group_id(
g1.id, {
search_query: "Um portugues"
}
).map(&:learning_outcome_content).map(&:short_description)
expect(outcomes).to eql([
"will bring"
])
end
context 'when context is nil' do
subject { described_class.new }
it 'filters outcomes removing portuguese stop words' do
account = Account.default
account.default_locale = "pt-BR"
account.save!
outcome_model(
global: true,
title: "will bring",
description: '<p>Um texto <b>portugues</b>.</p>'
)
outcome_model(
global: true,
title: "won't bring",
description: '<p>Um animal bonito.</p>'
)
outcomes = subject.suboutcomes_by_group_id(
LearningOutcomeGroup.find_or_create_root(nil, true).id, {
search_query: "Um portugues"
}
).map(&:learning_outcome_content).map(&:short_description)
expect(outcomes).to eql([
"will bring"
])
end
end
end
context 'when lang is not supported' do
before do
account = context.root_account
account.default_locale = "pl" # polski
account.save!
end
it "filters outcomes normally" do
outcome_model(
context: context,
outcome_group: g1,
title: "will bring",
description: '<p>Um texto <b>portugues</b>.</p>'
)
outcome_model(
context: context,
outcome_group: g1,
title: "will bring too",
description: '<p>Um animal bonito.</p>'
)
outcomes = subject.suboutcomes_by_group_id(
g1.id, {search_query: "Um portugues"}
).map(&:learning_outcome_content).map(&:short_description)
expect(outcomes).to eql([
"will bring",
"will bring too"
])
end
end
end
end