canvas-lms/lib/cc/cc_helper.rb

381 lines
14 KiB
Ruby

#
# Copyright (C) 2011 - present Instructure, Inc.
#
# This file is part of Canvas.
#
# Canvas is free software: you can redistribute it and/or modify it under
# the terms of the GNU Affero General Public License as published by the Free
# Software Foundation, version 3 of the License.
#
# Canvas is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
# details.
#
# You should have received a copy of the GNU Affero General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
#
require 'nokogiri'
module CC
module CCHelper
CANVAS_NAMESPACE = 'http://canvas.instructure.com/xsd/cccv1p0'
XSD_URI = 'https://canvas.instructure.com/xsd/cccv1p0.xsd'
# IMS formats/types
IMS_DATE = "%Y-%m-%d"
IMS_DATETIME = "%Y-%m-%dT%H:%M:%S"
CC_EXTENSION = 'imscc'
QTI_EXTENSION = ".xml.qti"
CANVAS_PLATFORM = 'canvas.instructure.com'
# Common Cartridge 1.0
# associatedcontent/imscc_xmlv1p0/learning-application-resource
# imsdt_xmlv1p0
# imswl_xmlv1p0
# imsqti_xmlv1p2/imscc_xmlv1p0/assessment
# imsqti_xmlv1p2/imscc_xmlv1p0/question-bank
# Common Cartridge 1.1 (What Canvas exports)
ASSESSMENT_TYPE = 'imsqti_xmlv1p2/imscc_xmlv1p1/assessment'
QUESTION_BANK = 'imsqti_xmlv1p2/imscc_xmlv1p1/question-bank'
DISCUSSION_TOPIC = "imsdt_xmlv1p1"
LOR = "associatedcontent/imscc_xmlv1p1/learning-application-resource"
WEB_LINK = "imswl_xmlv1p1"
WEBCONTENT = "webcontent"
BASIC_LTI = 'imsbasiclti_xmlv1p0'
BLTI_NAMESPACE = "http://www.imsglobal.org/xsd/imsbasiclti_v1p0"
# Common Cartridge 1.2
# associatedcontent/imscc_xmlv1p2/learning-application-resource
# imsdt_xmlv1p2
# imswl_xmlv1p2
# imsqti_xmlv1p2/imscc_xmlv1p2/assessment
# imsqti_xmlv1p2/imscc_xmlv1p2/question-bank
# imsbasiclti_xmlv1p0
# Common Cartridge 1.3
ASSIGNMENT_TYPE = "assignment_xmlv1p0"
ASSIGNMENT_NAMESPACE = "http://www.imsglobal.org/xsd/imscc_extensions/assignment"
ASSIGNMENT_XSD_URI = "http://www.imsglobal.org/profile/cc/cc_extensions/cc_extresource_assignmentv1p0_v1p0.xsd"
# QTI-only export
QTI_ASSESSMENT_TYPE = 'imsqti_xmlv1p2'
# substitution tokens
OBJECT_TOKEN = "$CANVAS_OBJECT_REFERENCE$"
COURSE_TOKEN = "$CANVAS_COURSE_REFERENCE$"
WIKI_TOKEN = "$WIKI_REFERENCE$"
WEB_CONTENT_TOKEN = "$IMS-CC-FILEBASE$"
# file names/paths
ASSESSMENT_CC_QTI = "assessment_qti.xml"
ASSESSMENT_NON_CC_FOLDER = 'non_cc_assessments'
ASSESSMENT_META = "assessment_meta.xml"
ASSIGNMENT_GROUPS = "assignment_groups.xml"
ASSIGNMENT_SETTINGS = "assignment_settings.xml"
COURSE_SETTINGS = "course_settings.xml"
COURSE_SETTINGS_DIR = "course_settings"
EXTERNAL_FEEDS = "external_feeds.xml"
GRADING_STANDARDS = "grading_standards.xml"
EVENTS = "events.xml"
LEARNING_OUTCOMES = "learning_outcomes.xml"
MANIFEST = 'imsmanifest.xml'
MODULE_META = "module_meta.xml"
RUBRICS = "rubrics.xml"
EXTERNAL_TOOLS = "external_tools.xml"
FILES_META = "files_meta.xml"
SYLLABUS = "syllabus.html"
WEB_RESOURCES_FOLDER = 'web_resources'
WIKI_FOLDER = 'wiki_content'
MEDIA_OBJECTS_FOLDER = 'media_objects'
CANVAS_EXPORT_FLAG = 'canvas_export.txt'
MEDIA_TRACKS = 'media_tracks.xml'
ASSIGNMENT_XML = 'assignment.xml'
EXTERNAL_CONTENT_FOLDER = 'external_content'
def ims_date(date=nil,default=Time.now)
CCHelper.ims_date(date, default)
end
def ims_datetime(date=nil,default=Time.now)
CCHelper.ims_datetime(date, default)
end
def self.create_key(object, prepend="")
if object.is_a? ActiveRecord::Base
key = object.asset_string
else
key = object.to_s
end
"i" + Digest::MD5.hexdigest(prepend + key)
end
def self.ims_date(date=nil,default=Time.now)
date ||= default
return nil unless date
date.respond_to?(:utc) ? date.utc.strftime(IMS_DATE) : date.strftime(IMS_DATE)
end
def self.ims_datetime(date=nil,default=Time.now)
date ||= default
return nil unless date
date.respond_to?(:utc) ? date.utc.strftime(IMS_DATETIME) : date.strftime(IMS_DATETIME)
end
def get_html_title_and_body_and_id(doc)
id = get_node_val(doc, 'html head meta[name=identifier] @content')
get_html_title_and_body(doc) << id
end
def get_html_title_and_body_and_meta_fields(doc)
meta_fields = {}
doc.css('html head meta').each do |meta_node|
if key = meta_node['name']
meta_fields[key] = meta_node['content']
end
end
get_html_title_and_body(doc) << meta_fields
end
def get_html_title_and_body(doc)
title = get_node_val(doc, 'html head title')
body = doc.at_css('html body').to_s.force_encoding(Encoding::UTF_8).gsub(%r{</?body>}, '').strip
[title, body]
end
def self.map_linked_objects(content)
linked_objects = []
html = Nokogiri::HTML.fragment(content)
html.css('a, img').each do |atag|
source = atag['href'] || atag['src']
next unless source =~ /%24[^%]*%24/
if source.include?(CGI.escape(CC::CCHelper::WEB_CONTENT_TOKEN))
attachment_key = source.sub(CGI.escape(CC::CCHelper::WEB_CONTENT_TOKEN), '')
attachment_key = attachment_key.split('?').first
attachment_key = attachment_key.split('/').map {|ak| CGI.unescape(ak)}.join('/')
linked_objects.push({local_path: attachment_key, type: 'Attachment'})
else
type, object_key = source.split('/').last 2
if type =~ /%24[^%]*%24/
type = object_key
object_key = nil
end
linked_objects.push({identifier: object_key, type: type})
end
end
linked_objects
end
require 'set'
class HtmlContentExporter
attr_reader :used_media_objects, :media_object_flavor, :media_object_infos
attr_accessor :referenced_files
def initialize(course, user, opts = {})
@media_object_flavor = opts[:media_object_flavor]
@used_media_objects = Set.new
@media_object_infos = {}
@rewriter = UserContent::HtmlRewriter.new(course, user, contextless_types: ['files'])
@course = course
@user = user
@track_referenced_files = opts[:track_referenced_files]
@for_course_copy = opts[:for_course_copy]
@for_epub_export = opts[:for_epub_export]
@key_generator = opts[:key_generator] || CC::CCHelper
@referenced_files = {}
@rewriter.set_handler('file_contents') do |match|
if match.url =~ %r{/media_objects/(\d_\w+)}
# This is a media object referencing an attachment that it shouldn't be
"/media_objects/#{$1}"
else
match.url.sub(/course( |%20)files/, WEB_CONTENT_TOKEN)
end
end
@rewriter.set_handler('files') do |match|
if match.obj_id.nil?
if match_data = match.url.match(%r{/files/folder/(.*)})
# this might not be the best idea but let's keep going and see what happens
"#{COURSE_TOKEN}/files/folder/#{match_data[1]}"
elsif match.prefix.present?
# If match.obj_id is nil, it's because we're actually linking to a page
# (the /courses/:id/files page) and not to a specific file. In this case,
# just pass it straight through.
"#{COURSE_TOKEN}/files"
end
else
if @course && match.obj_class == Attachment
obj = @course.attachments.find_by_id(match.obj_id)
else
obj = match.obj_class.where(id: match.obj_id).first
end
next(match.url) unless obj && (@rewriter.user_can_view_content?(obj) || @for_epub_export)
folder = obj.folder.full_name.sub(/course( |%20)files/, WEB_CONTENT_TOKEN)
folder = folder.split("/").map{|part| URI.escape(part)}.join("/")
@referenced_files[obj.id] = @key_generator.create_key(obj) if @track_referenced_files && !@referenced_files[obj.id]
# for files, turn it into a relative link by path, rather than by file id
# we retain the file query string parameters
path = "#{folder}/#{URI.escape(obj.display_name)}"
path = HtmlTextHelper.escape_html(path)
"#{path}#{CCHelper.file_query_string(match.rest)}"
end
end
wiki_handler = Proc.new do |match|
# WikiPagesController allows loosely-matching URLs; fix them before exporting
if match.obj_id.present?
url_or_title = match.obj_id
page = @course.wiki_pages.deleted_last.where(url: url_or_title).first ||
@course.wiki_pages.deleted_last.where(url: url_or_title.to_url).first ||
@course.wiki_pages.where(id: url_or_title.to_i).first
end
if page
"#{WIKI_TOKEN}/#{match.type}/#{page.url}#{match.query}"
else
"#{WIKI_TOKEN}/#{match.type}/#{match.obj_id}#{match.query}"
end
end
@rewriter.set_handler('wiki', &wiki_handler)
@rewriter.set_handler('pages', &wiki_handler)
@rewriter.set_handler('items') do |match|
item = ContentTag.find(match.obj_id)
migration_id = @key_generator.create_key(item)
new_url = "#{COURSE_TOKEN}/modules/#{match.type}/#{migration_id}#{match.query}"
end
@rewriter.set_default_handler do |match|
new_url = match.url
if match.obj_id && match.obj_class
obj = match.obj_class.where(id: match.obj_id).first
if obj && (@rewriter.user_can_view_content?(obj) || @for_epub_export)
# for all other types,
# create a migration id for the object, and use that as the new link
migration_id = @key_generator.create_key(obj)
query = translate_module_item_query(match.query)
new_url = "#{OBJECT_TOKEN}/#{match.type}/#{migration_id}#{query}"
end
elsif match.obj_id
new_url = "#{COURSE_TOKEN}/#{match.type}/#{match.obj_id}#{match.rest}"
else
new_url = "#{COURSE_TOKEN}/#{match.type}#{match.rest}"
end
new_url
end
protocol = HostUrl.protocol
host = HostUrl.context_host(@course)
port = ConfigFile.load("domain").try(:[], :domain).try(:split, ':').try(:[], 1)
@url_prefix = "#{protocol}://#{host}"
@url_prefix += ":#{port}" if !host.include?(':') && port.present?
end
def translate_module_item_query(query)
return query unless query&.include?("module_item_id=")
original_param = query.sub("?", "").split("&").detect{|p| p.include?("module_item_id=")}
tag_id = original_param.split("=").last
new_param = "module_item_id=#{@key_generator.create_key("content_tag_#{tag_id}")}"
query.sub(original_param, new_param)
end
attr_reader :course, :user
def html_page(html, title, meta_fields={})
content = html_content(html)
meta_html = ""
meta_fields.each_pair do |k, v|
next unless v.present?
meta_html += %{<meta name="#{HtmlTextHelper.escape_html(k.to_s)}" content="#{HtmlTextHelper.escape_html(v.to_s)}"/>\n}
end
%{<html>\n<head>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>\n<title>#{HtmlTextHelper.escape_html(title)}</title>\n#{meta_html}</head>\n<body>\n#{content}\n</body>\n</html>}
end
def html_content(html)
html = @rewriter.translate_content(html)
return html if html.blank? || @for_course_copy
# keep track of found media comments, and translate them into links into the files tree
# if imported back into canvas, they'll get uploaded to the media server
# and translated back into media comments
doc = Nokogiri::HTML::DocumentFragment.parse(html)
doc.css('a.instructure_inline_media_comment').each do |anchor|
next unless anchor['id']
media_id = anchor['id'].gsub(/^media_comment_/, '')
obj = MediaObject.by_media_id(media_id).first
if obj && migration_id = @key_generator.create_key(obj)
@used_media_objects << obj
info = CCHelper.media_object_info(obj, nil, media_object_flavor)
@media_object_infos[obj.id] = info
anchor['href'] = File.join(WEB_CONTENT_TOKEN, MEDIA_OBJECTS_FOLDER, info[:filename])
end
end
# prepend the Canvas domain to remaining absolute paths that are missing the host
# (those in the course are already "$CANVAS_COURSE_REFERENCE$/...", but links
# outside the course need a domain to be meaningful in the export)
# see also Api#api_user_content, which does a similar thing
Api::Html::Content::URL_ATTRIBUTES.each do |tag, attributes|
doc.css(tag).each do |element|
attributes.each do |attribute|
url_str = element[attribute]
begin
url = URI.parse(url_str)
if !url.host && url_str[0] == '/'[0]
element[attribute] = "#{@url_prefix}#{url_str}"
end
rescue URI::Error => e
# leave it as is
end
end
end
end
return doc.to_s
end
end
def self.media_object_info(obj, client = nil, flavor = nil)
unless client
client = CanvasKaltura::ClientV3.new
client.startSession(CanvasKaltura::SessionType::ADMIN)
end
if flavor
assets = client.flavorAssetGetByEntryId(obj.media_id)
asset = assets.sort_by { |f| f[:size].to_i }.reverse.find { |f| f[:containerFormat] == flavor }
asset ||= assets.first
else
asset = client.flavorAssetGetOriginalAsset(obj.media_id)
end
# we use the media_id as the export filename, since it is guaranteed to
# be unique
filename = "#{obj.media_id}.#{asset[:fileExt]}" if asset
{ :asset => asset, :filename => filename }
end
# sub_path is the last part of a file url: /courses/1/files/1(/download)
# we want to handle any sort of extra params to the file url, both in the
# path components and the query string
def self.file_query_string(sub_path)
return if sub_path.blank?
qs = []
begin
uri = URI.parse(sub_path)
unless uri.path == "/preview" # defaults to preview, so no qs added
qs << "canvas_#{Rack::Utils.escape(uri.path[1..-1])}=1"
end
Rack::Utils.parse_query(uri.query).each do |k,v|
qs << "canvas_qs_#{Rack::Utils.escape(k)}=#{Rack::Utils.escape(v)}"
end
rescue URI::Error => e
# if we can't parse the url, we can't preserve canvas query params
end
return nil if qs.blank?
"?#{qs.join("&")}"
end
end
end