2020-10-27 00:50:13 +08:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
2011-02-01 09:57:29 +08:00
|
|
|
#
|
2017-04-28 04:03:36 +08:00
|
|
|
# Copyright (C) 2011 - present Instructure, Inc.
|
2011-02-01 09:57:29 +08:00
|
|
|
#
|
|
|
|
# This file is part of Canvas.
|
|
|
|
#
|
|
|
|
# Canvas is free software: you can redistribute it and/or modify it under
|
|
|
|
# the terms of the GNU Affero General Public License as published by the Free
|
|
|
|
# Software Foundation, version 3 of the License.
|
|
|
|
#
|
|
|
|
# Canvas is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
|
|
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
|
|
|
|
# A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
|
|
|
|
# details.
|
|
|
|
#
|
|
|
|
# You should have received a copy of the GNU Affero General Public License along
|
|
|
|
# with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
#
|
|
|
|
|
2015-04-09 01:21:08 +08:00
|
|
|
require 'nokogiri'
|
|
|
|
|
2011-02-01 09:57:29 +08:00
|
|
|
# Converts HTML and plain text from imported content, and coordinates the
# parse -> resolve -> replace pipeline for links found in that content.
#
# The link work itself is delegated to three collaborators built from the
# migration: Importers::LinkParser, Importers::LinkResolver and
# Importers::LinkReplacer.
class ImportedHtmlConverter
  include TextHelper
  include HtmlTextHelper

  # Element names that may be peeled away when they merely wrap a single child.
  CONTAINER_TYPES = ["div", "p", "body"].freeze
  # Attributes of every element that are handed to the link parser.
  LINK_ATTRS = ["rel", "href", "src", "data", "value", "longdesc"].freeze

  attr_reader :link_parser, :link_resolver, :link_replacer

  # @param migration the migration object passed through to each link helper
  def initialize(migration)
    @migration = migration
    @link_parser = Importers::LinkParser.new(migration)
    @link_resolver = Importers::LinkResolver.new(migration)
    @link_replacer = Importers::LinkReplacer.new(migration)
  end

  # Parses +html+ as an HTML5 document, passes every (element, link attribute)
  # pair to the link parser, and returns the inner HTML of the resulting body.
  #
  # @param html [String, nil] markup to convert; nil is treated as ""
  # @param item_type forwarded unchanged to the link parser
  # @param mig_id identifier of the item; stringified before being forwarded
  # @param field forwarded unchanged to the link parser
  # @param opts [Hash] when :remove_outer_nodes_if_one_child is truthy,
  #   attribute-less div/p/body wrappers that are the sole child of their
  #   parent (and themselves have a child) are stripped from the outside in
  # @return [String] the converted HTML; "" when the document has no body or
  #   Nokogiri raises a SyntaxError
  def convert(html, item_type, mig_id, field, opts = {})
    migration_id = mig_id.to_s
    document = Nokogiri::HTML5(html || "")

    document.search("*").each do |element|
      LINK_ATTRS.each do |attr_name|
        @link_parser.convert_link(element, attr_name, item_type, migration_id, field)
      end
    end

    root = document.at_css("body")
    return "" unless root

    if opts[:remove_outer_nodes_if_one_child]
      # Descend while the current node holds exactly one child and that child
      # is a non-empty, attribute-less container element.
      while root.children.size == 1
        only_child = root.child
        break unless only_child.child &&
                     CONTAINER_TYPES.include?(only_child.name) &&
                     only_child.attributes.blank?

        root = only_child
      end
    end

    root.inner_html
  rescue Nokogiri::SyntaxError
    ""
  end

  # Runs +text+ (nil is treated as "") through format_message and returns
  # element 0 of the result.
  def convert_text(text)
    formatted = format_message(text || "")
    formatted[0]
  end

  # Resolves every link the parser has collected so far, replaces the
  # corresponding placeholders, then resets the parser. Does nothing when the
  # parser has no unresolved links.
  def resolve_content_links!
    unresolved = @link_parser.unresolved_link_map
    return if unresolved.blank?

    @link_resolver.resolve_links!(unresolved)
    @link_replacer.replace_placeholders!(unresolved)
    @link_parser.reset!
  end

  # Tells whether +url+ parses as a relative URI that is not
  # protocol-relative (i.e. does not start with "//").
  #
  # @return [Boolean] false as well when the url cannot be parsed, in which
  #   case a warning is logged
  def self.relative_url?(url)
    return false unless URI.parse(url).relative?

    !url.to_s.start_with?("//")
  rescue URI::Error
    # An unparseable url is reported and treated as not relative.
    Rails.logger.warn "attempting to translate invalid url: #{url}"
    false
  end
end
|