display inline flv media in ePubs

refs CNVS-21799

- Fetch transcoded mp4 in place of exported flvs.
- Reference mp4s in place of flvs in generated xhtml.
- Exclude from the files array any files that should not be uploaded to
  the ePub.
- Remove from xhtml references to files that are not included in the
  ePub.

test plan:
- Have a course with an assignment description, quiz description,
  discussion topic description, or wiki page body that contains a
  recorded video comment.
- Export course as an ePub.
- Observe that the video displays inline.

- Have a course with unsupported media (the official _supported_ list
  is: 'jpg', 'gif', 'png', 'mp4', 'm4v', 'flv', 'mp3').
- Export an ePub.
- Observe that the unsupported file is not part of the package (using
  something like Calibre's book editor).
- Observe that the reference to the media has been replaced with an
  explanation of its absence.

Change-Id: I9f5eab8fa1c86e719d69e4ae42451e795ee4b471
Reviewed-on: https://gerrit.instructure.com/64365
Reviewed-by: Matt Berns <mberns@instructure.com>
Tested-by: Jenkins
QA-Review: Landon Holmstead <lholmstead@instructure.com>
Product-Review: Cosme Salazar <cosme@instructure.com>
This commit is contained in:
John Corrigan 2015-09-30 16:44:45 -05:00
parent fda055b06d
commit c3e48f62e1
5 changed files with 257 additions and 44 deletions

View File

@ -11,7 +11,7 @@ module CC::Exporter::Epub
def add_files
files.each do |file_data|
File.open(file_data[:full_path]) do |file|
File.open(file_data[:path_to_file]) do |file|
epub.add_item(file_data[:local_path], file, file_data[:migration_id], {
'media-type' => file_data[:media_type]
})

View File

@ -23,7 +23,7 @@ module CC::Exporter::Epub::Converters
def convert_placeholder_paths_from_string!(html_string)
html_node = Nokogiri::HTML::DocumentFragment.parse(html_string)
html_node.tap do |node|
convert_media_paths!(node)
convert_media_from_node!(node)
remove_empty_ids!(node)
end
html_node.to_s

View File

@ -3,38 +3,147 @@ module CC::Exporter::Epub::Converters
include CC::CCHelper
include CC::Exporter
def convert_files
files = []
@manifest.css("resource[type=#{WEBCONTENT}][href^=#{WEB_RESOURCES_FOLDER}]").each do |res|
full_path = File.expand_path(get_full_path(res['href']))
local_path = File.join(
File.dirname(res['href'].sub(WEB_RESOURCES_FOLDER, CC::Exporter::Epub::FILE_PATH)),
CGI.escape(File.basename(res['href']))
)
files << {
migration_id: res['identifier'],
local_path: local_path,
file_name: File.basename(local_path),
full_path: full_path,
media_type: media_type_for(File.basename(local_path))
}
class FlvToMp4
# @param flv_path [String] path of the .flv file to convert; FilePresenter
#   passes the expanded on-disk path of the exported file.
def initialize(flv_path)
@flv_path = flv_path
end
attr_reader :flv_path
# Downloads the mp4 rendition of this flv and writes it to mp4_path.
#
# Returns mp4_path after a successful write, or the untouched flv_path when
# no mp4 source URL is available.
def convert!
  return flv_path unless mp4_url.present?

  # Block form closes the handle even if the HTTP request or write raises.
  File.open(mp4_path, 'wb') do |mp4_file|
    CanvasHttp.get(mp4_url) do |response|
      mp4_file.write(response.body)
    end
  end
  mp4_path
end
private
# File name of the flv, i.e. the last component of flv_path.
def flv_filename
File.basename(flv_path)
end
# The media id handed to the media source fetcher: the flv's file name
# without its trailing `.flv` extension. Only the extension is stripped, so
# a name that happens to contain ".flv" elsewhere is left intact.
def media_id
  File.basename(flv_filename, '.flv')
end
# Memoized MediaSourceFetcher wrapping a newly constructed
# CanvasKaltura::ClientV3.
def media_source_fetcher
@_media_source_fetcher ||= MediaSourceFetcher.new(CanvasKaltura::ClientV3.new)
end
# Destination path for the downloaded mp4: flv_path with '.flv' swapped for
# '.mp4'.
# NOTE(review): gsub replaces every '.flv' occurrence in the full path,
# directory names included — confirm export paths never contain '.flv'
# anywhere but the file extension.
def mp4_path
flv_path.gsub('.flv', '.mp4')
end
# URL of the preferred mp4 source for media_id, as returned by
# fetch_preferred_source_url on the media source fetcher. The result is
# memoized with `||=`, so a nil/false result is not cached and the lookup is
# repeated on the next call.
def mp4_url
@_mp4_url ||= media_source_fetcher.fetch_preferred_source_url({
media_id: media_id,
file_extension: 'mp4'
})
end
files
end
# According to the [ePub 3 spec on item elements][1], the media-type attribute
# should be defined in accordance with [MIME document RFC2046][2].
#
# [1]: http://www.idpf.org/epub/30/spec/epub30-publications.html#elemdef-package-item
# [2]: http://tools.ietf.org/html/rfc2046
def media_type_for(file_name)
case File.extname(file_name)
when '.mp3'
'audio/basic'
when '.mov', '.mp4'
'video/mpg'
when '.jpg', '.png', '.gif'
"image/#{File.extname(file_name).gsub('.', '')}"
# Presents one exported web resource as the hash the ePub builder consumes
# (see #to_h). Flv files are swapped for their downloaded mp4 rendition when
# one can be fetched.
class FilePresenter
  include CC::CCHelper

  # File extension => media type for the manifest `item` element.
  #
  # According to the [ePub 3 spec on item elements][1], the media-type
  # attribute should be defined in accordance with [MIME document RFC2046][2],
  # so only registered type names are used here (e.g. `image/jpeg`, not
  # `image/jpg`; `audio/mpeg`, not `audio/basic`, for mp3).
  #
  # [1]: http://www.idpf.org/epub/30/spec/epub30-publications.html#elemdef-package-item
  # [2]: http://tools.ietf.org/html/rfc2046
  MEDIA_TYPES = {
    '.mp3' => 'audio/mpeg',
    '.m4v' => 'video/mp4',
    '.mp4' => 'video/mp4',
    '.jpg' => 'image/jpeg',
    '.jpeg' => 'image/jpeg',
    '.png' => 'image/png',
    '.gif' => 'image/gif'
  }.freeze

  attr_reader :data, :original_path

  # @param original_path [String] full path to the exported file on disk
  # @param data [#[]] manifest resource; only data['identifier'] is read
  def initialize(original_path, data)
    @original_path = original_path
    @data = data
  end

  # @return [Hash] with keys :migration_id, :local_path, :file_name,
  #   :path_to_file and :media_type (nil for unsupported file types)
  def to_h
    {
      migration_id: data['identifier'],
      local_path: local_path,
      file_name: File.basename(local_path),
      path_to_file: path_to_file,
      media_type: media_type
    }
  end

  private

  def flv?
    File.extname(original_path) == '.flv'
  end

  # The path for the file in the ePub itself. This method removes the
  # path up to (and including) the standard export placeholder
  # (WEB_RESOURCES_FOLDER) and replaces it with the root folder for media
  # in the ePub (CC::Exporter::Epub::FILE_PATH), while maintaining folder
  # structure beyond the export root.
  #
  # (Note that the path we're working with in this class is the full path
  # to the file, not the path relative to the unzipped export.)
  #
  # Changes this:
  #
  #   /Users/username/Documents/canvas-lms/exports/d20150930-26055-1wlhczz/web_resources/media_objects/m-ArYKbPPdLwtbhcHPjxYsQvMeDCPZKZp.mp3
  #
  # into this:
  #
  #   media/media_objects/m-ArYKbPPdLwtbhcHPjxYsQvMeDCPZKZp.mp3
  #
  # Or this:
  #
  #   /Users/username/Documents/canvas-lms/exports/d20150930-26055-1wlhczz/web_resources/m-ArYKbPPdLwtbhcHPjxYsQvMeDCPZKZp.mp3
  #
  # into this:
  #
  #   media/m-ArYKbPPdLwtbhcHPjxYsQvMeDCPZKZp.mp3
  def local_path
    @_local_path ||= begin
      relative_dir = File.dirname(original_path.match(/#{WEB_RESOURCES_FOLDER}\/(.+)$/)[1])
      path_parts = [
        CC::Exporter::Epub::FILE_PATH,
        relative_dir,
        CGI.escape(File.basename(path_to_file))
      ]
      # File.dirname yields "." for files directly under the export root;
      # drop it so the result is "media/file" rather than "media/./file".
      File.join(path_parts.reject { |part| part == '.' })
    end
  end

  # Media type for the file that will actually be packaged (path_to_file),
  # or nil when the extension is not supported in the ePub.
  def media_type
    MEDIA_TYPES[File.extname(path_to_file)]
  end

  # Path of the file to package: the converted mp4 for flvs (or the flv
  # itself when conversion is not possible), the original file otherwise.
  def path_to_file
    @_path_to_file ||= flv? ? FlvToMp4.new(original_path).convert! : original_path
  end
end
# Builds the list of file hashes to package into the ePub from every
# web-content resource in the manifest that lives under WEB_RESOURCES_FOLDER.
def convert_files
  selector = "resource[type=#{WEBCONTENT}][href^=#{WEB_RESOURCES_FOLDER}]"
  presented_files = @manifest.css(selector).map do |resource|
    absolute_path = File.expand_path(get_full_path(resource['href']))
    FilePresenter.new(absolute_path, resource).to_h
  end

  # Unsupported file types come back with a nil media_type and must not be
  # part of the final collection. They are not filtered out up front because
  # certain unsupported file types, namely flvs, are first given a chance to
  # be converted; they are only dropped once nothing more can be done.
  presented_files.reject { |file_hash| file_hash[:media_type].nil? }
end
end

View File

@ -5,6 +5,7 @@ module CC::Exporter::Epub::Converters
def convert_media_from_node!(html_node)
html_node.tap do |node|
convert_media_paths!(node)
convert_flv_paths!(node)
convert_audio_tags!(node)
convert_video_tags!(node)
end
@ -29,26 +30,65 @@ module CC::Exporter::Epub::Converters
# which will match the directory the content is stored in in the ePub.
def convert_media_paths!(html_node)
{ a: 'href', img: 'src' }.each do |tag, attr|
html_node.search(tag).each do |match|
selector = "#{tag}[#{attr}*='#{WEB_CONTENT_TOKEN.gsub('$', '')}']"
html_node.search(selector).each do |match|
unescaped = CGI.unescape(match[attr]).gsub(/\?.*/, '')
match[attr] = File.join(
File.dirname(unescaped).gsub(WEB_CONTENT_TOKEN, CC::Exporter::Epub::FILE_PATH),
CGI.escape(File.basename(unescaped))
)
if path_should_be_converted?(unescaped)
match[attr] = converted_media_path(unescaped)
else
match.replace(<<-SPAN_TAG)
<span>
#{I18n.t("File %{filename} could not be included in the ePub document.", {
filename: File.basename(unescaped)
})}
</span>
SPAN_TAG
end
end
end
end
def path_should_be_converted?(path)
filename = File.basename(path).gsub(/#{File.extname(path)}/, '')
@course[:files].any? do |file|
file[:file_name].match(filename)
end
end
# Rewrites an export path so it points into the ePub: WEB_CONTENT_TOKEN in
# the directory portion is replaced with CC::Exporter::Epub::FILE_PATH and
# the file name is CGI-escaped.
def converted_media_path(path)
  epub_directory = File.dirname(path).gsub(WEB_CONTENT_TOKEN, CC::Exporter::Epub::FILE_PATH)
  escaped_file_name = CGI.escape(File.basename(path))
  File.join(epub_directory, escaped_file_name)
end
# Rewrites the href of every `<a>` tag whose href contains `flv` so that
# `.flv` becomes `.mp4`.
#
# Before:
#
#   media/media_objects/m-5G7G2CcbF2nd3nZ8pyT1z16ytNaQuQ1X.flv
#
# After:
#
#   media/media_objects/m-5G7G2CcbF2nd3nZ8pyT1z16ytNaQuQ1X.mp4
def convert_flv_paths!(html_node)
  flv_links = html_node.search("a[href*='flv']")
  flv_links.each { |link| link['href'] = link['href'].gsub('.flv', '.mp4') }
end
# Find `<a>` tags with class `instructure_audio_link` and replaces it with
# an audio tag, which is supported by ePub documents.
#
# Turns this:
#
# "<a class='instructure_audio_link' href='media/audio.mp3'>Here is your audio link</a>
# <a class='instructure_audio_link' href='media/audio.mp3'>Here is your audio link</a>
#
# into this:
#
# "<audio src='media/audio.mp3' controls='controls' />
# <audio src='media/audio.mp3' controls='controls' />
def convert_audio_tags!(html_node)
html_node.search('a.instructure_audio_link, a.audio_comment').each do |audio_link|
audio_link.replace(<<-AUDIO_TAG)
@ -60,15 +100,15 @@ module CC::Exporter::Epub::Converters
end
# Find `<a>` tags with class `instructure_video_link` and replaces it with
# a audio tag, which is supported by ePub documents.
# a video tag, which is supported by ePub documents.
#
# Turns this:
#
# "<a class='instructure_video_link' href='media/video.mp4'>Here is your video link</a>
# <a class='instructure_video_link' href='media/video.mp4'>Here is your video link</a>
#
# into this:
#
# "<video src='media/video.mp4' controls='controls' />
# <video src='media/video.mp4' controls='controls' />
def convert_video_tags!(html_node)
html_node.search('a.instructure_video_link, a.video_comment').each do |video_link|
video_link.replace(<<-VIDEO_TAG)

View File

@ -3,20 +3,45 @@ require File.expand_path(File.dirname(__FILE__) + '/../../../cc_spec_helper')
describe "MediaConverter" do
# Minimal host class for exercising the MediaConverter mixin in isolation.
# The mixin's path_should_be_converted? reads @course[:files], so examples
# pass the packaged files in through the constructor.
class MediaConverterTest
include CC::Exporter::Epub::Converters::MediaConverter
# @param course [Hash] course data; defaults to an empty hash
def initialize(course={})
@course = course
end
end
describe "#convert_media_paths!" do
let_once(:a_href) do
"#{CGI.escape(MediaConverterTest::WEB_CONTENT_TOKEN)}/path/to/img.jpg"
end
let_once(:img_src) do
"#{CGI.escape(MediaConverterTest::WEB_CONTENT_TOKEN)}/path/to/img.jpg"
end
let_once(:mov_path) do
"#{CGI.escape(MediaConverterTest::WEB_CONTENT_TOKEN)}/path/to/movie.mov"
end
let(:doc) do
Nokogiri::HTML::DocumentFragment.parse(<<-HTML)
<div>
<a href="#{CGI.escape(MediaConverterTest::WEB_CONTENT_TOKEN)}/path/to/img.jpg">
<a href="#{a_href}">
Image Link
</a>
<img src="#{CGI.escape(MediaConverterTest::WEB_CONTENT_TOKEN)}/path/to/img.jpg" />
<img src="#{img_src}" />
<a href="#{mov_path}">
This is no good.
</a>
</div>
HTML
end
subject(:test_instance) { MediaConverterTest.new }
subject(:test_instance) do
MediaConverterTest.new({
files: [{
file_name: File.basename(a_href)
}, {
file_name: File.basename(img_src)
}]
})
end
it "should update link hrefs containing WEB_CONTENT_TOKEN" do
expect(doc.search('a').all? do |element|
@ -49,6 +74,45 @@ describe "MediaConverter" do
element['src'].match(CGI.escape(MediaConverterTest::WEB_CONTENT_TOKEN))
end).to be_falsey
end
it "should replace media that is not present with a span" do
expect(doc.search("a[href*='#{mov_path}']").any?).to be_truthy, 'precondition'
expect(doc.search("span").empty?).to be_truthy, 'precondition'
test_instance.convert_media_paths!(doc)
expect(doc.search("a[href*='#{mov_path}']").empty?).to be_truthy
expect(doc.search("span").any?).to be_truthy
end
end
# Verifies that links to flv files are rewritten to point at mp4 files.
describe "#convert_flv_paths!" do
  let(:doc) do
    Nokogiri::HTML::DocumentFragment.parse(<<-HTML)
      <div>
        <a href="media/media_objects/m-5G7G2CcbF2nd3nZ8pyT1z16ytNaQuQ1X.flv">
          Video Comment Link
        </a>
      </div>
    HTML
  end

  subject(:test_instance) { MediaConverterTest.new }

  it "should convert hrefs with flv to hrefs with mp4" do
    expect(doc.search('a').all? do |element|
      element['href'].match('flv')
    end).to be_truthy, 'precondition'

    test_instance.convert_flv_paths!(doc)

    expect(doc.search('a').all? do |element|
      element['href'].match('mp4')
    end).to be_truthy

    # No link may still reference an flv after conversion.
    expect(doc.search('a').none? do |element|
      element['href'].match('flv')
    end).to be_truthy
  end
end
describe "#convert_audio_tags!" do