changes based on epubcheck and iTMSTransporter validators

refs CNVS-20157

- Explicitly add media type when adding media to the ePub (gepub only
  tries to figure it out itself for images).
- Remove empty <a> ids added by tinymce.
- Add fallback text for <audio> & <video> tags for readers that don’t
  support media playback.
- Be more explicit about escaping / unescaping file paths.
- Use identifiers for URL anchors.

test plan:
- Test that epub generation works, nothing explicit beyond that.

Change-Id: I646866030420f570f7edbe7dd2ea87737e7129f6
Reviewed-on: https://gerrit.instructure.com/64039
Reviewed-by: Matt Berns <mberns@instructure.com>
Tested-by: Jenkins
Product-Review: John Corrigan <jcorrigan@instructure.com>
QA-Review: John Corrigan <jcorrigan@instructure.com>
This commit is contained in:
John Corrigan 2015-09-25 13:05:35 -05:00
parent 7a373a82b8
commit 1bcc778573
8 changed files with 96 additions and 12 deletions

View File

@ -12,7 +12,9 @@ module CC::Exporter::Epub
# Adds every entry of `files` to the ePub package.
# Each entry is read for :full_path, :local_path, :migration_id and :media_type.
def add_files
files.each do |file_data|
# Block form of File.open closes the handle once the block returns.
File.open(file_data[:full_path]) do |file|
# NOTE(review): this diff rendering shows two add_item calls back to back. Going by
# the hunk header (7 old lines, 9 new), the first is presumably the removed line and
# the call after it is its replacement — confirm against the raw diff.
epub.add_item(file_data[:local_path], file)
# Passes the migration id as the third argument and an explicit 'media-type'
# attribute rather than leaving the type to be worked out from the file.
epub.add_item(file_data[:local_path], file, file_data[:migration_id], {
'media-type' => file_data[:media_type]
})
end
end
end
@ -35,7 +37,8 @@ module CC::Exporter::Epub
def epub
@_epub ||= GEPUB::Book.new.tap do |b|
b.add_identifier('http:/example.jp/bookid_in_url', 'BookID', 'URL')
b.set_primary_identifier(pub_id)
b.language = I18n.locale
b.add_title(title, nil, GEPUB::TITLE_TYPE::MAIN) do |title|
title.file_as = "#{title} Epub"
title.display_seq = 1
@ -46,8 +49,12 @@ module CC::Exporter::Epub
end
end
# Returns the UUID for this publication, generating it on first use and
# handing back the same cached value on every later call.
def pub_id
  return @_pub_id if @_pub_id

  @_pub_id = SecureRandom.uuid
end
# Builds the file name for the generated ePub in the form "<uuid>.<title>.epub".
def filename
# NOTE(review): two string expressions appear here because this is a diff rendering;
# presumably the SecureRandom line is the removed one and the pub_id line replaces it,
# so the file name reuses the memoized pub_id instead of a fresh UUID — confirm.
"#{SecureRandom.uuid}.#{title}.epub"
"#{pub_id}.#{title}.epub"
end
end
end

View File

@ -13,6 +13,7 @@ module CC::Exporter::Epub::Converters
meta_node = open_file_xml(meta_path)
html_node = convert_media_from_node!(open_file(html_path))
html_node = remove_empty_ids!(html_node)
next unless html_node
@ -26,7 +27,7 @@ module CC::Exporter::Epub::Converters
if html_doc
_title, body = get_html_title_and_body(html_doc)
assignment['description'] = body
assignment['description'] = convert_placeholder_paths_from_string!(body)
end
['title', "allowed_extensions", "grading_type", "submission_types"].each do |string_type|
val = get_node_val(meta_doc, string_type)

View File

@ -20,6 +20,22 @@ module CC::Exporter::Epub::Converters
@resource_nodes_for_flat_manifest = {}
end
# Parses an HTML string into a fragment, rewrites its media paths, strips
# empty anchor ids, and returns the result serialized back to a string.
def convert_placeholder_paths_from_string!(html_string)
  fragment = Nokogiri::HTML::DocumentFragment.parse(html_string)

  # Both helpers modify the fragment in place; their return values are not used.
  convert_media_paths!(fragment)
  remove_empty_ids!(fragment)

  fragment.to_s
end
# Removes the `id` attribute from every `<a>` tag whose id is the empty
# string, modifying `node` in place.
#
# Returns `node` so the call can be chained or reassigned. A nil (or false)
# `node` is returned untouched instead of raising, so callers that check for
# a missing document after calling this method keep working.
def remove_empty_ids!(node)
  return node unless node

  node.search("a[id='']").each { |anchor| anchor.remove_attribute('id') }
  node
end
# exports the package into the intermediary json
def export
unzip_archive

View File

@ -7,15 +7,35 @@ module CC::Exporter::Epub::Converters
files = []
@manifest.css("resource[type=#{WEBCONTENT}][href^=#{WEB_RESOURCES_FOLDER}]").each do |res|
full_path = File.expand_path(get_full_path(res['href']))
local_path = res['href'].sub(WEB_RESOURCES_FOLDER, CC::Exporter::Epub::FILE_PATH)
local_path = File.join(
File.dirname(res['href'].sub(WEB_RESOURCES_FOLDER, CC::Exporter::Epub::FILE_PATH)),
CGI.escape(File.basename(res['href']))
)
files << {
migration_id: res['identifier'],
local_path: local_path,
file_name: File.basename(local_path),
full_path: full_path
full_path: full_path,
media_type: media_type_for(File.basename(local_path))
}
end
files
end
# Maps a file name to the MIME type used for its `media-type` attribute.
#
# According to the [ePub 3 spec on item elements][1], the media-type attribute
# should be defined in accordance with [MIME document RFC2046][2], so only
# registered type names are returned (e.g. `image/jpeg`, never `image/jpg`).
#
# The extension is matched case-insensitively. Returns nil when the extension
# is not one of those listed below.
#
# [1]: http://www.idpf.org/epub/30/spec/epub30-publications.html#elemdef-package-item
# [2]: http://tools.ietf.org/html/rfc2046
def media_type_for(file_name)
  case File.extname(file_name).downcase
  when '.mp3'
    'audio/mpeg'
  when '.mp4'
    'video/mp4'
  when '.mov'
    'video/quicktime'
  when '.jpg', '.jpeg'
    # The extension is "jpg" but the registered subtype is "jpeg".
    'image/jpeg'
  when '.png'
    'image/png'
  when '.gif'
    'image/gif'
  end
end
end
end

View File

@ -15,26 +15,66 @@ module CC::Exporter::Epub::Converters
convert_media_from_node!(html_node).to_s
end
# Find `<a>` or `<img>` tags and update the resource path attr (href or src)
# to replace WEB_CONTENT_TOKEN with CC::Exporter::Epub::FILE_PATH.
#
# Turns this:
#
# "$IMS-CC-FILEBASE$/image.jpg"
#
# into this:
#
# "media/image.jpg"
#
# which will match the directory the content is stored in in the ePub.
def convert_media_paths!(html_node)
# Each pair is a tag name and the attribute on that tag holding the resource path.
{ a: 'href', img: 'src' }.each do |tag, attr|
html_node.search(tag).each do |match|
# NOTE(review): this rendering shows the pre-change assignment immediately followed
# by its replacement; presumably only the `unescaped` form below is live — confirm
# against the raw diff.
match[attr] = CGI.unescape(match[attr]).gsub(WEB_CONTENT_TOKEN, CC::Exporter::Epub::FILE_PATH)
# Decode the path, then drop everything from the first `?` to the end of the line.
# NOTE(review): if a tag lacks the attribute, match[attr] is presumably nil and
# CGI.unescape would raise — confirm every <a>/<img> reaching here carries one.
unescaped = CGI.unescape(match[attr]).gsub(/\?.*/, '')
# Swap the token for FILE_PATH in the directory part only, and re-escape just the
# base name before joining the two back together.
match[attr] = File.join(
File.dirname(unescaped).gsub(WEB_CONTENT_TOKEN, CC::Exporter::Epub::FILE_PATH),
CGI.escape(File.basename(unescaped))
)
end
end
end
# Finds `<a>` tags with class `instructure_audio_link` or `audio_comment` and
# replaces each one with an `<audio>` tag, which is supported by ePub documents.
#
# Turns this:
#
#   <a class='instructure_audio_link' href='media/audio.mp3'>Here is your audio link</a>
#
# into this:
#
#   <audio src='media/audio.mp3' controls='controls'>
#     Audio content is not supported by your device or app.
#   </audio>
#
# The text inside the tag is fallback content for readers that cannot play
# audio; it is passed through I18n.t.
def convert_audio_tags!(html_node)
html_node.search('a.instructure_audio_link, a.audio_comment').each do |audio_link|
# NOTE(review): the heredoc below holds both a self-closing <audio /> line and the
# form with fallback text; presumably the first is the removed line of this diff
# rendering — confirm against the raw diff.
# NOTE(review): the href is interpolated into the markup as-is; confirm it cannot
# contain a double quote.
audio_link.replace(<<-AUDIO_TAG)
<audio src="#{audio_link['href']}" controls="controls" />
<audio src="#{audio_link['href']}" controls="controls">
#{I18n.t('Audio content is not supported by your device or app.')}
</audio>
AUDIO_TAG
end
end
# Finds `<a>` tags with class `instructure_video_link` or `video_comment` and
# replaces each one with a `<video>` tag, which is supported by ePub documents.
#
# Turns this:
#
#   <a class='instructure_video_link' href='media/video.mp4'>Here is your video link</a>
#
# into this:
#
#   <video src='media/video.mp4' controls='controls'>
#     Video content is not supported by your device or app.
#   </video>
#
# The text inside the tag is fallback content for readers that cannot play
# video; it is passed through I18n.t.
def convert_video_tags!(html_node)
html_node.search('a.instructure_video_link, a.video_comment').each do |video_link|
# NOTE(review): the heredoc below holds both a self-closing <video /> line and the
# form with fallback text; presumably the first is the removed line of this diff
# rendering — confirm against the raw diff.
# NOTE(review): the href is interpolated into the markup as-is; confirm it cannot
# contain a double quote.
video_link.replace(<<-VIDEO_TAG)
<video src="#{video_link['href']}" controls="controls" />
<video src="#{video_link['href']}" controls="controls">
#{I18n.t('Video content is not supported by your device or app.')}
</video>
VIDEO_TAG
end
end

View File

@ -23,7 +23,7 @@ module CC::Exporter::Epub::Converters
quiz_meta_data = open_file_xml(quiz_meta_link)
quiz[:title] = get_node_val(quiz_meta_data, "title")
quiz[:description] = convert_media_from_string!(get_node_val(quiz_meta_data, "description"))
quiz[:description] = convert_placeholder_paths_from_string!(get_node_val(quiz_meta_data, "description"))
quiz[:due_at] = get_node_val(quiz_meta_data, "due_at")
quiz[:lock_at] = get_node_val(quiz_meta_data, "lock_at")
quiz[:unlock_at] = get_node_val(quiz_meta_data, "unlock_at")

View File

@ -25,7 +25,7 @@ module CC::Exporter::Epub::Converters
def convert_topic(cc_doc, meta_doc)
topic = {"resource_type" => :topics}
topic['description'] = convert_media_from_string!(get_node_val(cc_doc, 'text'))
topic['description'] = convert_placeholder_paths_from_string!(get_node_val(cc_doc, 'text'))
topic['title'] = get_node_val(cc_doc, 'title')
if meta_doc
topic['title'] = get_node_val(meta_doc, 'title')

View File

@ -21,7 +21,7 @@ module CC::Exporter::Epub::Converters
title, body, meta = get_html_title_and_body_and_meta_fields(doc)
wiki[:title] = title
wiki[:front_page] = meta['front_page'] == 'true'
wiki[:text] = convert_media_from_string!(body)
wiki[:text] = convert_placeholder_paths_from_string!(body)
wiki[:identifier] = wiki_name
wiki
end