changes based on epubcheck and iTMSTransporter validators

refs CNVS-20157 - Explicitly add media type when adding media to the ePub (gepub only tries to figure it out itself for images). - Remove empty <a> ids added by tinymce. - Add fallback text for <audio> & <video> tags for readers that don’t support media playback. - Be more explicit about escaping / unescaping file paths. - Use identifiers for url anchors. test plan: - Test that epub generation works, nothing explicit beyond that. Change-Id: I646866030420f570f7edbe7dd2ea87737e7129f6 Reviewed-on: https://gerrit.instructure.com/64039 Reviewed-by: Matt Berns <mberns@instructure.com> Tested-by: Jenkins Product-Review: John Corrigan <jcorrigan@instructure.com> QA-Review: John Corrigan <jcorrigan@instructure.com>
2015-09-25 13:05:35 -05:00 · 2015-09-25 13:05:35 -05:00 · 1bcc778573
parent 7a373a82b8
commit 1bcc778573
8 changed files with 96 additions and 12 deletions
--- a/lib/cc/exporter/epub/book.rb
+++ b/lib/cc/exporter/epub/book.rb
@@ -12,7 +12,9 @@ module CC::Exporter::Epub
    def add_files
      files.each do |file_data|
        File.open(file_data[:full_path]) do |file|
-          epub.add_item(file_data[:local_path], file)
+          epub.add_item(file_data[:local_path], file, file_data[:migration_id], {
+            'media-type' => file_data[:media_type]
+          })
        end
      end
    end
@@ -35,7 +37,8 @@ module CC::Exporter::Epub

    def epub
      @_epub ||= GEPUB::Book.new.tap do |b|
-        b.add_identifier('http:/example.jp/bookid_in_url', 'BookID', 'URL')
+        b.set_primary_identifier(pub_id)
+        b.language = I18n.locale
        b.add_title(title, nil, GEPUB::TITLE_TYPE::MAIN) do |title|
          title.file_as = "#{title} Epub"
          title.display_seq = 1
@@ -46,8 +49,12 @@ module CC::Exporter::Epub
      end
    end

+    def pub_id
+      @_pub_id ||= SecureRandom.uuid
+    end
+
    def filename
-      "#{SecureRandom.uuid}.#{title}.epub"
+      "#{pub_id}.#{title}.epub"
    end
  end
 end
--- a/lib/cc/exporter/epub/converters/assignment_epub_converter.rb
+++ b/lib/cc/exporter/epub/converters/assignment_epub_converter.rb
@@ -13,6 +13,7 @@ module CC::Exporter::Epub::Converters

        meta_node = open_file_xml(meta_path)
        html_node = convert_media_from_node!(open_file(html_path))
+        html_node = remove_empty_ids!(html_node)

        next unless html_node

@@ -26,7 +27,7 @@ module CC::Exporter::Epub::Converters

      if html_doc
        _title, body = get_html_title_and_body(html_doc)
-        assignment['description'] = body
+        assignment['description'] = convert_placeholder_paths_from_string!(body)
      end
      ['title', "allowed_extensions", "grading_type", "submission_types"].each do |string_type|
        val = get_node_val(meta_doc, string_type)
--- a/lib/cc/exporter/epub/converters/cartridge_converter.rb
+++ b/lib/cc/exporter/epub/converters/cartridge_converter.rb
@@ -20,6 +20,22 @@ module CC::Exporter::Epub::Converters
      @resource_nodes_for_flat_manifest = {}
    end

+    def convert_placeholder_paths_from_string!(html_string)
+      html_node = Nokogiri::HTML::DocumentFragment.parse(html_string)
+      html_node.tap do |node|
+        convert_media_paths!(node)
+        remove_empty_ids!(node)
+      end
+      html_node.to_s
+    end
+
+    def remove_empty_ids!(node)
+      node.search("a[id='']").each do |tag|
+        tag.remove_attribute('id')
+      end
+      node
+    end
+
    # exports the package into the intermediary json
    def export
      unzip_archive
--- a/lib/cc/exporter/epub/converters/files_converter.rb
+++ b/lib/cc/exporter/epub/converters/files_converter.rb
@@ -7,15 +7,35 @@ module CC::Exporter::Epub::Converters
      files = []
      @manifest.css("resource[type=#{WEBCONTENT}][href^=#{WEB_RESOURCES_FOLDER}]").each do |res|
        full_path = File.expand_path(get_full_path(res['href']))
-        local_path = res['href'].sub(WEB_RESOURCES_FOLDER, CC::Exporter::Epub::FILE_PATH)
+        local_path = File.join(
+          File.dirname(res['href'].sub(WEB_RESOURCES_FOLDER, CC::Exporter::Epub::FILE_PATH)),
+          CGI.escape(File.basename(res['href']))
+        )
        files << {
          migration_id: res['identifier'],
          local_path: local_path,
          file_name: File.basename(local_path),
-          full_path: full_path
+          full_path: full_path,
+          media_type: media_type_for(File.basename(local_path))
        }
      end
      files
    end
+
+    # According to the [ePub 3 spec on item elements][1], the media-type attribute
+    # should be defined in accordance with [MIME document RFC2046][2].
+    #
+    # [1]: http://www.idpf.org/epub/30/spec/epub30-publications.html#elemdef-package-item
+    # [2]: http://tools.ietf.org/html/rfc2046
+    def media_type_for(file_name)
+      case File.extname(file_name)
+      when '.mp3'
+        'audio/basic'
+      when '.mov', '.mp4'
+        'video/mpg'
+      when '.jpg', '.png', '.gif'
+        "image/#{File.extname(file_name).gsub('.', '')}"
+      end
+    end
  end
 end
--- a/lib/cc/exporter/epub/converters/media_converter.rb
+++ b/lib/cc/exporter/epub/converters/media_converter.rb
@@ -15,26 +15,66 @@ module CC::Exporter::Epub::Converters
      convert_media_from_node!(html_node).to_s
    end

+    # Find `<a>` or `<img>` tags and update the resource path attr (href or src)
+    # to replace WEB_CONTENT_TOKEN with CC::Exporter::Epub::FILE_PATH.
+    #
+    # Turns this:
+    #
+    # "$IMS-CC-FILEBASE$/image.jpg"
+    #
+    # into this:
+    #
+    # "media/image.jpg"
+    #
+    # which matches the directory where the content is stored in the ePub.
    def convert_media_paths!(html_node)
      { a: 'href', img: 'src' }.each do |tag, attr|
        html_node.search(tag).each do |match|
-          match[attr] = CGI.unescape(match[attr]).gsub(WEB_CONTENT_TOKEN, CC::Exporter::Epub::FILE_PATH)
+          unescaped = CGI.unescape(match[attr]).gsub(/\?.*/, '')
+          match[attr] = File.join(
+            File.dirname(unescaped).gsub(WEB_CONTENT_TOKEN, CC::Exporter::Epub::FILE_PATH),
+            CGI.escape(File.basename(unescaped))
+          )
        end
      end
    end

+    # Finds `<a>` tags with class `instructure_audio_link` or `audio_comment` and
+    # replaces each one with an `<audio>` tag, which is supported by ePub documents.
+    #
+    # Turns this:
+    #
+    # <a class='instructure_audio_link' href='media/audio.mp3'>Here is your audio link</a>
+    #
+    # into this:
+    #
+    # <audio src='media/audio.mp3' controls='controls'>Audio content is not supported by your device or app.</audio>
    def convert_audio_tags!(html_node)
      html_node.search('a.instructure_audio_link, a.audio_comment').each do |audio_link|
        audio_link.replace(<<-AUDIO_TAG)
-          <audio src="#{audio_link['href']}" controls="controls" />
+          <audio src="#{audio_link['href']}" controls="controls">
+            #{I18n.t('Audio content is not supported by your device or app.')}
+          </audio>
        AUDIO_TAG
      end
    end

+    # Finds `<a>` tags with class `instructure_video_link` or `video_comment` and
+    # replaces each one with a `<video>` tag, which is supported by ePub documents.
+    #
+    # Turns this:
+    #
+    # <a class='instructure_video_link' href='media/video.mp4'>Here is your video link</a>
+    #
+    # into this:
+    #
+    # <video src='media/video.mp4' controls='controls'>Video content is not supported by your device or app.</video>
    def convert_video_tags!(html_node)
      html_node.search('a.instructure_video_link, a.video_comment').each do |video_link|
        video_link.replace(<<-VIDEO_TAG)
-          <video src="#{video_link['href']}" controls="controls" />
+          <video src="#{video_link['href']}" controls="controls">
+            #{I18n.t('Video content is not supported by your device or app.')}
+          </video>
        VIDEO_TAG
      end
    end
--- a/lib/cc/exporter/epub/converters/quiz_epub_converter.rb
+++ b/lib/cc/exporter/epub/converters/quiz_epub_converter.rb
@@ -23,7 +23,7 @@ module CC::Exporter::Epub::Converters
      quiz_meta_data = open_file_xml(quiz_meta_link)

      quiz[:title] = get_node_val(quiz_meta_data, "title")
-      quiz[:description] = convert_media_from_string!(get_node_val(quiz_meta_data, "description"))
+      quiz[:description] = convert_placeholder_paths_from_string!(get_node_val(quiz_meta_data, "description"))
      quiz[:due_at] = get_node_val(quiz_meta_data, "due_at")
      quiz[:lock_at] = get_node_val(quiz_meta_data, "lock_at")
      quiz[:unlock_at] = get_node_val(quiz_meta_data, "unlock_at")
--- a/lib/cc/exporter/epub/converters/topic_epub_converter.rb
+++ b/lib/cc/exporter/epub/converters/topic_epub_converter.rb
@@ -25,7 +25,7 @@ module CC::Exporter::Epub::Converters

    def convert_topic(cc_doc, meta_doc)
      topic = {"resource_type" => :topics}
-      topic['description'] = convert_media_from_string!(get_node_val(cc_doc, 'text'))
+      topic['description'] = convert_placeholder_paths_from_string!(get_node_val(cc_doc, 'text'))
      topic['title'] = get_node_val(cc_doc, 'title')
      if meta_doc
        topic['title'] = get_node_val(meta_doc, 'title')
--- a/lib/cc/exporter/epub/converters/wiki_epub_converter.rb
+++ b/lib/cc/exporter/epub/converters/wiki_epub_converter.rb
@@ -21,7 +21,7 @@ module CC::Exporter::Epub::Converters
      title, body, meta = get_html_title_and_body_and_meta_fields(doc)
      wiki[:title] = title
      wiki[:front_page] = meta['front_page'] == 'true'
-      wiki[:text] = convert_media_from_string!(body)
+      wiki[:text] = convert_placeholder_paths_from_string!(body)
      wiki[:identifier] = wiki_name
      wiki
    end