display inline flv media in ePubs

refs CNVS-21799

- Fetch transcoded mp4 in place of exported flvs.
- Reference mp4s in place of flvs in generated xhtml.
- Do not include in files array files that should not be uploaded to the ePub.
- Remove from xhtml references to files that are not included in the ePub.

test plan:
- Have a course with an assignment description, quiz description, discussion topic description, or wiki page body that contains a recorded video comment.
- Export course as an ePub.
- Observe that the video displays inline.
- Have a course with unsupported media (the official _supported_ list is: 'jpg', 'gif', 'png', 'mp4', 'm4v', 'flv', 'mp3').
- Export an ePub.
- Observe that the unsupported file is not part of the package (using something like Calibre's book editor).
- Observe that the reference to the media has been replaced with an explanation of its absence.

Change-Id: I9f5eab8fa1c86e719d69e4ae42451e795ee4b471
Reviewed-on: https://gerrit.instructure.com/64365
Reviewed-by: Matt Berns <mberns@instructure.com>
Tested-by: Jenkins
QA-Review: Landon Holmstead <lholmstead@instructure.com>
Product-Review: Cosme Salazar <cosme@instructure.com>
2015-09-30 16:44:45 -05:00 · 2015-09-30 16:44:45 -05:00 · c3e48f62e1
parent fda055b06d
commit c3e48f62e1
5 changed files with 257 additions and 44 deletions
--- a/lib/cc/exporter/epub/book.rb
+++ b/lib/cc/exporter/epub/book.rb
@ -11,7 +11,7 @@ module CC::Exporter::Epub

    def add_files
      files.each do |file_data|
-        File.open(file_data[:full_path]) do |file|
+        File.open(file_data[:path_to_file]) do |file|
          epub.add_item(file_data[:local_path], file, file_data[:migration_id], {
            'media-type' => file_data[:media_type]
          })
--- a/lib/cc/exporter/epub/converters/cartridge_converter.rb
+++ b/lib/cc/exporter/epub/converters/cartridge_converter.rb
@ -23,7 +23,7 @@ module CC::Exporter::Epub::Converters
    def convert_placeholder_paths_from_string!(html_string)
      html_node = Nokogiri::HTML::DocumentFragment.parse(html_string)
      html_node.tap do |node|
-        convert_media_paths!(node)
+        convert_media_from_node!(node)
        remove_empty_ids!(node)
      end
      html_node.to_s
--- a/lib/cc/exporter/epub/converters/files_converter.rb
+++ b/lib/cc/exporter/epub/converters/files_converter.rb
@ -3,38 +3,147 @@ module CC::Exporter::Epub::Converters
    include CC::CCHelper
    include CC::Exporter

-    def convert_files
-      files = []
-      @manifest.css("resource[type=#{WEBCONTENT}][href^=#{WEB_RESOURCES_FOLDER}]").each do |res|
-        full_path = File.expand_path(get_full_path(res['href']))
-        local_path = File.join(
-          File.dirname(res['href'].sub(WEB_RESOURCES_FOLDER, CC::Exporter::Epub::FILE_PATH)),
-          CGI.escape(File.basename(res['href']))
-        )
-        files << {
-          migration_id: res['identifier'],
-          local_path: local_path,
-          file_name: File.basename(local_path),
-          full_path: full_path,
-          media_type: media_type_for(File.basename(local_path))
-        }
+    class FlvToMp4
+      def initialize(flv_path)
+        @flv_path = flv_path
+      end
+      attr_reader :flv_path
+
+      def convert!
+        return flv_path unless mp4_url.present?
+
+        f = File.open(mp4_path, 'wb')
+        CanvasHttp.get(mp4_url) do |response|
+          f.write(response.body)
+        end
+        f.close
+
+        mp4_path
+      end
+
+      private
+      def flv_filename
+        File.basename(flv_path)
+      end
+
+      def media_id
+        flv_filename.gsub('.flv', '')
+      end
+
+      def media_source_fetcher
+        @_media_source_fetcher ||= MediaSourceFetcher.new(CanvasKaltura::ClientV3.new)
+      end
+
+      def mp4_path
+        flv_path.gsub('.flv', '.mp4')
+      end
+
+      def mp4_url
+        @_mp4_url ||= media_source_fetcher.fetch_preferred_source_url({
+          media_id: media_id,
+          file_extension: 'mp4'
+        })
      end
-      files
    end

-    # According to the [ePub 3 spec on item elements][1], the media-type attribute
-    # should be defined in accordance with [MIME document RFC2046][2].
-    #
-    # [1]: http://www.idpf.org/epub/30/spec/epub30-publications.html#elemdef-package-item
-    # [2]: http://tools.ietf.org/html/rfc2046
-    def media_type_for(file_name)
-      case File.extname(file_name)
-      when '.mp3'
-        'audio/basic'
-      when '.mov', '.mp4'
-        'video/mpg'
-      when '.jpg', '.png', '.gif'
-        "image/#{File.extname(file_name).gsub('.', '')}"
+    class FilePresenter
+      include CC::CCHelper
+
+      def initialize(original_path, data)
+        @original_path = original_path
+        @data = data
+      end
+      attr_reader :data, :original_path
+
+      def to_h
+        return {
+          migration_id: data['identifier'],
+          local_path: local_path,
+          file_name: File.basename(local_path),
+          path_to_file: path_to_file,
+          media_type: media_type
+        }
+      end
+
+      private
+      def flv?
+        File.extname(original_path) == '.flv'
+      end
+
+      # The path for the file in the ePub itself. This method removes the
+      # path up to (and including) the standard export placeholder
+      # (WEB_RESOURCES_FOLDER) and replaces it with the root folder for media
+      # in the ePub (CC::Exporter::Epub::FILE_PATH), while maintaining folder
+      # structure beyond the export root.
+      #
+      # (Note that the path we're working with in this class is the full path
+      # to the file, not the path relative to the unzipped export.)
+      #
+      # Changes this:
+      #
+      # /Users/username/Documents/canvas-lms/exports/d20150930-26055-1wlhczz/web_resources/media_objects/m-ArYKbPPdLwtbhcHPjxYsQvMeDCPZKZp.mp3
+      #
+      # into this:
+      #
+      # media/media_objects/m-ArYKbPPdLwtbhcHPjxYsQvMeDCPZKZp.mp3
+      #
+      # Or this:
+      #
+      # /Users/username/Documents/canvas-lms/exports/d20150930-26055-1wlhczz/web_resources/m-ArYKbPPdLwtbhcHPjxYsQvMeDCPZKZp.mp3
+      #
+      # into this:
+      #
+      # media/m-ArYKbPPdLwtbhcHPjxYsQvMeDCPZKZp.mp3
+      def local_path
+        unless @_local_path
+          path_args = [
+            CC::Exporter::Epub::FILE_PATH,
+            File.dirname(original_path.match(/#{WEB_RESOURCES_FOLDER}\/(.+)$/)[1]),
+            CGI.escape(File.basename(path_to_file))
+          ].reject do |path_part|
+            path_part.match(/^\.$/)
+          end
+          @_local_path = File.join(path_args)
+        end
+        @_local_path
+      end
+
+      # According to the [ePub 3 spec on item elements][1], the media-type attribute
+      # should be defined in accordance with [MIME document RFC2046][2].
+      #
+      # [1]: http://www.idpf.org/epub/30/spec/epub30-publications.html#elemdef-package-item
+      # [2]: http://tools.ietf.org/html/rfc2046
+      def media_type
+        case File.extname(path_to_file)
+        when '.mp3'
+          'audio/basic'
+        when '.m4v', '.mp4'
+          'video/mpg'
+        when '.jpg', '.png', '.gif', '.jpeg'
+          "image/#{File.extname(path_to_file).delete('.')}"
+        else
+          nil
+        end
+      end
+
+      def path_to_file
+        @_path_to_file ||= flv? ? FlvToMp4.new(original_path).convert! : original_path
+      end
+    end
+
+    def convert_files
+      all_files = @manifest.css("resource[type=#{WEBCONTENT}][href^=#{WEB_RESOURCES_FOLDER}]").map do |res|
+        original_path = File.expand_path(get_full_path(res['href']))
+        FilePresenter.new(original_path, res).to_h
+      end
+
+      # Unsupported file types will end up with a `nil` media_type, and they should
+      # not be included in the final files array. The reason we do not filter them out
+      # initially is that we attempt to convert certain unsupported file types,
+      # namely flvs, and we do not want to remove them from the collection until we're
+      # sure there is nothing that can be done.
+      all_files.reject do |file|
+        file[:media_type].nil?
      end
    end
  end
--- a/lib/cc/exporter/epub/converters/media_converter.rb
+++ b/lib/cc/exporter/epub/converters/media_converter.rb
@ -5,6 +5,7 @@ module CC::Exporter::Epub::Converters
    def convert_media_from_node!(html_node)
      html_node.tap do |node|
        convert_media_paths!(node)
+        convert_flv_paths!(node)
        convert_audio_tags!(node)
        convert_video_tags!(node)
      end
@ -29,26 +30,65 @@ module CC::Exporter::Epub::Converters
    # which will match the directory the content is stored in in the ePub.
    def convert_media_paths!(html_node)
      { a: 'href', img: 'src' }.each do |tag, attr|
-        html_node.search(tag).each do |match|
+        selector = "#{tag}[#{attr}*='#{WEB_CONTENT_TOKEN.gsub('$', '')}']"
+        html_node.search(selector).each do |match|
          unescaped = CGI.unescape(match[attr]).gsub(/\?.*/, '')
-          match[attr] = File.join(
-            File.dirname(unescaped).gsub(WEB_CONTENT_TOKEN, CC::Exporter::Epub::FILE_PATH),
-            CGI.escape(File.basename(unescaped))
-          )
+
+          if path_should_be_converted?(unescaped)
+            match[attr] = converted_media_path(unescaped)
+          else
+            match.replace(<<-SPAN_TAG)
+              <span>
+                #{I18n.t("File %{filename} could not be included in the ePub document.", {
+                  filename: File.basename(unescaped)
+                })}
+              </span>
+            SPAN_TAG
+          end
        end
      end
    end

+    def path_should_be_converted?(path)
+      filename = File.basename(path).gsub(/#{File.extname(path)}/, '')
+
+      @course[:files].any? do |file|
+        file[:file_name].match(filename)
+      end
+    end
+
+    def converted_media_path(path)
+      File.join(
+        File.dirname(path).gsub(WEB_CONTENT_TOKEN, CC::Exporter::Epub::FILE_PATH),
+        CGI.escape(File.basename(path))
+      )
+    end
+
+    # Find `<a>` tags and update references to `.flv` files to `.mp4` files.
+    #
+    # Turns this:
+    #
+    # media/media_objects/m-5G7G2CcbF2nd3nZ8pyT1z16ytNaQuQ1X.flv
+    #
+    # into this:
+    #
+    # media/media_objects/m-5G7G2CcbF2nd3nZ8pyT1z16ytNaQuQ1X.mp4
+    def convert_flv_paths!(html_node)
+      html_node.search("a[href*='flv']").each do |tag|
+        tag['href'] = tag['href'].gsub('.flv', '.mp4')
+      end
+    end
+
    # Find `<a>` tags with class `instructure_audio_link` and replaces it with
    # an audio tag, which is supported by ePub documents.
    #
    # Turns this:
    #
-    # "<a class='instructure_audio_link' href='media/audio.mp3'>Here is your audio link</a>
+    # <a class='instructure_audio_link' href='media/audio.mp3'>Here is your audio link</a>
    #
    # into this:
    #
-    # "<audio src='media/audio.mp3' controls='controls' />
+    # <audio src='media/audio.mp3' controls='controls' />
    def convert_audio_tags!(html_node)
      html_node.search('a.instructure_audio_link, a.audio_comment').each do |audio_link|
        audio_link.replace(<<-AUDIO_TAG)
@ -60,15 +100,15 @@ module CC::Exporter::Epub::Converters
    end

    # Find `<a>` tags with class `instructure_video_link` and replaces it with
-    # a audio tag, which is supported by ePub documents.
+    # a video tag, which is supported by ePub documents.
    #
    # Turns this:
    #
-    # "<a class='instructure_video_link' href='media/video.mp4'>Here is your video link</a>
+    # <a class='instructure_video_link' href='media/video.mp4'>Here is your video link</a>
    #
    # into this:
    #
-    # "<video src='media/video.mp4' controls='controls' />
+    # <video src='media/video.mp4' controls='controls' />
    def convert_video_tags!(html_node)
      html_node.search('a.instructure_video_link, a.video_comment').each do |video_link|
        video_link.replace(<<-VIDEO_TAG)
--- a/spec/lib/cc/exporter/epub/converters/media_converter_spec.rb
+++ b/spec/lib/cc/exporter/epub/converters/media_converter_spec.rb
@ -3,20 +3,45 @@ require File.expand_path(File.dirname(__FILE__) + '/../../../cc_spec_helper')
 describe "MediaConverter" do
  class MediaConverterTest
    include CC::Exporter::Epub::Converters::MediaConverter
+
+    def initialize(course={})
+      @course = course
+    end
  end

  describe "#convert_media_paths!" do
+    let_once(:a_href) do
+      "#{CGI.escape(MediaConverterTest::WEB_CONTENT_TOKEN)}/path/to/img.jpg"
+    end
+    let_once(:img_src) do
+      "#{CGI.escape(MediaConverterTest::WEB_CONTENT_TOKEN)}/path/to/img.jpg"
+    end
+    let_once(:mov_path) do
+      "#{CGI.escape(MediaConverterTest::WEB_CONTENT_TOKEN)}/path/to/movie.mov"
+    end
+
    let(:doc) do
      Nokogiri::HTML::DocumentFragment.parse(<<-HTML)
        <div>
-          <a href="#{CGI.escape(MediaConverterTest::WEB_CONTENT_TOKEN)}/path/to/img.jpg">
+          <a href="#{a_href}">
            Image Link
          </a>
-          <img src="#{CGI.escape(MediaConverterTest::WEB_CONTENT_TOKEN)}/path/to/img.jpg" />
+          <img src="#{img_src}" />
+          <a href="#{mov_path}">
+            This is no good.
+          </a>
        </div>
      HTML
    end
-    subject(:test_instance) { MediaConverterTest.new }
+    subject(:test_instance) do
+      MediaConverterTest.new({
+        files: [{
+          file_name: File.basename(a_href)
+        }, {
+          file_name: File.basename(img_src)
+        }]
+      })
+    end

    it "should update link hrefs containing WEB_CONTENT_TOKEN" do
      expect(doc.search('a').all? do |element|
@ -49,6 +74,45 @@ describe "MediaConverter" do
        element['src'].match(CGI.escape(MediaConverterTest::WEB_CONTENT_TOKEN))
      end).to be_falsey
    end
+
+    it "should replace media that is not present with a span" do
+      expect(doc.search("a[href*='#{mov_path}']").any?).to be_truthy, 'precondition'
+      expect(doc.search("span").empty?).to be_truthy, 'precondition'
+
+      test_instance.convert_media_paths!(doc)
+
+      expect(doc.search("a[href*='#{mov_path}']").empty?).to be_truthy
+      expect(doc.search("span").any?).to be_truthy
+    end
+  end
+
+  describe "#convert_flv_paths!" do
+    let(:doc) do
+      Nokogiri::HTML::DocumentFragment.parse(<<-HTML)
+        <div>
+          <a href="media/media_objects/m-5G7G2CcbF2nd3nZ8pyT1z16ytNaQuQ1X.flv">
+            Video Comment Link
+          </a>
+        </div>
+      HTML
+    end
+    subject(:test_instance) { MediaConverterTest.new }
+
+    it "should hrefs with flv to hrefs with mp4" do
+      expect(doc.search('a').all? do |element|
+        element['href'].match('flv')
+      end).to be_truthy, 'precondition'
+
+      test_instance.convert_flv_paths!(doc)
+
+      expect(doc.search('a').all? do |element|
+        element['href'].match('mp4')
+      end).to be_truthy
+
+      expect(doc.search('a').all? do |element|
+        element['href'].match('flv')
+      end).to be_falsey
+    end
  end

  describe "#convert_audio_tags!" do