326 lines
10 KiB
Ruby
326 lines
10 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
#
|
|
# Copyright (C) 2011 - present Instructure, Inc.
|
|
#
|
|
# This file is part of Canvas.
|
|
#
|
|
# Canvas is free software: you can redistribute it and/or modify it under
|
|
# the terms of the GNU Affero General Public License as published by the Free
|
|
# Software Foundation, version 3 of the License.
|
|
#
|
|
# Canvas is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
|
|
# A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
|
|
# details.
|
|
#
|
|
# You should have received a copy of the GNU Affero General Public License along
|
|
# with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
#
|
|
|
|
# This is used to take a zipped file, unzip it, add directories to a
|
|
# context, and attach the files in the correct directories.
|
|
class UnzipAttachment
  # Path components matching this are never imported (see #should_skip?).
  THINGS_TO_IGNORE_REGEX = /^(__MACOSX|thumbs\.db|\.DS_Store)$/

  # Builds an UnzipAttachment from +opts+, runs #process on it and returns it.
  #
  # The instance is held in a local variable only, so concurrent calls do not
  # share state through the class and the instance is not retained afterwards.
  def self.process(opts = {})
    unzipper = new(opts)
    unzipper.process
    unzipper
  end

  attr_reader :context, :filename, :root_folders, :context_files_folder
  attr_accessor :progress_proc

  # for backwards compatibility
  def course
    context
  end

  # for backwards compatibility
  def course_files_folder
    context_files_folder
  end

  # Recognised +opts+ keys:
  #   :course / :context  - the context to attach files to (:course wins if both are given)
  #   :filename           - the archive to extract
  #   :callback           - progress proc, see #update_progress
  #   :root_directory     - folder to unzip into; defaults to the first of
  #                         Folder.root_folders(context)
  #   :valid_paths        - if given, only entries whose name is included are imported
  #   :logger             - logger used for warnings while processing
  #   :rename_files       - passed to FileInContext.attach as allow_rename
  #   :migration_id_map   - entry name => migration id
  #
  # Raises ArgumentError when the context, filename or files folder is missing.
  def initialize(opts = {})
    @context = opts[:course] || opts[:context]
    @filename = opts[:filename]
    # opts[:callback] is for backwards-compatibility, it's just a progress proc
    # that doesn't expect any argument giving it the percent progress
    @progress_proc = opts[:callback]
    @context_files_folder = opts[:root_directory] || Folder.root_folders(@context).first
    @valid_paths = opts[:valid_paths]
    @logger ||= opts[:logger]
    @rename_files = !!opts[:rename_files]
    @migration_id_map = opts[:migration_id_map] || {}

    raise ArgumentError, "Must provide a context." unless context&.is_a_context?
    raise ArgumentError, "Must provide a filename." unless filename
    raise ArgumentError, "Must provide a context files folder." unless context_files_folder
  end

  # Reports +pct+ to the progress proc, if one is set.
  def update_progress(pct)
    return unless @progress_proc

    if @progress_proc.arity.zero?
      # for backwards compatibility with callback procs that expect no arguments
      @progress_proc.call
    else
      @progress_proc.call(pct)
    end
  end

  # The logger passed in via opts[:logger], falling back to Rails.logger.
  def logger
    @logger ||= Rails.logger
  end

  # For all files in a zip file,
  # 1) create a folder in the context like the one in the zip file, if necessary
  # 2) create a unique filename to store the file
  # 3) extract the file into the unique filename
  # 4) attach the file to the context, in the appropriate folder
  #
  # E.g.,
  # the zipfile has some_entry/some_file.txt
  # the context will have root_folder/some_entry added to its folder structure
  # the contents of some_entry/some_file.txt in the zip file will be extracted
  # to a temp file, and the context will get the contents of that file added
  # as a new attachment in the root_folder/some_entry folder in the database.
  #
  # Attachment::OverQuotaError aborts the whole run; any other StandardError
  # raised while handling a single entry is logged and that entry is skipped.
  #
  # NOTE(review): warnings go through @logger directly rather than #logger, so
  # nothing is logged unless a logger was supplied (or #logger was called
  # earlier) -- confirm that is intended.
  def process
    Folder.reset_path_lookups!
    with_unzip_configuration do
      zip_stats.validate_against(context)

      id_positions = {}
      path_positions = zip_stats.paths_with_positions(last_position)
      # folders whose creation failed; later entries under them are skipped
      not_created_folders = []
      CanvasUnzip.extract_archive(filename) do |entry, index|
        next if should_skip?(entry)

        folder_path_array = path_elements_for(@context_files_folder.full_name)
        entry_path_array = path_elements_for(entry.name)
        # drop the file's own name, leaving only its directory part
        entry_path_array.pop

        folder_path_array += entry_path_array
        folder_name = folder_path_array.join("/")
        entry_display_name = display_name(entry.name)
        if not_created_folders.include?(folder_name)
          @logger&.warn "Couldn't create file #{entry_display_name}: #{folder_name} not created"
          next
        end
        begin
          folder = Folder.assert_path(folder_name, @context)
        rescue ActiveRecord::StatementInvalid => e
          @logger&.warn "Couldn't create sub-folder #{folder_name}: #{e.message}"
          not_created_folders << folder_name
          next
        end

        update_progress(zip_stats.percent_complete(index))

        # Tempfile picks a unique name for us, so we don't have to worry
        # about what the on-disk name actually is.
        Tempfile.open do |f|
          file_size = 0
          sha512 = entry.extract(f.path, true) do |bytes|
            file_size += bytes
          end
          # charge what was actually extracted, not what the archive claimed
          zip_stats.charge_quota(file_size)
          # This is where the attachment actually happens. See file_in_context.rb
          migration_id = @migration_id_map[entry.name]
          attachment = attach(f.path, entry, folder, sha512, migration_id:)
          id_positions[attachment.id] = path_positions[entry.name]
        rescue Attachment::OverQuotaError
          f.unlink
          raise
        rescue ActiveRecord::StatementInvalid => e
          @logger&.warn "Couldn't create file #{entry_display_name}: #{e.message}"
        rescue => e
          @logger&.warn "Couldn't unzip archived file #{entry_display_name}: #{e.message}"
        end
      end
      update_attachment_positions(id_positions)
    end

    @context.touch
    update_progress(1.0)
  end

  # Lazily built ZipFileStats for the archive.
  def zip_stats
    @zip_stats ||= ZipFileStats.new(filename)
  end

  # Writes the positions in +id_positions+ (attachment id => position) with a
  # single UPDATE. Pairs with a nil id or nil position are left out of the
  # CASE expression; nothing is executed when no pair is usable.
  def update_attachment_positions(id_positions)
    updates = id_positions.filter_map do |id, position|
      "WHEN id=#{id} THEN #{position}" if id && position
    end
    return if updates.empty?

    Attachment.where(id: id_positions.keys).update_all("position=CASE #{updates.join(" ")} ELSE position END")
  end

  # Attaches the extracted file at +path+ to the context via
  # FileInContext.attach and returns its result.
  #
  # +md5+ is forwarded as-is under the md5: keyword (#process passes the value
  # returned by entry.extract). If the first attempt raises a StandardError the
  # call is tried exactly once more; a second failure propagates to the caller.
  def attach(path, entry, folder, md5, migration_id: nil)
    attempts = 0
    begin
      attempts += 1
      FileInContext.attach(context,
                           path,
                           display_name: display_name(entry.name),
                           folder:,
                           explicit_filename: File.split(entry.name).last,
                           allow_rename: @rename_files,
                           md5:,
                           migration_id:)
    rescue
      retry if attempts < 2
      raise
    end
  end

  # Yields with Attachment.skip_touch_context, Attachment.skip_3rd_party_submits
  # and FileInContext.queue_files_to_delete switched on. They are switched off
  # again, and FileInContext.destroy_queued_files is called, even if the block
  # raises.
  def with_unzip_configuration
    Attachment.skip_touch_context(true)
    Attachment.skip_3rd_party_submits(true)
    FileInContext.queue_files_to_delete(true)
    begin
      yield
    ensure
      Attachment.skip_touch_context(false)
      Attachment.skip_3rd_party_submits(false)
      FileInContext.queue_files_to_delete(false)
      FileInContext.destroy_queued_files
    end
  end

  # The last non-nil position among the context's active attachments, or 0.
  # NOTE(review): this is the last one in whatever order the association
  # returns, not necessarily the maximum -- confirm that is intended.
  def last_position
    @last_position ||= @context.attachments.active.filter_map(&:position).last || 0
  end

  # Truthy when +entry+ should not be imported: it is a directory, any of its
  # path components matches THINGS_TO_IGNORE_REGEX, or a :valid_paths list was
  # given and does not include the entry's name.
  def should_skip?(entry)
    entry.directory? ||
      entry.name.split("/").any? { |part| part.match?(THINGS_TO_IGNORE_REGEX) } ||
      (@valid_paths && !@valid_paths.include?(entry.name))
  end

  # Returns File.split(path), i.e. [directory, basename], without the leading
  # "." that File.split yields for a bare filename. Returns [] when File.split
  # raises (for example on a nil path).
  def path_elements_for(path)
    list = begin
      File.split(path)
    rescue
      []
    end
    list.shift if list[0] == "."
    list
  end

  protected

  # The last component of +path+, unchanged.
  # So, display_name("/tmp/foo/bar_baz") returns "bar_baz"
  def display_name(path)
    File.split(path).last
  end

  # Finds the folder in the database, creating the path if necessary
  def infer_folder(path)
    list = path.split("/")
    current = (@root_directory ||= folders.root_directory)
    # For every directory in the path...
    # (-2 means all entries but the last, which should be a filename)
    list[0..-2].each do |dir|
      current = current.sub_folders.where(name: dir).first || assert_folder(current, dir)
    end
    current
  end

  # Actually creates the folder in the database.
  def assert_folder(root, dir)
    folder = Folder.new(parent_folder_id: root.id, name: dir)
    folder.context = context
    folder.save!
    folder
  end

  # Memoized holder for the root directory used by infer_folder.
  # Pass true to rebuild it.
  def folders(reset = false)
    @folders = nil if reset
    return @folders if @folders

    @folders = OpenStruct.new(root_directory: context_files_folder)
  end
end
|
|
|
|
# this is just a helper class that wraps an archive
|
|
# for just the duration of this operation; it doesn't
|
|
# quite seem appropriate to move it to its own file
|
|
# since it's such an integral part of the unzipping
|
|
# process
|
|
class ZipFileStats
  # Archives with more entries than this are rejected by #validate_against.
  MAX_FILE_COUNT = 250_000
  attr_reader :file_count, :total_size, :paths, :filename, :quota_remaining

  # Walks the archive at +filename+ once to collect entry count, nominal total
  # size and entry names.
  def initialize(filename)
    @filename = filename
    @paths = []
    @file_count = 0
    @total_size = 0
    @quota_remaining = nil
    process!
  end

  # Raises ArgumentError when the archive has more than MAX_FILE_COUNT entries,
  # and Attachment::OverQuotaError when the context has a positive quota that
  # the archive's nominal size would exceed. With a positive quota, the space
  # still available is remembered for #charge_quota.
  def validate_against(context)
    raise ArgumentError, "Zip File cannot have more than #{MAX_FILE_COUNT} entries" if file_count > MAX_FILE_COUNT

    # check whether the nominal size of the zip's contents would exceed
    # quota, and reject the zip immediately if so
    quota_hash = Attachment.get_quota(context)
    limit = quota_hash[:quota]
    return unless limit > 0

    used = quota_hash[:quota_used]
    raise Attachment::OverQuotaError, "Zip file would exceed quota limit" if (used + total_size) > limit

    @quota_remaining = limit - used
  end

  # since the central directory can lie, track quota during extraction as well
  # to prevent zip bomb denial-of-service attacks
  #
  # Does nothing when no remaining quota was recorded by #validate_against.
  def charge_quota(size)
    return if @quota_remaining.nil?

    raise Attachment::OverQuotaError, "Zip contents exceed course quota limit" if size > @quota_remaining

    @quota_remaining -= size
  end

  # Maps each entry name to a position: names are sorted and numbered
  # consecutively starting at +base+.
  def paths_with_positions(base)
    paths.sort.each_with_index.to_h { |path, offset| [path, offset + base] }
  end

  # Fraction of entries handled once the entry at the zero-based
  # +current_index+ is done.
  def percent_complete(current_index)
    completed = current_index + 1
    completed.to_f / file_count.to_f
  end

  private

  # Tallies every archive entry. Each entry counts for at least
  # Attachment::MINIMUM_SIZE_FOR_QUOTA towards the total size.
  def process!
    CanvasUnzip.extract_archive(filename) do |entry|
      @file_count += 1
      @total_size += [entry.size, Attachment::MINIMUM_SIZE_FOR_QUOTA].max
      @paths << entry.name
    end
    # never report zero entries, so percent_complete cannot divide by zero
    @file_count = 1 if @file_count.zero?
  end
end
|