diff --git a/Gemfile b/Gemfile index 91fcd546df..7a6e7a6af8 100644 --- a/Gemfile +++ b/Gemfile @@ -112,6 +112,15 @@ gem("xmlrpc") gem("fastimage") # for detecting file type of uploaded images gem("mimemagic") +# An interface between Ruby and ImageMagick/Vips +gem("image_processing") +# NOTE: gem "exiftool/exiftool_vendored" is for reading only +# This supports writing (plus reading) EXIF data, "vendors" latest exiftool. +# If reading/writing multiple files at once, switch to "multi_exiftool" gem. +gem("mini_exiftool_vendored") +# synchronize files between remote hosts by wrapping a call to the rsync binary +# used in transfers to the image server +gem("rsync") # for creating zip files # RubyZip 3.0 is coming! diff --git a/Gemfile.lock b/Gemfile.lock index a2140e1507..17f2653cc0 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -156,6 +156,9 @@ GEM hashdiff (1.1.0) i18n (1.14.5) concurrent-ruby (~> 1.0) + image_processing (1.12.2) + mini_magick (>= 4.9.5, < 5) + ruby-vips (>= 2.0.17, < 3) importmap-rails (2.0.1) actionpack (>= 6.0.0) activesupport (>= 6.0.0) @@ -184,6 +187,10 @@ GEM mimemagic (0.4.3) nokogiri (~> 1) rake + mini_exiftool (2.11.0) + mini_exiftool_vendored (9.2.7.v1) + mini_exiftool (>= 1.6.0) + mini_magick (4.12.0) mini_mime (1.1.5) mini_racer (0.12.0) libv8-node (~> 21.7.2.0) @@ -279,6 +286,7 @@ GEM chunky_png (~> 1.0) rqrcode_core (~> 1.0) rqrcode_core (1.2.0) + rsync (1.0.9) rtf (0.3.3) rubocop (1.64.1) json (~> 2.3) @@ -304,6 +312,8 @@ GEM rubocop-thread_safety (0.5.1) rubocop (>= 0.90.0) ruby-progressbar (1.13.0) + ruby-vips (2.2.1) + ffi (~> 1.12) rubyzip (2.3.2) sass-embedded (1.77.4-arm64-darwin) google-protobuf (>= 3.25, < 5.0) @@ -419,9 +429,11 @@ DEPENDENCIES debug fastimage i18n + image_processing importmap-rails jbuilder mimemagic + mini_exiftool_vendored mini_racer minitest minitest-reporters @@ -436,6 +448,7 @@ DEPENDENCIES redis (~> 4.0) requestjs-rails rqrcode + rsync rtf rubocop rubocop-performance diff --git a/app/models/image.rb 
b/app/models/image.rb index a7fc09a1a0..ce6c3fbc6d 100644 --- a/app/models/image.rb +++ b/app/models/image.rb @@ -528,6 +528,7 @@ def input_stream?(file) def init_image_from_local_file(file) @file = file raise("Weird: file.path is blank!") if file.path.blank? + self.upload_temp_file = file.path self.upload_length = file.size add_extra_attributes_from_file(file) @@ -709,15 +710,21 @@ def process_image(strip: false) update_attribute(:gps_stripped, true) if strip strip = strip ? "1" : "0" if move_original - cmd = MO.process_image_command. - gsub("", id.to_s). - gsub("", ext). - gsub("", set). - gsub("", strip) - if !Rails.env.test? && !system(cmd) - errors.add(:image, :runtime_image_process_failed.t(id: id)) - result = false - end + args = { + image: self, ext: ext, set_size: set, strip_gps: strip, user: user + } + processor = Image::Processor.new(args) + processor.process + + # cmd = MO.process_image_command. + # gsub("", id.to_s). + # gsub("", ext). + # gsub("", set). + # gsub("", strip) + # if !Rails.env.test? && !system(cmd) + # errors.add(:image, :runtime_image_process_failed.t(id: id)) + # result = false + # end else result = false end diff --git a/app/models/image/processor.rb b/app/models/image/processor.rb new file mode 100644 index 0000000000..ac0109eb75 --- /dev/null +++ b/app/models/image/processor.rb @@ -0,0 +1,397 @@ +# frozen_string_literal: true + +# Image::Processor. Not to be confused with the ImageProcessing gem's class. +# +# Methods +# `process` +# Resize and transfer uploaded images to the image server(s). +# It is intended to run asynchronously via Active Job. One of these +# jobs is spawned for each image uploaded. It takes these steps: + +# 1. convert original to jpeg if necessary +# 2. reorient it correctly if necessary +# 3. set size of original image in database if 'set' flag used +# 4. create the five smaller-sized copies +# 5. copy all files to the image server(s) if in production mode +# 6. 
# email webmaster if there were any errors

# Original script ensures that no other processes are running ImageMagick or
# scp before it runs its own commands. If another is running, it sleeps a few
# seconds and tries again.
#
# Besides `process`, this class also houses `rotate`, `retransfer` and the
# class-level `retransfer_images`.

class Image
  # Resizes an uploaded image into every configured size and copies the
  # resulting files to the configured image server(s).
  class Processor
    require "fileutils"
    require "image_processing/mini_magick"
    require "mini_exiftool_vendored"
    require "fastimage"
    require "rsync"
    require "open-uri"

    # Constants for servers, paths and directories.
    IMAGE_SUBDIRS = Image::URL::SUBDIRECTORIES.values.freeze
    LOCAL_IMAGES_PATH = MO.local_image_files
    # Where the private key is stored for scp.
    PRIVATE_KEY_PATH = Rails.root.join(".ssh/id_rsa").to_s
    # returns symbols!
    IMAGE_SERVERS = MO.image_sources.keys.freeze

    # Values of the `set_size` / `strip_gps` options that mean "off".
    # Image#process_image passes the strings "1"/"0"; in Ruby "0" is truthy,
    # so the flags must be normalized explicitly.
    FALSE_FLAGS = [nil, false, 0, "0", "", "false"].freeze

    # Orientation arguments accepted by `rotate`.
    ROTATIONS = %w[-90 +90 180].freeze
    TRANSFORMS = (ROTATIONS + %w[-h -v]).freeze

    # Build the per-environment image server table.
    #
    # Always contains a :local entry; every entry of MO.image_sources that
    # has a :write spec is added (servers without one are skipped).
    # Must be a class method defined BEFORE IMAGE_SERVER_DATA is assigned,
    # because that assignment runs while the class body is being evaluated.
    #
    # NOTE: must use Addressable::URI to get "user@host:port" `authority`
    #
    # @return [Hash] frozen hash: server => { url:, type:, path:, subdirs: }
    def self.image_server_data
      data = {
        local: {
          url: "file://#{LOCAL_IMAGES_PATH}",
          type: "file",
          path: LOCAL_IMAGES_PATH,
          subdirs: IMAGE_SUBDIRS
        }
      }

      MO.image_sources.each do |server, specs|
        next unless specs[:write]

        url = format(specs[:write], root: MO.root)
        parsed = Addressable::URI.parse(url)
        data[server] = {
          url: url,
          type: parsed.scheme,
          # Interpolate: `authority` may be nil, and nil + String raises.
          path: "#{parsed.authority}#{parsed.path}",
          subdirs: specs[:sizes] || IMAGE_SUBDIRS
        }
      end
      data.freeze
    end

    # per-environment image server data
    IMAGE_SERVER_DATA = image_server_data

    # Store Exiftool's database in a temporary directory.
    MiniExiftool.pstore_dir = Rails.root.join("tmp").to_s

    # Source, destination sizes and quality settings for each conversion.
    SIZE_CONVERSIONS = [
      ["full_size", "huge", 1280, 93],
      ["huge", "large", 960, 94],
      ["huge", "medium", 640, 95], # medium = half of huge
      ["medium", "small", 320, 95],
      ["small", "thumbnail", 160, 95]
    ].freeze

    # Instance-level accessor kept for callers of the original method name.
    def image_server_data
      IMAGE_SERVER_DATA
    end

    # @param args [Hash] options:
    #   :image     (required) the Image record to process
    #   :user      defaults to the image's user
    #   :ext       defaults to the image's original_extension
    #   :set_size  flag; record width/height of the full-size file
    #   :strip_gps flag; remove GPS tags from the files
    # Flags accept booleans as well as "1"/"0" style values.
    # Raises (with a translated message) if image, user or ext is missing.
    def initialize(args = {})
      @image = args[:image]
      raise(:process_image_no_image.t) unless @image

      @user = args[:user] || @image.user
      raise(:process_image_no_user.t) unless @user

      @ext = args[:ext] || @image.original_extension
      raise(:process_image_no_ext.t) unless @ext

      @id = @image.id
      @set_size = flag_set?(args[:set_size])
      @strip_gps = flag_set?(args[:strip_gps])

      # Real booleans: 0 is truthy in Ruby, so 0/1 cannot be used in `if`.
      @transferred_any = false
      @errors = []
    end

    # Run the whole pipeline: convert, strip GPS, orient, resize, transfer,
    # update the record, and report any collected errors.
    def process
      # for debugging
      # perform_desc = "#{@id}, #{@ext}, #{@set_size}, #{@strip_gps}"
      # log("Starting Image::Processor.process(#{perform_desc})")

      # image.update_attribute(:upload_status, "pending")
      convert_raw_to_jpg if @ext != "jpg"
      strip_gps_from_file(full_size_filepath) if @strip_gps
      auto_orient_if_needed(full_size_filepath)
      update_image_record_width_height_and_transferred if @set_size
      make_file_sizes
      transfer_files_to_image_servers
      mark_image_record_transferred_and_touch_obs if @transferred_any
      email_webmaster if @errors.any?

      # for debugging
      # log("Done with Image::Processor.process(#{perform_desc})")
    end

    # Strip GPS data
    def strip_gps_from_file(file)
      working = MiniExiftool.new(file)
      working["GPS:all"] = nil
      working["XMP:Geotag"] = nil
      working.save
    end

    # Rotate or mirror the full-size file, then regenerate and re-transfer
    # all sizes. `orientation` is one of "-90", "+90", "180", "-h", "-v".
    #
    # NOTE(review): `process` re-runs convert_raw_to_jpg when @ext != "jpg",
    # which rewrites the full-size file from the original upload -- confirm
    # the rotation survives for non-jpg originals.
    def rotate(orientation)
      make_sure_we_have_full_size_locally
      reset_file_orientation
      transform_full_size_file(orientation)
      update_image_record_width_height_and_transferred
      process
    end

    # Copy every size to each server that has an entry in IMAGE_SERVER_DATA.
    # Does nothing in the development environment.
    def transfer_files_to_image_servers
      return if Rails.env.development?

      IMAGE_SERVERS.each do |server|
        # Only servers with a :write spec (plus :local) have server data.
        next unless IMAGE_SERVER_DATA.key?(server)

        transfer_all_sizes_to_server_subdirectories(server)
      end
    end

    # Transfer the existing local files again and, if any server accepted
    # them without error, mark the image record as transferred.
    def retransfer
      transfer_files_to_image_servers
      mark_image_record_transferred_and_touch_obs if @transferred_any
      email_webmaster if @errors.any?
    end

    # Retry the transfer of every image not yet flagged as transferred.
    def self.retransfer_images
      Image.where(transferred: false).find_each do |image|
        new(image: image).retransfer
      end
    end

    private

    # Normalize an option flag ("1"/"0", true/false, nil) to a boolean.
    def flag_set?(value)
      !FALSE_FLAGS.include?(value)
    end

    # Fetch "orig/<id>.jpg" from the first server that serves "orig",
    # unless the full-size file is already present locally.
    def make_sure_we_have_full_size_locally
      return if File.exist?(full_size_filepath)

      server = IMAGE_SERVERS.find do |candidate|
        image_server_has_subdir?(candidate, "orig")
      end
      copy_file_from_server(server, "orig/#{@id}.jpg") if server
    end

    def reset_file_orientation
      working = MiniExiftool.new(full_size_filepath, numerical: true)
      return unless working.orientation.to_i != 1

      # This should reset the orientation to 1 from the original data.
      working.copy_tags_from(full_size_filepath, "all")
      return unless working.orientation.to_i != 1

      working.orientation = 1
      working.save
    end

    # Apply a rotation ("-90", "+90", "180"), horizontal mirror ("-h") or
    # vertical mirror ("-v") to the full-size file in place.
    # Unknown orientations are ignored.
    def transform_full_size_file(orientation)
      return unless TRANSFORMS.include?(orientation)

      source = ImageProcessing::MiniMagick.source(full_size_filepath)
      # Each builder call returns a new pipeline, so keep the result.
      pipeline = case orientation
                 when "-h" then source.flop
                 when "-v" then source.flip
                 else source.rotate(orientation)
                 end
      pipeline.call(destination: full_size_filepath)
    end

    # Convert the original upload to a full-size jpg.
    # Note this also calls strip_gps_from_file(original_filepath).
    def convert_raw_to_jpg
      pipeline = ImageProcessing::MiniMagick.source(original_filepath).
                 append("-quality", 90).
                 append("-auto-orient").
                 saver(allow_splitting: true).
                 convert("jpg")

      pipeline.call(destination: full_size_filepath)

      # If there were multiple layers, ImageMagick saves them as 1234-N.jpg.
      keep_first_layer_only unless File.exist?(full_size_filepath)

      # Strip GPS out of header of original_file if hiding coordinates.
      strip_gps_from_file(original_filepath) if @strip_gps
    end

    # Promote the first "<id>-N.jpg" layer to the full-size file and delete
    # all layer files. Does nothing if no layer files exist.
    def keep_first_layer_only
      layers = Dir.glob("#{LOCAL_IMAGES_PATH}/orig/#{@id}-*.jpg")
      return if layers.empty?

      # `min` keeps the choice independent of glob ordering.
      FileUtils.cp(layers.min, full_size_filepath)
      File.delete(*layers)
    end

    # Rewrite the file only if auto-orienting changed its orientation.
    def auto_orient_if_needed(filepath)
      file_to_orient = MiniMagick::Image.open(filepath)
      original_orientation = file_to_orient["%[orientation]"]
      file_to_orient.auto_orient
      new_orientation = file_to_orient["%[orientation]"]

      file_to_orient.write(filepath) if original_orientation != new_orientation
    end

    # This also sets transferred to false to save db writes.
    # Needed in rotate, and later overwritten in process.
    def update_image_record_width_height_and_transferred
      width, height = FastImage.size(full_size_filepath)
      @image.update(width: width, height: height, transferred: false)
    end

    def make_file_sizes
      SIZE_CONVERSIONS.each do |source, destination, size, quality|
        convert_source_to_destination(source, destination, size, quality)
      end
    end

    # Shrink the `source` size into the `destination` size. `size` is the
    # maximum edge in pixels (">" = only shrink, never enlarge).
    def convert_source_to_destination(source, destination, size, quality = 95)
      # The generated path helpers are named "<size>_filepath".
      source_file = send(:"#{source}_filepath")
      destination_file = send(:"#{destination}_filepath")
      pipeline = ImageProcessing::MiniMagick.source(source_file).
                 append("-thumbnail", "#{size}x#{size}>").
                 append("-quality", quality).
                 convert("jpg")

      pipeline.call(destination: destination_file)
    end

    # Copy each size this server accepts, plus the non-jpg original if the
    # server takes "orig". Flags success only if no new error was recorded.
    def transfer_all_sizes_to_server_subdirectories(server)
      subdirs = IMAGE_SERVER_DATA[server][:subdirs]
      errors_before = @errors.size

      IMAGE_SUBDIRS.each do |subdir|
        next unless subdirs.include?(subdir)

        copy_file_to_server(server, "#{subdir}/#{@id}.jpg")
      end
      if @ext != "jpg" && subdirs.include?("orig")
        copy_file_to_server(server, "orig/#{@id}.#{@ext}")
      end

      @transferred_any = true if @errors.size == errors_before
    end

    # Mark image as transferred and touch related obs (for caches) if all good
    def mark_image_record_transferred_and_touch_obs
      @image.update(
        transferred: @transferred_any
        # upload_status: "success"
      )
      Observation.joins(:observation_images).
        where(observation_images: { image_id: @id }).touch_all
    end

    # Email webmaster if there were any errors
    def email_webmaster
      QueuedEmail::Webmaster.create_email(
        sender_email: @user.email,
        subject: "[MO] process_image",
        content: @errors.join("\n")
      )
    end

    # False (rather than an exception) for servers without server data.
    def image_server_has_subdir?(server, subdir)
      IMAGE_SERVER_DATA.dig(server, :subdirs)&.include?(subdir) || false
    end

    # Original file locations
    def original_filepath
      "#{LOCAL_IMAGES_PATH}/orig/#{@id}.#{@ext}"
    end

    # full_size, huge, large, medium, small, thumbnail
    Image::URL::SUBDIRECTORIES.each do |size, subdir|
      define_method(:"#{size}_filepath") do
        "#{LOCAL_IMAGES_PATH}/#{subdir}/#{@id}.jpg"
      end
    end

    ############################################################

    def copy_file_to_server(server, local_file, remote_file = local_file)
      case IMAGE_SERVER_DATA[server][:type]
      when "file"
        copy_file_to_local_server(server, local_file, remote_file)
      when "ssh"
        copy_file_to_remote_server(server, local_file, remote_file)
      else
        raise("Unknown image server type: #{IMAGE_SERVER_DATA[server][:type]}")
      end
    end

    def copy_file_to_local_server(server, local_file, remote_file)
      return unless (remote_path = IMAGE_SERVER_DATA[server][:path])

      source = "#{LOCAL_IMAGES_PATH}/#{local_file}"
      destination = "#{remote_path}/#{remote_file}"
      # FileUtils.cp raises when both paths name the same file, which is
      # the case for the :local server.
      return if File.identical?(source, destination)

      FileUtils.cp(source, destination)
    end

    # Rsync is used to copy files to the image server(s).
    def copy_file_to_remote_server(server, local_file, remote_file)
      return unless (remote_path = IMAGE_SERVER_DATA[server][:path])

      Rsync.run("#{LOCAL_IMAGES_PATH}/#{local_file}",
                "#{remote_path}/#{remote_file}") do |result|
        @errors << result.error unless result.success?
      end
    end

    # SECURITY NOTE: `remote_file` is joined directly onto both the remote
    # and the local path. If an attacker could ever control it, path
    # traversal ("../") could read arbitrary files from the remote server or
    # write outside LOCAL_IMAGES_PATH. The caller visible in this class
    # builds it from the image id ("orig/<id>.jpg"). To keep this safe:
    # - sanitize `remote_file` (no "../" or other special sequences) before
    #   passing anything derived from user input;
    # - consider deriving the local file path independently of `remote_file`;
    # - make sure the remote server itself rejects traversal sequences;
    # - always use secure connections (HTTPS) to prevent man-in-the-middle
    #   attacks.
    def copy_file_from_server(server, remote_file)
      case IMAGE_SERVER_DATA[server][:type]
      when "file"
        copy_file_from_local_server(server, remote_file)
      when "ssh"
        copy_file_from_remote_server(server, remote_file)
      when "http"
        copy_file_from_http_server(server, remote_file)
      else
        raise("Don't know how to get #{remote_file} from #{server} via: " \
              "#{IMAGE_SERVER_DATA[server][:type]}")
      end
    end

    def copy_file_from_local_server(server, remote_file)
      return unless (remote_path = IMAGE_SERVER_DATA[server][:path])

      source = "#{remote_path}/#{remote_file}"
      destination = "#{LOCAL_IMAGES_PATH}/#{remote_file}"
      # Same-file copies raise in FileUtils.cp; nothing to do in that case.
      return if File.identical?(source, destination)

      FileUtils.cp(source, destination)
    end

    # Failures are collected in @errors, as for uploads.
    def copy_file_from_remote_server(server, remote_file)
      return unless (remote_path = IMAGE_SERVER_DATA[server][:path])

      Rsync.run("#{remote_path}/#{remote_file}",
                "#{LOCAL_IMAGES_PATH}/#{remote_file}") do |result|
        @errors << result.error unless result.success?
      end
    end

    # Uses the stored :url because :path carries no scheme and therefore
    # cannot be opened by OpenURI.
    def copy_file_from_http_server(server, remote_file)
      return unless (base_url = IMAGE_SERVER_DATA[server][:url])

      # Block form closes the stream; copy_stream handles both the StringIO
      # and Tempfile objects OpenURI may return.
      OpenURI.open_uri("#{base_url}/#{remote_file}") do |io|
        IO.copy_stream(io, "#{LOCAL_IMAGES_PATH}/#{remote_file}")
      end
    end
  end
end