diff --git a/lib/tasks/schools.rake b/lib/tasks/schools.rake
index 0b1f2c6c91..f3aea5f82f 100644
--- a/lib/tasks/schools.rake
+++ b/lib/tasks/schools.rake
@@ -79,6 +79,20 @@ namespace :schools do
     )
     # rubocop:enable Rails/SaveBang
 
+    # Normalises a school website URL taken from the GIAS CSV. Returns nil for
+    # a blank value, otherwise a new string (the input is never mutated).
+    process_url = ->(url) do
+      return nil if url.blank?
+
+      # Legh Vale school has a URL of http:www.leghvale.st-helens.sch.uk
+      # which is not a valid URL. Repair it before the protocol check below,
+      # using non-bang gsub: gsub! returns nil when nothing was replaced.
+      repaired = url.gsub("http:www", "http://www")
+
+      # Some URLs from the GIAS CSV are missing the protocol.
+      repaired.start_with?("http://", "https://") ? repaired : "https://#{repaired}"
+    end
+
     CSV.parse(
       csv_content,
       headers: true,
@@ -96,15 +110,15 @@ namespace :schools do
         town: row["Town"],
         county: row["County (name)"],
         postcode: row["Postcode"],
-        url: row["SchoolWebsite"]
+        url: process_url.call(row["SchoolWebsite"].presence)
       )
 
       if locations.size >= batch_size
-        Location.import locations,
-                        on_duplicate_key_update: {
-                          conflict_target: [:urn],
-                          columns: %i[name address town county postcode url]
-                        }
+        Location.import! locations,
+                         on_duplicate_key_update: {
+                           conflict_target: [:urn],
+                           columns: %i[name address town county postcode url]
+                         }
         locations.clear
       end
 
@@ -113,11 +127,11 @@ namespace :schools do
 
     # Import remaining locations in the last incomplete batch
     unless locations.empty?
-      Location.import locations,
-                      on_duplicate_key_update: {
-                        conflict_target: [:urn],
-                        columns: %i[name address town county postcode url]
-                      }
+      Location.import! locations,
+                       on_duplicate_key_update: {
+                         conflict_target: [:urn],
+                         columns: %i[name address town county postcode url]
+                       }
     end
   end
 