Commit 1ed0b801 authored by Micaël Bergeron

wip: making the query paginated

parent 7d5f50f4
...@@ -4,7 +4,7 @@ require 'csv' ...@@ -4,7 +4,7 @@ require 'csv'
require 'yaml' require 'yaml'
module Pseudonymizer module Pseudonymizer
PAGE_SIZE = 1000 PAGE_SIZE = 10000
class Anon class Anon
def initialize(fields) def initialize(fields)
...@@ -73,8 +73,7 @@ module Pseudonymizer ...@@ -73,8 +73,7 @@ module Pseudonymizer
table_to_schema(table) table_to_schema(table)
write_to_csv_file(table, table_page_results(table, whitelist_columns, pseudonymity_columns)) write_to_csv_file(table, table_page_results(table, whitelist_columns, pseudonymity_columns))
rescue => e rescue => e
binding.pry Rails.logger.error("Failed to export #{table}: #{e}")
Rails.logger.error(e.message)
end end
# yield every result, paginated, anonymized # yield every result, paginated, anonymized
...@@ -91,16 +90,16 @@ module Pseudonymizer ...@@ -91,16 +90,16 @@ module Pseudonymizer
# a page of results # a page of results
results = ActiveRecord::Base.connection.exec_query(sql) results = ActiveRecord::Base.connection.exec_query(sql)
raise StopIteration if results.empty? anonymizer.anonymize(results).each do |result|
anonymizer.anonymize(results).lazy.each do |result|
has_more = true has_more = true
yielder << result yielder << result
end end
raise StopIteration unless has_more
page += 1 page += 1
end end
end end.lazy
end end
def table_to_schema(table) def table_to_schema(table)
...@@ -141,8 +140,6 @@ module Pseudonymizer ...@@ -141,8 +140,6 @@ module Pseudonymizer
end end
end end
GC.start
file_path file_path
end end
......
...@@ -9,15 +9,11 @@ module Pseudonymizer ...@@ -9,15 +9,11 @@ module Pseudonymizer
end end
def output_dir def output_dir
File.join('/tmp', 'gitlab-pseudonymizer', self.start_at.iso8601) File.join('/tmp', 'gitlab-pseudonymizer', start_at.iso8601)
end end
def upload_dir def upload_dir
File.join(self.start_at.iso8601) File.join(start_at.iso8601)
end
def object_store_credentials
config.upload.connection.to_hash.deep_symbolize_keys
end end
end end
end end
...@@ -2,12 +2,21 @@ module Pseudonymizer ...@@ -2,12 +2,21 @@ module Pseudonymizer
class Uploader class Uploader
RemoteStorageUnavailableError = Class.new(StandardError) RemoteStorageUnavailableError = Class.new(StandardError)
def self.object_store_credentials
Gitlab.config.pseudonymizer.upload.connection.to_hash.deep_symbolize_keys
end
def self.remote_directory
Gitlab.config.pseudonymizer.upload.remote_directory
end
def initialize(options, progress = nil) def initialize(options, progress = nil)
@progress = progress || $stdout @progress = progress || $stdout
@config = options.config @config = options.config
@output_dir = options.output_dir @output_dir = options.output_dir
@upload_dir = options.upload_dir @upload_dir = options.upload_dir
@connection_params = options.object_store_credentials @remote_dir = self.class.remote_directory
@connection_params = self.class.object_store_credentials
end end
def upload def upload
...@@ -57,15 +66,14 @@ module Pseudonymizer ...@@ -57,15 +66,14 @@ module Pseudonymizer
def connect_to_remote_directory def connect_to_remote_directory
# our settings use string keys, but Fog expects symbols # our settings use string keys, but Fog expects symbols
connection = ::Fog::Storage.new(@connection_params) connection = ::Fog::Storage.new(@connection_params)
remote_dir = @config.upload.remote_directory
# We only attempt to create the directory for local backups. For AWS # We only attempt to create the directory for local backups. For AWS
# and other cloud providers, we cannot guarantee the user will have # and other cloud providers, we cannot guarantee the user will have
# permission to create the bucket. # permission to create the bucket.
if connection.service == ::Fog::Storage::Local if connection.service == ::Fog::Storage::Local
connection.directories.create(key: remote_dir) connection.directories.create(key: @remote_dir)
else else
connection.directories.get(remote_dir) connection.directories.get(@remote_dir)
end end
end end
......
...@@ -93,9 +93,7 @@ namespace :gitlab do ...@@ -93,9 +93,7 @@ namespace :gitlab do
def progress def progress
if ENV['CRON'] if ENV['CRON']
# We need an object we can say 'puts' and 'print' to; let's use a # Do not output progress for Cron
# StringIO.
require 'stringio'
StringIO.new StringIO.new
else else
$stdout $stdout
......
...@@ -64,7 +64,7 @@ module StubObjectStorage ...@@ -64,7 +64,7 @@ module StubObjectStorage
end end
def stub_object_storage_pseudonymizer(options:) def stub_object_storage_pseudonymizer(options:)
stub_object_storage(connection_params: options.object_store_credentials, stub_object_storage(connection_params: Pseudonymizer::Uploader.object_store_credentials,
remote_directory: options.config.upload.remote_directory) remote_directory: options.config.upload.remote_directory)
end end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment