Commit a443f677 authored by Micaël Bergeron

wip: making the fetch paginated

parent ae91c723
...@@ -4,6 +4,8 @@ require 'csv' ...@@ -4,6 +4,8 @@ require 'csv'
require 'yaml' require 'yaml'
module Pseudonymizer module Pseudonymizer
PAGE_SIZE = 10000
class Anon class Anon
def initialize(fields) def initialize(fields)
@anon_fields = fields @anon_fields = fields
...@@ -43,7 +45,8 @@ module Pseudonymizer ...@@ -43,7 +45,8 @@ module Pseudonymizer
new_tables = tables.map do |k, v| new_tables = tables.map do |k, v|
@schema[k] = {} @schema[k] = {}
table_to_csv(k, v["whitelist"], v["pseudo"]) table_to_schema(k)
write_to_csv_file(k, table_page_results(k, v['whitelist'], v['pseudo']))
end end
schema_to_yml schema_to_yml
...@@ -68,14 +71,33 @@ module Pseudonymizer ...@@ -68,14 +71,33 @@ module Pseudonymizer
File.open(file_path, 'w') { |file| file.write(@output_files.to_json) } File.open(file_path, 'w') { |file| file.write(@output_files.to_json) }
end end
# Lazily yields every row of +table+, fetched page by page and anonymized.
#
# @param table [String] name of the table to read; interpolated into the SQL.
# @param whitelist_columns [Array<String>] columns to select; joined with ","
#   and interpolated into the SQL.
# @param pseudonymity_columns the fields handed to Anon.new to configure
#   anonymization.
# @return [Enumerator] yields each element produced by Anon#anonymize for
#   every non-empty page. No query runs until the enumerator is consumed.
#
# NOTE(review): table and column names are interpolated into the SQL string
# unescaped; presumably they come from the trusted project config — confirm.
# NOTE(review): LIMIT/OFFSET is issued without an ORDER BY, so the database is
# free to return rows in a different order for each page; rows may be skipped
# or duplicated. Consider ordering by a stable key once one is known.
def table_page_results(table, whitelist_columns, pseudonymity_columns)
  anonymizer = Anon.new(pseudonymity_columns)

  Enumerator.new do |yielder|
    columns = whitelist_columns.join(",")
    # The page counter lives inside the block so that every enumeration
    # (e.g. `first` followed by `each`) restarts from the first page.
    page = 0

    loop do
      offset = page * PAGE_SIZE
      sql = "SELECT #{columns} FROM #{table} LIMIT #{PAGE_SIZE} OFFSET #{offset};"

      # a page of results
      results = ActiveRecord::Base.connection.exec_query(sql)
      break if results.empty?

      anonymizer.anonymize(results).each { |result| yielder << result }
      page += 1
    end
  end
end
def table_to_schema(table)
type_results = ActiveRecord::Base.connection.columns(table) type_results = ActiveRecord::Base.connection.columns(table)
type_results = type_results.select do |c| type_results = type_results.select do |c|
@config["tables"][table]["whitelist"].include?(c.name) @config["tables"][table]["whitelist"].include?(c.name)
end end
type_results = type_results.map do |c| type_results = type_results.map do |c|
data_type = c.sql_type data_type = c.sql_type
...@@ -86,10 +108,6 @@ module Pseudonymizer ...@@ -86,10 +108,6 @@ module Pseudonymizer
{ name: c.name, data_type: data_type } { name: c.name, data_type: data_type }
end end
set_schema_column_types(table, type_results) set_schema_column_types(table, type_results)
return if results.empty?
anon = Anon.new(pseudonymity_columns)
write_to_csv_file(table, anon.anonymize(results))
end end
def set_schema_column_types(table, type_results) def set_schema_column_types(table, type_results)
...@@ -103,14 +121,15 @@ module Pseudonymizer ...@@ -103,14 +121,15 @@ module Pseudonymizer
# Writes +contents+ to a CSV file: a header row built from the first row's
# keys, followed by one line of values per row.
#
# @param title [String] used in the log message and passed to
#   get_and_log_file_name to obtain the destination path.
# @param contents [Enumerable<Hash>] rows to write; may be a lazy Enumerator.
#   It is enumerated twice (once for the header row, once for the data), so it
#   must be restartable.
# @return [String, nil] the path of the written file, or nil when +contents+
#   is empty (nothing is logged and no file is created in that case).
def write_to_csv_file(title, contents)
  # Empty tables produce no file; without this guard `nil.keys` would raise.
  first_row = contents.first
  return if first_row.nil?

  Rails.logger.info "Writing #{title} ..."
  file_path = get_and_log_file_name("csv", title)

  # Rows are streamed straight to disk instead of building the whole CSV
  # in memory first.
  CSV.open(file_path, 'w') do |csv|
    csv << first_row.keys
    contents.each { |row| csv << row.values }
  end

  file_path
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment