new commands for staging and reset

parent 963e3e0d
This diff is collapsed.
......@@ -35,12 +35,9 @@ module Embulk
page.each do |record|
reference = record[0]
data_chunk = Base64.decode64(record[1])
data_set_directory = @output_path.end_with?("/") ? @output_path : @output_path + "/"
ref = reference.reverse.sub("/".reverse, ".".reverse).reverse.sub(record[2]+"/", "")
if ref.end_with?(".none")
ref = ref[0...-5]
end
file_path = data_set_directory + ref
@dataset_utils = DatasetUtils.new("")
data_set_directory = @dataset_utils.appendSlashTo(@output_path)
file_path = @dataset_utils.referenceToPath(reference, data_set_directory, record[2])
write_mode = 'ab'
if record[3] == DatasetUtils::DELETE
File.delete(file_path) if File.exist?(file_path)
......@@ -48,7 +45,7 @@ module Embulk
if record[3] == TRUE.to_s
write_mode = 'w'
end
dirname = File.dirname(data_set_directory + ref)
dirname = File.dirname(file_path)
unless File.directory?(dirname)
FileUtils.mkdir_p(dirname)
end
......
......@@ -46,11 +46,12 @@ module Embulk
hash = record[7]
begin
if eof == DatasetUtils::DELETE
reference = [dataset, filename, extension].join("/")
reference = [dataset, filename, extension].join(DatasetUtils::REFERENCE_SEPARATOR)
@wendelin.delete(reference)
else
reference = [supplier, dataset, filename, extension, eof, size, hash].join("/")
if not @wendelin.ingest(reference, data_chunk)
reference = [supplier, dataset, filename, extension, eof, size, hash].join(DatasetUtils::REFERENCE_SEPARATOR)
split = eof != ""
if not @wendelin.ingest(reference, data_chunk, split)
raise "could not ingest"
end
end
......
require_relative '../filelogger'
require_relative '../dataset_utils'
class Index
include Singleton
......@@ -19,21 +20,20 @@ module Embulk
class BinaryParserPlugin < ParserPlugin
Plugin.register_parser("binary", self)
CHUNK_SIZE = 50
MEGA = 1000000
EOF = "EOF"
def self.transaction(config, &control)
tool_dir = config.param('tool_dir', :string, default: ".")
@logger = LogManager.instance()
@logger.setFilename(tool_dir, "parser")
task = {
chunk_size: config.param('chunk_size', :float, default: CHUNK_SIZE) * MEGA,
chunk_size: config.param('chunk_size', :float, default: 0) * DatasetUtils::MEGA,
supplier: config.param("supplier", :string, default: "parser"),
data_set: config.param("data_set", :string),
input_plugin: config.param("storage", :string, default: "parser"),
date: Time.now.strftime("%Y-%m-%d_%H-%M-%S")
}
if task['chunk_size'] == 0
task['chunk_size'] = DatasetUtils::CHUNK_SIZE
end
columns = [
Column.new(0, "supplier", :string),
Column.new(1, "data_set", :string),
......@@ -71,7 +71,7 @@ module Embulk
end
private
def each_chunk(file, filename, chunk_size=CHUNK_SIZE)
def each_chunk(file, filename, chunk_size=DatasetUtils::CHUNK_SIZE)
extension = @index.to_s.rjust(3, "0")
npart = 0
next_byte = file.read(1)
......@@ -89,7 +89,7 @@ module Embulk
data += file.read(chunk_size)
next_byte = file.read(1)
if not next_byte
eof = EOF
eof = DatasetUtils::EOF
if first
# this means that the whole file will be ingested at once (not split)
eof = ""
......
......@@ -23,6 +23,9 @@ class WendelinClient
rescue Exception => e
@logger.error("An error occurred while checking if reference exists: " + e.to_s)
@logger.error(e.backtrace)
if e.to_s.include? "Unauthorized" or e.to_s.include? "401"
raise e
end
return FALSE
else
return res.to_s == 'TRUE'
......@@ -53,27 +56,27 @@ class WendelinClient
end
end
def ingest(reference, data_chunk)
def ingest(reference, data_chunk, split)
@logger.info("Ingestion reference: #{reference}", print=TRUE)
if Time.new - @last_ingestion < 2
# avoid send ingestions to close (specially for split ones)
sleep 2
if split and Time.new - @last_ingestion < 3
# avoid to send split ingestions to close
sleep 3
end
if exists(reference)
@logger.info("There is another ingestion already done for the pair data_set-filename. Reference "\
@logger.info("There is another ingestion already done for the pair dataset-filename. Reference "\
+ reference, print=TRUE)
@logger.info("Rename your reference or delete the older ingestion.", print=TRUE)
@logger.info("Rename your file or download the full dataset to make local changes.", print=TRUE)
return FALSE
end
if reference.include? "#" or reference.include? "+"
raise "Invalid chars in file name. Please rename it."
raise "invalid chars in file name. Please rename it."
end
begin
uri = URI("#{@erp5_url}/ingest?reference=#{reference}")
rescue Exception => e
@logger.error("An error occurred while generating url: " + e.to_s)
@logger.error(e.backtrace)
raise "Invalid chars in file name. Please rename it."
raise "invalid chars in file name. Please rename it."
end
response = handleRequest(uri, reference, data_chunk)
if response == FALSE
......@@ -138,7 +141,7 @@ class WendelinClient
res = Net::HTTP.start(uri.hostname, uri.port,
:use_ssl => (uri.scheme == 'https'),
:verify_mode => OpenSSL::SSL::VERIFY_NONE,
:ssl_timeout => 20, :open_timeout => 20, :read_timeout => 20,
:ssl_timeout => 300, :open_timeout => 300, :read_timeout => 300,
) do |http|
http.request(req)
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment