embulk storages pre-installed to fix filename issue

parent 7c13154d
# Registers the Java class org.embulk.input.FtpFileInputPlugin under the input
# plugin name "ftp". The third argument is a path ending in "classpath",
# resolved relative to the location of this file.
Embulk::JavaPlugin.register_input(
"ftp", "org.embulk.input.FtpFileInputPlugin",
File.expand_path('../../../../classpath', __FILE__))
# Registers the Java class org.embulk.input.http.HttpFileInputPlugin under the
# input plugin name "http", using the same relative "classpath" lookup.
Embulk::JavaPlugin.register_input(
"http", "org.embulk.input.http.HttpFileInputPlugin",
File.expand_path('../../../../classpath', __FILE__))
# Registers the Java class org.embulk.input.s3.S3FileInputPlugin under the
# input plugin name :s3 (given as a symbol here, unlike the two string names
# above), using the same relative "classpath" lookup.
Embulk::JavaPlugin.register_input(
:s3, "org.embulk.input.s3.S3FileInputPlugin",
File.expand_path('../../../../classpath', __FILE__))
......@@ -18,7 +18,11 @@ module Embulk
task_reports = yield(task)
next_config_diff = {}
@logger = LogManager.instance()
@logger.info("Your ingested files will be available in the site in a few minutes. Thank for your patience.", print=TRUE)
if task_reports.length > 0
@logger.info("Your ingested files will be available in the site in a few minutes. Thank for your patience.", print=TRUE)
else
@logger.info("No new files where processed for ingestion.", print=TRUE)
end
return next_config_diff
end
......
......@@ -20,10 +20,12 @@ module Embulk
class BinaryParserPlugin < ParserPlugin
Plugin.register_parser("binary", self)
METADATA_FILE_NAME = "/.metadata_file"
def self.transaction(config, &control)
tool_dir = config.param('tool_dir', :string, default: ".")
@logger = LogManager.instance()
@logger.setFilename(tool_dir, "parser")
@logger.setFilename(tool_dir, "ingestion")
task = {
chunk_size: config.param('chunk_size', :float, default: 0) * DatasetUtils::MEGA,
supplier: config.param("supplier", :string, default: "parser"),
......@@ -53,15 +55,31 @@ module Embulk
@logger = LogManager.instance()
while file = file_input.next_file
begin
filename = "file_from_#{task['input_plugin']}_#{task['date']}"
each_chunk(file, filename, task['chunk_size']) do |record|
metadata_file = Dir.pwd + METADATA_FILE_NAME
metadata = File.open(metadata_file) {|f| f.readline} if File.exist?(metadata_file)
File.delete(metadata_file) if File.exist?(metadata_file)
rescue Exception => e
@logger.error("An error occurred while getting file metadata: " + e.to_s)
@logger.error(e.backtrace)
end
begin
if metadata
extension = File.extname metadata
filename = metadata.reverse.sub(extension.reverse, "").reverse
extension.gsub! '.', ''
extension = extension == "" ? DatasetUtils::NONE_EXT : extension
else
filename = "file_from_#{task['input_plugin']}_#{task['date']}"
extension = @index.to_s.rjust(3, "0")
end
each_chunk(file, filename.chomp, extension.chomp, task['chunk_size']) do |record|
@page_builder.add(record)
end
@page_builder.finish
Index.instance().increase()
rescue java.lang.OutOfMemoryError
@logger.logOutOfMemoryError(path)
return
@logger.abortExecution()
rescue Exception => e
@logger.error("An error occurred during file ingestion: " + e.to_s, print=TRUE)
@logger.error(e.backtrace)
......@@ -71,8 +89,7 @@ module Embulk
end
private
def each_chunk(file, filename, chunk_size=DatasetUtils::CHUNK_SIZE)
extension = @index.to_s.rjust(3, "0")
def each_chunk(file, filename, extension, chunk_size=DatasetUtils::CHUNK_SIZE)
npart = 0
next_byte = file.read(1)
first = TRUE
......
package org.embulk.input;
import java.io.PrintWriter;
/**
 * Writes a single line of metadata to a well-known file in the process
 * working directory ({@code user.dir}).
 *
 * The file name is {@link #METADATA_FILE_NAME}; the binary parser plugin looks
 * for a file with the same name in its own working directory.
 */
public class MetadataUtils
{
    /** File name (with leading separator) appended to the working directory. */
    public static final String METADATA_FILE_NAME = "/.metadata_file";

    /**
     * Stores {@code metadata}, followed by a line terminator, as UTF-8 text in
     * the metadata file, replacing any previous content.
     *
     * This is best-effort: any failure is reported on standard output and is
     * never propagated to the caller.
     *
     * @param metadata the text to store
     */
    public void saveMetadata(String metadata)
    {
        try {
            String directory = System.getProperty("user.dir");
            String metadataFile = directory.concat(METADATA_FILE_NAME);
            // try-with-resources guarantees the file handle is released even
            // when writing fails part-way through.
            try (PrintWriter writer = new PrintWriter(metadataFile, "UTF-8")) {
                writer.println(metadata);
                // PrintWriter never throws on write errors; it only records
                // them, so ask explicitly (this also flushes the stream).
                if (writer.checkError()) {
                    throw new java.io.IOException("failed to write " + metadataFile);
                }
            }
        }
        catch (Exception e) {
            System.out.println("[ERROR] Could not store metadata: " + metadata);
        }
    }
}
package org.embulk.input.http;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Objects;
/**
 * Immutable holder for HTTP basic-auth credentials, bound to the JSON
 * properties {@code user} and {@code password}.
 */
public class BasicAuthOption
{
    private final String user;
    private final String password;

    /**
     * @param user value of the {@code user} property
     * @param password value of the {@code password} property
     */
    @JsonCreator
    public BasicAuthOption(@JsonProperty("user") String user,
            @JsonProperty("password") String password)
    {
        this.user = user;
        this.password = password;
    }

    /** @return the configured user name */
    @JsonProperty("user")
    public String getUser()
    {
        return user;
    }

    /** @return the configured password */
    @JsonProperty("password")
    public String getPassword()
    {
        return password;
    }

    /**
     * Value equality on user and password, consistent with {@link #hashCode()}.
     */
    @Override
    public boolean equals(Object obj)
    {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof BasicAuthOption)) {
            return false;
        }
        BasicAuthOption other = (BasicAuthOption) obj;
        return Objects.equal(user, other.user) &&
                Objects.equal(password, other.password);
    }

    @Override
    public int hashCode()
    {
        return Objects.hashCode(user, password);
    }

    /**
     * Returns a description that is safe to log: the password itself is
     * never included, only a fixed mask when one is set.
     */
    @Override
    public String toString()
    {
        String maskedPassword = (password == null) ? null : "***";
        return String.format("BasicAuthOption[%s, %s]", getUser(), maskedPassword);
    }
}
package org.embulk.input.http;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Optional;
import java.util.ArrayList;
import java.util.List;
/**
 * Paging configuration: produces, for each page, the query parameter(s) that
 * select that page.
 *
 * Example: start=0, step=10, pages=2, from_param "from", to_param "to" yields
 * [from=0, to=9] and [from=10, to=19].
 */
public class PagerOption
{
    private final String fromParam;
    private final Optional<String> toParam;
    private final int start;
    private final int pages;
    private final int step;

    /**
     * @param fromParam name of the parameter carrying the first index of a page
     * @param toParam optional name of the parameter carrying the last index of a page
     * @param start first index of the first page; 0 when absent
     * @param pages number of pages to generate
     * @param step index distance between consecutive pages; 1 when absent
     */
    @JsonCreator
    public PagerOption(@JsonProperty("from_param") String fromParam,
            @JsonProperty("to_param") Optional<String> toParam,
            @JsonProperty("start") Optional<Integer> start,
            @JsonProperty("pages") int pages,
            @JsonProperty("step") Optional<Integer> step)
    {
        this.fromParam = fromParam;
        this.toParam = toParam;
        this.pages = pages;
        this.start = start.or(0);
        this.step = step.or(1);
    }

    /**
     * Builds one query list per page.
     *
     * Every page contains the "from" query; when a "to" parameter is
     * configured it also contains a "to" query whose value is the last index
     * covered by the page (from + step - 1).
     *
     * @return the per-page query lists, in page order
     */
    public List<List<QueryOption.Query>> expand()
    {
        final List<List<QueryOption.Query>> result = new ArrayList<>();
        int from = start;
        for (int page = 1; page <= pages; page++) {
            final List<QueryOption.Query> pageQueries = new ArrayList<>();
            pageQueries.add(new QueryOption.Query(fromParam, String.valueOf(from)));
            // Both the ranged and the non-ranged form advance by one step.
            final int nextFrom = from + step;
            if (toParam.isPresent()) {
                pageQueries.add(new QueryOption.Query(toParam.get(), String.valueOf(nextFrom - 1)));
            }
            result.add(pageQueries);
            from = nextFrom;
        }
        return result;
    }

    @JsonProperty("from_param")
    public String getFromParam()
    {
        return this.fromParam;
    }

    @JsonProperty("to_param")
    public Optional<String> getToParam()
    {
        return this.toParam;
    }

    @JsonProperty("start")
    public int getStart()
    {
        return this.start;
    }

    @JsonProperty("pages")
    public int getPages()
    {
        return this.pages;
    }

    @JsonProperty("step")
    public int getStep()
    {
        return this.step;
    }

    @Override
    public String toString()
    {
        final StringBuilder text = new StringBuilder("PagerOption{");
        text.append("fromParam='").append(fromParam).append('\'');
        text.append(", toParam=").append(toParam);
        text.append(", start=").append(start);
        text.append(", pages=").append(pages);
        text.append(", step=").append(step);
        text.append('}');
        return text.toString();
    }
}
package org.embulk.input.http;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonValue;
import com.google.common.base.Objects;
import com.google.common.base.Optional;
import java.util.ArrayList;
import java.util.List;
/**
 * The list of configured query parameters. Serialized to and from JSON as the
 * bare list of {@link QueryOption} entries.
 */
public class ParamsOption
{
    private final List<QueryOption> queries;

    @JsonCreator
    public ParamsOption(List<QueryOption> queries)
    {
        this.queries = queries;
    }

    @JsonValue
    public List<QueryOption> getQueries()
    {
        return this.queries;
    }

    /**
     * Produces every combination of the expanded query parameters (a
     * cartesian product over the expansions of each configured parameter).
     * When a pager is given, each combination is emitted once per pager page
     * with that page's queries appended.
     *
     * @param pagerOption optional paging configuration
     * @return one query list per generated combination
     */
    public List<List<QueryOption.Query>> generateQueries(Optional<PagerOption> pagerOption)
    {
        final List<List<QueryOption.Query>> candidates = new ArrayList<>(queries.size());
        for (QueryOption option : queries) {
            candidates.add(option.expand());
        }

        int combinations = 1;
        for (List<QueryOption.Query> choices : candidates) {
            combinations *= choices.size();
        }

        final List<List<QueryOption.Query>> result = new ArrayList<>(combinations);
        for (int n = 0; n < combinations; n++) {
            // Decode n as a mixed-radix number: one digit per parameter,
            // each digit selecting one of that parameter's expansions.
            final List<QueryOption.Query> combination = new ArrayList<>();
            int radix = 1;
            for (List<QueryOption.Query> choices : candidates) {
                final int count = choices.size();
                combination.add(choices.get((n / radix) % count));
                radix *= count;
            }

            if (!pagerOption.isPresent()) {
                result.add(combination);
                continue;
            }
            for (List<QueryOption.Query> page : pagerOption.get().expand()) {
                result.add(mergeCopies(combination, page));
            }
        }
        return result;
    }

    @Override
    public boolean equals(Object obj)
    {
        if (obj == this) {
            return true;
        }
        if (obj instanceof ParamsOption) {
            return Objects.equal(queries, ((ParamsOption) obj).queries);
        }
        return false;
    }

    @Override
    public int hashCode()
    {
        return Objects.hashCode(queries);
    }

    /** Returns a new list holding copies of all queries of head, then of tail. */
    private List<QueryOption.Query> mergeCopies(List<QueryOption.Query> head,
            List<QueryOption.Query> tail)
    {
        final List<QueryOption.Query> merged = new ArrayList<>();
        for (QueryOption.Query q : head) {
            merged.add(q.copy());
        }
        for (QueryOption.Query q : tail) {
            merged.add(q.copy());
        }
        return merged;
    }
}
package org.embulk.input.http;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Objects;
import com.google.common.base.Optional;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
public class QueryOption
{
private final String name;
private final Optional<String> value;
private final Optional<List<String>> values;
private final boolean expand;
/**
 * Creates a query parameter definition from its JSON properties; all
 * arguments are stored as given.
 *
 * @param name parameter name
 * @param value optional single value
 * @param values optional list of values
 * @param expand whether the value(s) should be expanded into several queries
 */
@JsonCreator
public QueryOption(@JsonProperty("name") String name,
        @JsonProperty("value") Optional<String> value,
        @JsonProperty("values") Optional<List<String>> values,
        @JsonProperty("expand") boolean expand)
{
    this.expand = expand;
    this.values = values;
    this.value = value;
    this.name = name;
}
/**
 * Turns this definition into concrete queries.
 *
 * A single {@code value} takes precedence over {@code values}. With
 * {@code expand} set, a single value is brace-expanded into one query per
 * result and a value list yields one query per element; without it, exactly
 * one query is returned carrying the value, or all listed values, as is.
 *
 * @return the generated queries
 * @throws IllegalArgumentException when neither value nor values is present
 */
public List<Query> expand()
{
    if (value.isPresent()) {
        final String single = value.get();
        if (!expand) {
            final List<Query> only = new ArrayList<>(1);
            only.add(new Query(name, single));
            return only;
        }
        final List<String> alternatives = BraceExpansion.expand(single);
        final List<Query> result = new ArrayList<>(alternatives.size());
        for (String alternative : alternatives) {
            result.add(new Query(name, alternative));
        }
        return result;
    }

    if (values.isPresent()) {
        final List<String> listed = values.get();
        if (!expand) {
            // One query carrying every listed value.
            final List<Query> only = new ArrayList<>(1);
            only.add(new Query(name, listed.toArray(new String[listed.size()])));
            return only;
        }
        final List<Query> result = new ArrayList<>(listed.size());
        for (String item : listed) {
            result.add(new Query(name, item));
        }
        return result;
    }

    throw new IllegalArgumentException("value or values must be specified to 'params'");
}
/** @return the parameter name */
@JsonProperty("name")
public String getName()
{
    return name;
}

/** @return the optional single value */
@JsonProperty("value")
public Optional<String> getValue()
{
    return value;
}

/**
 * Exposes the {@code values} creator property as a JSON property as well, so
 * that a value list is written out together with name, value and expand
 * instead of being left out when this option is serialized.
 *
 * @return the optional list of values
 */
@JsonProperty("values")
public Optional<List<String>> getValues()
{
    return values;
}

/** @return whether the value(s) are expanded into several queries */
@JsonProperty("expand")
public boolean isExpand()
{
    return expand;
}
/**
 * Value equality over every configured field: name, value, values and
 * expand. Including {@code values} keeps two options that differ only in
 * their value list from being treated as equal.
 */
@Override
public boolean equals(Object obj)
{
    if (this == obj) {
        return true;
    }
    if (!(obj instanceof QueryOption)) {
        return false;
    }
    QueryOption other = (QueryOption) obj;
    return Objects.equal(this.name, other.name) &&
            Objects.equal(value, other.value) &&
            Objects.equal(values, other.values) &&
            Objects.equal(expand, other.expand);
}

/** Hash over the same fields that {@link #equals(Object)} compares. */
@Override
public int hashCode()
{
    return Objects.hashCode(name, value, values, expand);
}
/** Renders name, value and the expand flag for diagnostics. */
@Override
public String toString()
{
    return "ParameterConfig[" + getName() + ", " + getValue() + ", " + isExpand() + "]";
}
/** A concrete query parameter: a name together with one or more values. */
public static class Query
{
    private final String name;
    private final String[] values;

    /**
     * @param name parameter name
     * @param values the value(s) of the parameter; the array is stored as given
     */
    public Query(@JsonProperty("name") String name,
            @JsonProperty("values") String... values)
    {
        this.values = values;
        this.name = name;
    }

    public String getName()
    {
        return this.name;
    }

    /** @return the stored value array itself (not a copy) */
    public String[] getValues()
    {
        return this.values;
    }

    /** @return a new Query with the same name and its own copy of the value array */
    public Query copy()
    {
        final String[] duplicate = this.values.clone();
        return new Query(this.name, duplicate);
    }
}
private static class BraceExpansion
{
/**
 * Entry point of the brace expansion: starts the recursive expansion of
 * {@code s} with an empty prefix, an empty suffix and a fresh result list,
 * and returns what the recursive call returns.
 */
public static List<String> expand(String s)
{
    final List<String> collected = new ArrayList<String>();
    return expandRecursive("", s, "", collected);
}
private static List<String> expandRecursive(String prefix, String s,
String suffix, List<String> dest)
{
// used the code below as reference.
// http://rosettacode.org/wiki/Brace_expansion#Java
int i1 = -1;
int i2 = 0;
String noEscape = s.replaceAll("([\\\\]{2}|[\\\\][,}{])", " ");
StringBuilder sb = null;
outer:
while ((i1 = noEscape.indexOf('{', i1 + 1)) != -1) {
i2 = i1 + 1;
sb = new StringBuilder(s);