embulk storages pre-installed to fix filename issue

parent 7c13154d
# Registers the Java class org.embulk.input.FtpFileInputPlugin as the input
# plugin named "ftp". The last argument is the classpath directory, resolved
# relative to the location of this file.
Embulk::JavaPlugin.register_input(
"ftp", "org.embulk.input.FtpFileInputPlugin",
File.expand_path('../../../../classpath', __FILE__))
# Registers the Java class org.embulk.input.http.HttpFileInputPlugin as the
# input plugin named "http", using the same relative classpath directory.
Embulk::JavaPlugin.register_input(
"http", "org.embulk.input.http.HttpFileInputPlugin",
File.expand_path('../../../../classpath', __FILE__))
# Registers the Java class org.embulk.input.s3.S3FileInputPlugin as the input
# plugin named :s3 (a symbol here, whereas the two registrations above pass
# strings), using the same relative classpath directory.
Embulk::JavaPlugin.register_input(
:s3, "org.embulk.input.s3.S3FileInputPlugin",
File.expand_path('../../../../classpath', __FILE__))
...@@ -18,7 +18,11 @@ module Embulk ...@@ -18,7 +18,11 @@ module Embulk
task_reports = yield(task) task_reports = yield(task)
next_config_diff = {} next_config_diff = {}
@logger = LogManager.instance() @logger = LogManager.instance()
if task_reports.length > 0
@logger.info("Your ingested files will be available in the site in a few minutes. Thank for your patience.", print=TRUE) @logger.info("Your ingested files will be available in the site in a few minutes. Thank for your patience.", print=TRUE)
else
@logger.info("No new files where processed for ingestion.", print=TRUE)
end
return next_config_diff return next_config_diff
end end
......
...@@ -20,10 +20,12 @@ module Embulk ...@@ -20,10 +20,12 @@ module Embulk
class BinaryParserPlugin < ParserPlugin class BinaryParserPlugin < ParserPlugin
Plugin.register_parser("binary", self) Plugin.register_parser("binary", self)
METADATA_FILE_NAME = "/.metadata_file"
def self.transaction(config, &control) def self.transaction(config, &control)
tool_dir = config.param('tool_dir', :string, default: ".") tool_dir = config.param('tool_dir', :string, default: ".")
@logger = LogManager.instance() @logger = LogManager.instance()
@logger.setFilename(tool_dir, "parser") @logger.setFilename(tool_dir, "ingestion")
task = { task = {
chunk_size: config.param('chunk_size', :float, default: 0) * DatasetUtils::MEGA, chunk_size: config.param('chunk_size', :float, default: 0) * DatasetUtils::MEGA,
supplier: config.param("supplier", :string, default: "parser"), supplier: config.param("supplier", :string, default: "parser"),
...@@ -53,15 +55,31 @@ module Embulk ...@@ -53,15 +55,31 @@ module Embulk
@logger = LogManager.instance() @logger = LogManager.instance()
while file = file_input.next_file while file = file_input.next_file
begin begin
metadata_file = Dir.pwd + METADATA_FILE_NAME
metadata = File.open(metadata_file) {|f| f.readline} if File.exist?(metadata_file)
File.delete(metadata_file) if File.exist?(metadata_file)
rescue Exception => e
@logger.error("An error occurred while getting file metadata: " + e.to_s)
@logger.error(e.backtrace)
end
begin
if metadata
extension = File.extname metadata
filename = metadata.reverse.sub(extension.reverse, "").reverse
extension.gsub! '.', ''
extension = extension == "" ? DatasetUtils::NONE_EXT : extension
else
filename = "file_from_#{task['input_plugin']}_#{task['date']}" filename = "file_from_#{task['input_plugin']}_#{task['date']}"
each_chunk(file, filename, task['chunk_size']) do |record| extension = @index.to_s.rjust(3, "0")
end
each_chunk(file, filename.chomp, extension.chomp, task['chunk_size']) do |record|
@page_builder.add(record) @page_builder.add(record)
end end
@page_builder.finish @page_builder.finish
Index.instance().increase() Index.instance().increase()
rescue java.lang.OutOfMemoryError rescue java.lang.OutOfMemoryError
@logger.logOutOfMemoryError(path) @logger.logOutOfMemoryError(path)
return @logger.abortExecution()
rescue Exception => e rescue Exception => e
@logger.error("An error occurred during file ingestion: " + e.to_s, print=TRUE) @logger.error("An error occurred during file ingestion: " + e.to_s, print=TRUE)
@logger.error(e.backtrace) @logger.error(e.backtrace)
...@@ -71,8 +89,7 @@ module Embulk ...@@ -71,8 +89,7 @@ module Embulk
end end
private private
def each_chunk(file, filename, chunk_size=DatasetUtils::CHUNK_SIZE) def each_chunk(file, filename, extension, chunk_size=DatasetUtils::CHUNK_SIZE)
extension = @index.to_s.rjust(3, "0")
npart = 0 npart = 0
next_byte = file.read(1) next_byte = file.read(1)
first = TRUE first = TRUE
......
package org.embulk.input;
import java.io.PrintWriter;
public class MetadataUtils
{
public static final String METADATA_FILE_NAME = "/.metadata_file";
public void saveMetadata(String metadata)
{
try
{
String directory = System.getProperty("user.dir");
String metadata_file = directory.concat(METADATA_FILE_NAME);
PrintWriter writer = new PrintWriter(metadata_file, "UTF-8");
writer.println(metadata);
writer.close();
}
catch (Exception e)
{
System.out.println("[ERROR] Could not store metadata: " + metadata);
}
}
}
package org.embulk.input.http;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Objects;
/**
 * Immutable user/password pair, bound to the JSON properties "user" and
 * "password".
 */
public class BasicAuthOption
{
    private final String user;
    private final String password;

    /**
     * @param user value of the "user" property
     * @param password value of the "password" property
     */
    @JsonCreator
    public BasicAuthOption(@JsonProperty("user") String user,
            @JsonProperty("password") String password)
    {
        this.user = user;
        this.password = password;
    }

    @JsonProperty("user")
    public String getUser()
    {
        return user;
    }

    @JsonProperty("password")
    public String getPassword()
    {
        return password;
    }

    /**
     * Two options are equal when both user and password are equal. Defined so
     * that it stays consistent with {@link #hashCode()}.
     */
    @Override
    public boolean equals(Object obj)
    {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof BasicAuthOption)) {
            return false;
        }
        BasicAuthOption other = (BasicAuthOption) obj;
        return Objects.equal(user, other.user) &&
                Objects.equal(password, other.password);
    }

    @Override
    public int hashCode()
    {
        return Objects.hashCode(user, password);
    }

    /**
     * Returns a description that shows the user but masks the password, so the
     * credential does not end up in logs or error messages.
     */
    @Override
    public String toString()
    {
        return String.format("BasicAuthOption[%s, %s]", getUser(), "***");
    }
}
package org.embulk.input.http;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Optional;
import java.util.ArrayList;
import java.util.List;
/**
 * Paging configuration that expands into one list of query parameters per page.
 */
public class PagerOption
{
    private final String fromParam;
    private final Optional<String> toParam;
    private final int start;
    private final int pages;
    private final int step;

    /**
     * @param fromParam name of the parameter that carries the page start value
     * @param toParam optional name of the parameter that carries the page end value
     * @param start first start value; 0 when absent
     * @param pages number of pages to generate
     * @param step distance between consecutive start values; 1 when absent
     */
    @JsonCreator
    public PagerOption(@JsonProperty("from_param") String fromParam,
            @JsonProperty("to_param") Optional<String> toParam,
            @JsonProperty("start") Optional<Integer> start,
            @JsonProperty("pages") int pages,
            @JsonProperty("step") Optional<Integer> step)
    {
        this.fromParam = fromParam;
        this.toParam = toParam;
        this.start = start.or(0);
        this.pages = pages;
        this.step = step.or(1);
    }

    /**
     * Builds the per-page parameters.
     *
     * Each page holds a "from" query and, when {@code toParam} is present, a
     * "to" query whose value is {@code from + step - 1}. The start value
     * advances by {@code step} from one page to the next.
     *
     * @return one list of queries per page, in page order
     */
    public List<List<QueryOption.Query>> expand()
    {
        final List<List<QueryOption.Query>> perPage = new ArrayList<>();
        int from = start;
        for (int page = 0; page < pages; page++) {
            final List<QueryOption.Query> pageQueries = new ArrayList<>();
            pageQueries.add(new QueryOption.Query(fromParam, Integer.toString(from)));
            if (toParam.isPresent()) {
                final int to = from + step - 1;
                pageQueries.add(new QueryOption.Query(toParam.get(), Integer.toString(to)));
            }
            // With or without a "to" parameter the next page starts step further on.
            from += step;
            perPage.add(pageQueries);
        }
        return perPage;
    }

    @JsonProperty("from_param")
    public String getFromParam()
    {
        return fromParam;
    }

    @JsonProperty("to_param")
    public Optional<String> getToParam()
    {
        return toParam;
    }

    @JsonProperty("start")
    public int getStart()
    {
        return start;
    }

    @JsonProperty("pages")
    public int getPages()
    {
        return pages;
    }

    @JsonProperty("step")
    public int getStep()
    {
        return step;
    }

    @Override
    public String toString()
    {
        final StringBuilder text = new StringBuilder("PagerOption{");
        text.append("fromParam='").append(fromParam).append('\'');
        text.append(", toParam=").append(toParam);
        text.append(", start=").append(start);
        text.append(", pages=").append(pages);
        text.append(", step=").append(step);
        text.append('}');
        return text.toString();
    }
}
package org.embulk.input.http;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonValue;
import com.google.common.base.Objects;
import com.google.common.base.Optional;
import java.util.ArrayList;
import java.util.List;
/**
 * A list of {@link QueryOption}s that can be expanded into every combination
 * of their values, optionally multiplied by the pages of a {@link PagerOption}.
 */
public class ParamsOption
{
    private final List<QueryOption> queries;

    @JsonCreator
    public ParamsOption(List<QueryOption> queries)
    {
        this.queries = queries;
    }

    @JsonValue
    public List<QueryOption> getQueries()
    {
        return queries;
    }

    /**
     * Produces the cartesian product of the expanded values of all queries.
     *
     * When a pager is given, every combination is emitted once per page with
     * that page's queries appended; the emitted lists hold copies of the
     * queries. Without a pager each combination is emitted as-is.
     *
     * @param pagerOption optional paging configuration
     * @return one list of queries per generated request
     */
    public List<List<QueryOption.Query>> generateQueries(Optional<PagerOption> pagerOption)
    {
        List<List<QueryOption.Query>> base = new ArrayList<>(queries.size());
        for (QueryOption p : queries) {
            base.add(p.expand());
        }

        int productSize = 1;
        for (List<QueryOption.Query> list : base) {
            productSize *= list.size();
        }

        // The pager expansion does not depend on the combination, so build it once.
        List<List<QueryOption.Query>> pagerQueries = null;
        if (pagerOption.isPresent()) {
            pagerQueries = pagerOption.get().expand();
        }

        List<List<QueryOption.Query>> expands = new ArrayList<>(productSize);
        for (int i = 0; i < productSize; i++) {
            // Treat i as a mixed-radix number: j is the weight of the current
            // list, so (i / j) % size selects that list's element.
            int j = 1;
            List<QueryOption.Query> one = new ArrayList<>();
            for (List<QueryOption.Query> list : base) {
                QueryOption.Query pc = list.get((i / j) % list.size());
                one.add(pc);
                j *= list.size();
            }
            if (pagerQueries != null) {
                for (List<QueryOption.Query> q : pagerQueries) {
                    expands.add(copyAndConcat(one, q));
                }
            }
            else {
                expands.add(one);
            }
        }
        return expands;
    }

    @Override
    public boolean equals(Object obj)
    {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof ParamsOption)) {
            return false;
        }
        ParamsOption other = (ParamsOption) obj;
        return Objects.equal(queries, other.queries);
    }

    @Override
    public int hashCode()
    {
        return Objects.hashCode(queries);
    }

    /**
     * Concatenates the given lists into a new list holding a copy of each query.
     */
    @SafeVarargs
    private static List<QueryOption.Query> copyAndConcat(List<QueryOption.Query>... srcs)
    {
        List<QueryOption.Query> dest = new ArrayList<>();
        for (List<QueryOption.Query> src : srcs) {
            for (QueryOption.Query q : src) {
                dest.add(q.copy());
            }
        }
        return dest;
    }
}
package org.embulk.input.http;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Objects;
import com.google.common.base.Optional;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * One configured query parameter: a name plus either a single {@code value}
 * or a list of {@code values}, optionally expanded into several queries.
 */
public class QueryOption
{
    private final String name;
    private final Optional<String> value;
    private final Optional<List<String>> values;
    private final boolean expand;

    /**
     * @param name parameter name
     * @param value single value; takes precedence over {@code values} when present
     * @param values list of values, used when {@code value} is absent
     * @param expand whether to expand into several queries (see {@link #expand()})
     */
    @JsonCreator
    public QueryOption(@JsonProperty("name") String name,
            @JsonProperty("value") Optional<String> value,
            @JsonProperty("values") Optional<List<String>> values,
            @JsonProperty("expand") boolean expand)
    {
        this.name = name;
        this.value = value;
        this.values = values;
        this.expand = expand;
    }

    /**
     * Turns this option into concrete queries.
     *
     * <ul>
     * <li>{@code value} present, {@code expand} true: one query per brace-expanded string.</li>
     * <li>{@code value} present, {@code expand} false: a single query with that value.</li>
     * <li>{@code values} present, {@code expand} true: one query per list element.</li>
     * <li>{@code values} present, {@code expand} false: a single query carrying all values.</li>
     * </ul>
     *
     * @return the generated queries
     * @throws IllegalArgumentException if neither {@code value} nor {@code values} is present
     */
    public List<Query> expand()
    {
        List<Query> dest;
        if (value.isPresent()) {
            if (expand) {
                List<String> expanded = BraceExpansion.expand(value.get());
                dest = new ArrayList<>(expanded.size());
                for (String s : expanded) {
                    dest.add(new Query(name, s));
                }
            }
            else {
                dest = new ArrayList<>(1);
                dest.add(new Query(name, value.get()));
            }
        }
        else if (values.isPresent()) {
            if (expand) {
                dest = new ArrayList<>(values.get().size());
                for (String s : values.get()) {
                    dest.add(new Query(name, s));
                }
            }
            else {
                dest = new ArrayList<>(1);
                final String[] valueArr = values.get().toArray(new String[values.get().size()]);
                dest.add(new Query(name, valueArr));
            }
        }
        else {
            throw new IllegalArgumentException("value or values must be specified to 'params'");
        }
        return dest;
    }

    @JsonProperty("name")
    public String getName()
    {
        return name;
    }

    @JsonProperty("value")
    public Optional<String> getValue()
    {
        return value;
    }

    /**
     * Exposed as a JSON property so that {@code values} is kept when this
     * option is serialized and read back through the creator.
     */
    @JsonProperty("values")
    public Optional<List<String>> getValues()
    {
        return values;
    }

    @JsonProperty("expand")
    public boolean isExpand()
    {
        return expand;
    }

    @Override
    public boolean equals(Object obj)
    {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof QueryOption)) {
            return false;
        }
        QueryOption other = (QueryOption) obj;
        // values takes part in equality: options that differ only in their
        // value list are different options.
        return Objects.equal(this.name, other.name) &&
                Objects.equal(value, other.value) &&
                Objects.equal(values, other.values) &&
                Objects.equal(expand, other.expand);
    }

    @Override
    public int hashCode()
    {
        return Objects.hashCode(name, value, values, expand);
    }

    @Override
    public String toString()
    {
        return String.format("ParameterConfig[%s, %s, %s]",
                getName(), getValue(), isExpand());
    }

    /**
     * A concrete query parameter: a name with one or more values.
     */
    public static class Query
    {
        private final String name;
        private final String[] values;

        public Query(@JsonProperty("name") String name,
                @JsonProperty("values") String... values)
        {
            this.name = name;
            this.values = values;
        }

        public String getName()
        {
            return name;
        }

        public String[] getValues()
        {
            return values;
        }

        /**
         * @return a new query with the same name and a copy of the values array
         */
        public Query copy()
        {
            return new Query(this.name, Arrays.copyOf(this.values, this.values.length));
        }
    }

    /**
     * Shell-like brace expansion, e.g. {@code a{b,c}} becomes {@code ab} and {@code ac}.
     * A backslash escapes a backslash, comma or brace.
     */
    private static class BraceExpansion
    {
        public static List<String> expand(String s)
        {
            return expandRecursive("", s, "", new ArrayList<String>());
        }

        private static List<String> expandRecursive(String prefix, String s,
                String suffix, List<String> dest)
        {
            // used the code below as reference.
            // http://rosettacode.org/wiki/Brace_expansion#Java
            int i1 = -1;
            int i2 = 0;
            // Mask every two-character escape sequence with two spaces, so the
            // masked copy has the same length as s and its indices can be
            // used on s and sb directly.
            String noEscape = s.replaceAll("([\\\\]{2}|[\\\\][,}{])", "  ");
            StringBuilder sb = null;

            outer:
            while ((i1 = noEscape.indexOf('{', i1 + 1)) != -1) {
                i2 = i1 + 1;
                sb = new StringBuilder(s);
                for (int depth = 1; i2 < s.length() && depth > 0; i2++) {
                    char c = noEscape.charAt(i2);
                    depth = (c == '{') ? ++depth : depth;
                    depth = (c == '}') ? --depth : depth;
                    if (c == ',' && depth == 1) {
                        // Mark top-level commas of this group so it can be split later.
                        sb.setCharAt(i2, '\u0000');
                    }
                    else if (c == '}' && depth == 0 && sb.indexOf("\u0000") != -1) {
                        // A closed group with at least one alternative: expand it.
                        break outer;
                    }
                }
            }

            if (i1 == -1) {
                // Nothing left to expand in s.
                if (suffix.length() > 0) {
                    expandRecursive(prefix + s, suffix, "", dest);
                }
                else {
                    // Un-escape: a doubled backslash becomes one backslash (the
                    // replacement string itself needs the backslash escaped),
                    // then an escaped comma/brace becomes the bare character.
                    final String out = String.format("%s%s%s", prefix, s, suffix).
                            replaceAll("[\\\\]{2}", "\\\\").replaceAll("[\\\\]([,}{])", "$1");
                    dest.add(out);
                }
            }
            else {
                for (String m : sb.substring(i1 + 1, i2).split("\u0000", -1)) {
                    expandRecursive(prefix + s.substring(0, i1), m, s.substring(i2 + 1) + suffix, dest);
                }
            }
            return dest;
        }
    }
}
package org.embulk.input.http;
import com.google.common.collect.ImmutableList;
import org.apache.http.HttpException;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpRequestBase;
import org.apache.http.util.EntityUtils;
import org.embulk.spi.Exec;
import org.embulk.spi.util.RetryExecutor;
import org.slf4j.Logger;
import javax.net.ssl.SSLException;
import java.io.IOException;
import java.io.InterruptedIOException;
import java.net.UnknownHostException;
import java.util.List;
/**
 * Retryable operation that executes one HTTP request and keeps the response
 * once the server has answered with status 200.
 */
public class RetryableHandler implements RetryExecutor.Retryable
{
    protected final Logger logger = Exec.getLogger(getClass());
    private static final List<Class<? extends IOException>> NOT_RETRIABLE_CLASSES = ImmutableList.of(UnknownHostException.class,
            InterruptedIOException.class, SSLException.class);
    private final HttpClient client;
    private final HttpRequestBase request;
    private HttpResponse response;

    public RetryableHandler(HttpClient client, HttpRequestBase request)
    {
        this.client = client;
        this.request = request;
    }

    /**
     * @return the successful response, or null if no call has succeeded yet
     */
    public HttpResponse getResponse()
    {
        return response;
    }

    /**
     * Executes the request and stores the response when its status is 200.
     *
     * @return always null; read the result through {@link #getResponse()}
     * @throws IllegalStateException if a response has already been stored
     * @throws Exception if the request fails or the status is not 200
     */
    @Override
    public Object call() throws Exception
    {
        if (response != null) {
            throw new IllegalStateException("response is already set");
        }
        HttpResponse response = client.execute(request);
        statusIsOkOrThrow(response);
        this.response = response;
        return null;
    }

    /**
     * Everything is retried except exceptions whose runtime class is exactly
     * one of {@code NOT_RETRIABLE_CLASSES}. Membership is tested on the exact
     * class, so subclasses of the listed types are still retried.
     */
    @Override
    public boolean isRetryableException(Exception exception)
    {
        if (NOT_RETRIABLE_CLASSES.contains(exception.getClass())) {
            logger.error(String.format("'%s' is not retriable", exception.getClass()));
            return false;
        }
        return true;
    }

    @Override
    public void onRetry(Exception exception, int retryCount, int retryLimit, int retryWait)
            throws RetryExecutor.RetryGiveupException
    {
        logger.warn("retrying {}/{} after {} seconds. Message: {}",
                retryCount, retryLimit, retryWait / 1000,
                exception.getMessage());
    }

    @Override
    public void onGiveup(Exception firstException, Exception lastException)
            throws RetryExecutor.RetryGiveupException
    {
        logger.error("giveup {}", lastException.getMessage());
    }

    /**
     * Accepts status 200 only.
     *
     * @throws HttpException for any other status; the message carries the
     *         status code and the response body (empty when there is no body)
     * @throws IOException if the response body cannot be read
     */
    protected void statusIsOkOrThrow(HttpResponse response)
            throws HttpException, IOException
    {
        int code = response.getStatusLine().getStatusCode();
        if (code == 200) {
            return;
        }
        // A response may carry no entity at all; EntityUtils.toString rejects
        // null, which would replace the HTTP error with an unrelated one.
        String body = response.getEntity() == null ? "" : EntityUtils.toString(response.getEntity());
        throw new HttpException(String.format("Request is not successful, code=%d, body=%s",
                code, body));
    }
}
package org.embulk.input.s3;
import com.amazonaws.AmazonServiceException;
import com.google.common.base.Throwables;
import org.apache.http.HttpStatus;
import org.embulk.spi.Exec;
import org.embulk.spi.util.RetryExecutor;
import org.slf4j.Logger;
import java.util.HashSet;
import java.util.Set;
import java.util.concurrent.Callable;
import static java.lang.String.format;
import static org.embulk.spi.util.RetryExecutor.RetryGiveupException;
import static org.embulk.spi.util.RetryExecutor.Retryable;
/**
 * Retryable that retries on every exception except a small set of AWS
 * service errors, and provides a default approach for exception propagation.
 */
class DefaultRetryable<T> implements Retryable<T>
{
    private static final Logger log = Exec.getLogger(DefaultRetryable.class);
    private static final Set<Integer> NONRETRYABLE_STATUS_CODES = new HashSet<Integer>(2);
    private static final Set<String> NONRETRYABLE_ERROR_CODES = new HashSet<String>(1);
    private String operationName;
    private Callable<T> callable;

    static {
        NONRETRYABLE_STATUS_CODES.add(HttpStatus.SC_FORBIDDEN);
        NONRETRYABLE_STATUS_CODES.add(HttpStatus.SC_METHOD_NOT_ALLOWED);
        NONRETRYABLE_ERROR_CODES.add("ExpiredToken");
    }

    /**
     * @param operationName the name that will be referred on logging
     */
    public DefaultRetryable(String operationName)
    {
        this.operationName = operationName;
    }

    /**
     * @param operationName the name that will be referred on logging
     * @param callable the operation, either define this at construction time or override the call() method
     */
    public DefaultRetryable(String operationName, Callable<T> callable)
    {
        this.operationName = operationName;
        this.callable = callable;
    }

    public DefaultRetryable()
    {
        this("Anonymous operation");
    }

    public DefaultRetryable(Callable<T> callable)
    {
        this("Anonymous operation", callable);
    }

    /**
     * Runs the callable supplied at construction time.
     *
     * @throws IllegalStateException if no callable was supplied and this method is not overridden
     */
    @Override
    public T call() throws Exception
    {
        if (callable != null) {
            return callable.call();
        }
        else {
            throw new IllegalStateException("Either override call() or construct with a Callable");
        }
    }

    /**
     * An {@link AmazonServiceException} is not retried when its status code or
     * its error code is in one of the non-retryable sets; every other
     * exception is retried.
     */
    @Override
    public boolean isRetryableException(Exception exception)
    {
        // No retry on a subset of service exceptions
        if (exception instanceof AmazonServiceException) {
            AmazonServiceException ase = (AmazonServiceException) exception;
            return !NONRETRYABLE_STATUS_CODES.contains(ase.getStatusCode()) && !NONRETRYABLE_ERROR_CODES.contains(ase.getErrorCode());
        }
        return true;
    }

    /**
     * Logs a warning for each retry; the exception (with stack trace) is
     * attached only when {@code retryCount} is a multiple of {@code retryLimit}.
     */
    @Override
    public void onRetry(Exception exception, int retryCount, int retryLimit, int retryWait)
    {
        String message = format("%s failed. Retrying %d/%d after %d seconds. Message: %s",
                operationName, retryCount, retryLimit, retryWait / 1000, exception.getMessage());
        if (retryCount % retryLimit == 0) {
            log.warn(message, exception);
        }
        else {
            log.warn(message);
        }
    }

    @Override
    public void onGiveup(Exception firstException, Exception lastException)
    {
        // Exceptions would be propagated, so it's up to the caller to handle, this is just warning
        log.warn("Giving up on retrying for {}, first exception is [{}], last exception is [{}]",
                operationName, firstException.getMessage(), lastException.getMessage());
    }

    /**
     * Run itself by the supplied executor,
     *
     * This propagates all exceptions (as unchecked) and unwrap RetryGiveupException for the original cause.
     * If the original exception already is a RuntimeException, it will be propagated as is. If not, it will
     * be wrapped around with a RuntimeException.
     *
     * For convenience, it executes normally without retrying when executor is null.
     *
     * @throws RuntimeException the original cause
     */
    public T executeWith(RetryExecutor executor)
    {
        if (executor == null) {
            try {
                return this.call();
            }
            catch (Exception e) {
                // Explicit throw: control never falls through to the null executor below.
                throw Throwables.propagate(e);
            }
        }

        try {
            return executor.runInterruptible(this);
        }
        catch (RetryGiveupException e) {
            throw Throwables.propagate(e.getCause());
        }
        catch (InterruptedException e) {
            throw Throwables.propagate(e);
        }
    }

    /**
     * Run itself by the supplied executor,
     *
     * Same as `executeWith`, this propagates all original exceptions. But `propagateAsIsException` will
     * be re-thrown without being wrapped in a RuntimeException, whether it is a checked or unchecked exception.
     *
     * For convenience, it executes normally without retrying when executor is null; the same propagation
     * rules apply in that case.
     *
     * @throws X whatever checked exception that you decided to propagate directly
     * @throws RuntimeException wrap around whatever the original cause of failure (potentially thread interruption)
     */
    public <X extends Throwable> T executeWithCheckedException(RetryExecutor executor,
            Class<X> propagateAsIsException) throws X
    {
        if (executor == null) {
            try {
                return this.call();
            }
            catch (Exception e) {
                // Honour the contract without an executor too: rethrow X unwrapped.
                Throwables.propagateIfInstanceOf(e, propagateAsIsException);
                throw Throwables.propagate(e);
            }
        }

        try {
            return executor.runInterruptible(this);
        }
        catch (RetryGiveupException e) {
            Throwables.propagateIfInstanceOf(e.getCause(), propagateAsIsException);
            throw Throwables.propagate(e.getCause());
        }
        catch (InterruptedException e) {
            throw Throwables.propagate(e);
        }
    }
}
package org.embulk.input.s3;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Optional;
import com.google.common.base.Throwables;
import org.embulk.config.Config;
import org.embulk.config.ConfigDefault;
import org.embulk.config.ConfigSource;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.AbstractList;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
// this class should be moved to embulk-core
/**
 * Compact list of file paths split into tasks.
 *
 * Paths are stored as one gzip-compressed byte array of length-prefixed UTF-8
 * strings; each task is a list of {@link Entry} records that point into that
 * array by position.
 */
public class FileList
{
    public interface Task
    {
        @Config("path_match_pattern")
        @ConfigDefault("\".*\"")
        String getPathMatchPattern();

        @Config("total_file_count_limit")
        @ConfigDefault("2147483647")
        int getTotalFileCountLimit();

        // TODO support more algorithms to combine tasks
        @Config("min_task_size")
        @ConfigDefault("0")
        long getMinTaskSize();
    }

    /**
     * Position of a path inside the compressed data, plus the file size
     * reported when the path was added.
     */
    public static class Entry
    {
        private int index;
        private long size;

        @JsonCreator
        public Entry(
                @JsonProperty("index") int index,
                @JsonProperty("size") long size)
        {
            this.index = index;
            this.size = size;
        }

        @JsonProperty("index")
        public int getIndex()
        {
            return index;
        }

        @JsonProperty("size")
        public long getSize()
        {
            return size;
        }
    }

    /**
     * Collects paths and produces a {@link FileList}.
     */
    public static class Builder
    {
        private final ByteArrayOutputStream binary;
        private final OutputStream stream;
        private final List<Entry> entries = new ArrayList<>();
        private String last = null;

        private int limitCount = Integer.MAX_VALUE;
        private long minTaskSize = 1;
        // Same default as the Task and ConfigSource configuration, so a builder
        // made with the no-arg constructor accepts every path instead of
        // failing with a NullPointerException in add().
        private Pattern pathMatchPattern = Pattern.compile(".*");

        private final ByteBuffer castBuffer = ByteBuffer.allocate(4);

        public Builder(Task task)
        {
            this();
            this.pathMatchPattern = Pattern.compile(task.getPathMatchPattern());
            this.limitCount = task.getTotalFileCountLimit();
            this.minTaskSize = task.getMinTaskSize();
        }

        public Builder(ConfigSource config)
        {
            this();
            this.pathMatchPattern = Pattern.compile(config.get(String.class, "path_match_pattern", ".*"));
            this.limitCount = config.get(int.class, "total_file_count_limit", Integer.MAX_VALUE);
            this.minTaskSize = config.get(long.class, "min_task_size", 0L);
        }

        public Builder()
        {
            binary = new ByteArrayOutputStream();
            try {
                stream = new BufferedOutputStream(new GZIPOutputStream(binary));
            }
            catch (IOException ex) {
                throw Throwables.propagate(ex);
            }
        }

        public Builder limitTotalFileCount(int limitCount)
        {
            this.limitCount = limitCount;
            return this;
        }

        public Builder minTaskSize(long bytes)
        {
            this.minTaskSize = bytes;
            return this;
        }

        public Builder pathMatchPattern(String pattern)
        {
            this.pathMatchPattern = Pattern.compile(pattern);
            return this;
        }

        public int size()
        {
            return entries.size();
        }

        /**
         * @return true while fewer paths than the configured limit have been added
         */
        public boolean needsMore()
        {
            return size() < limitCount;
        }

        /**
         * Adds a path unless the limit is reached or the path does not match
         * the pattern (tested with {@code find()}, i.e. anywhere in the path).
         *
         * @return true if this file is used
         */
        public synchronized boolean add(String path, long size)
        {
            // TODO throw IllegalStateException if stream is already closed

            if (!needsMore()) {
                return false;
            }

            if (!pathMatchPattern.matcher(path).find()) {
                return false;
            }

            int index = entries.size();
            entries.add(new Entry(index, size));

            // Record layout: 4-byte length followed by the UTF-8 bytes of the path.
            byte[] data = path.getBytes(StandardCharsets.UTF_8);
            castBuffer.putInt(0, data.length);
            try {
                stream.write(castBuffer.array());
                stream.write(data);
            }
            catch (IOException ex) {
                throw Throwables.propagate(ex);
            }

            last = path;
            return true;
        }

        /**
         * Closes the underlying stream and builds the list.
         */
        public FileList build()
        {
            try {
                stream.close();
            }
            catch (IOException ex) {
                throw Throwables.propagate(ex);
            }
            return new FileList(binary.toByteArray(), getSplits(entries), Optional.fromNullable(last));
        }

        /**
         * Groups consecutive entries into tasks, closing a task as soon as
         * its accumulated size reaches {@code minTaskSize}. Remaining entries
         * form a final task.
         */
        private List<List<Entry>> getSplits(List<Entry> all)
        {
            List<List<Entry>> tasks = new ArrayList<>();
            long currentTaskSize = 0;
            List<Entry> currentTask = new ArrayList<>();
            for (Entry entry : all) {
                currentTask.add(entry);
                currentTaskSize += entry.getSize();  // TODO consider to multiply the size by cost_per_byte, and add cost_per_file
                if (currentTaskSize >= minTaskSize) {
                    tasks.add(currentTask);
                    currentTask = new ArrayList<>();
                    currentTaskSize = 0;
                }
            }
            if (!currentTask.isEmpty()) {
                tasks.add(currentTask);
            }
            return tasks;
        }
    }

    private final byte[] data;
    private final List<List<Entry>> tasks;
    private final Optional<String> last;

    @JsonCreator
    @Deprecated
    public FileList(
            @JsonProperty("data") byte[] data,
            @JsonProperty("tasks") List<List<Entry>> tasks,
            @JsonProperty("last") Optional<String> last)
    {
        this.data = data;
        this.tasks = tasks;
        this.last = last;
    }

    /**
     * @return the last path added to this list, or {@code lastLastPath} when the list recorded none
     */
    @JsonIgnore
    public Optional<String> getLastPath(Optional<String> lastLastPath)
    {
        if (last.isPresent()) {
            return last;
        }
        return lastLastPath;
    }

    @JsonIgnore
    public int getTaskCount()
    {
        return tasks.size();
    }

    /**
     * @return a lazily decoded view of the paths of task {@code i}
     */
    @JsonIgnore
    public List<String> get(int i)
    {
        return new EntryList(data, tasks.get(i));
    }

    @JsonProperty("data")
    @Deprecated
    public byte[] getData()
    {
        return data;
    }

    @JsonProperty("tasks")
    @Deprecated
    public List<List<Entry>> getTasks()
    {
        return tasks;
    }

    @JsonProperty("last")
    @Deprecated
    public Optional<String> getLast()
    {
        return last;
    }

    /**
     * Read-only list of the paths of one task, decoded on demand by scanning
     * the compressed data forward; a request for an earlier position restarts
     * the scan from the beginning.
     */
    private class EntryList
            extends AbstractList<String>
    {
        private final byte[] data;
        private final List<Entry> entries;
        private InputStream stream;
        private int current;

        private final ByteBuffer castBuffer = ByteBuffer.allocate(4);

        public EntryList(byte[] data, List<Entry> entries)
        {
            this.data = data;
            this.entries = entries;
            try {
                this.stream = new BufferedInputStream(new GZIPInputStream(new ByteArrayInputStream(data)));
            }
            catch (IOException ex) {
                throw Throwables.propagate(ex);
            }
            this.current = 0;
        }

        @Override
        public synchronized String get(int i)
        {
            Entry e = entries.get(i);
            if (e.getIndex() < current) {
                // rewind to the head
                try {
                    stream.close();
                    stream = new BufferedInputStream(new GZIPInputStream(new ByteArrayInputStream(data)));
                }
                catch (IOException ex) {
                    throw Throwables.propagate(ex);
                }
                current = 0;
            }

            while (current < e.getIndex()) {
                readNext();
            }
            // now current == e.getIndex()
            return readNextString();
        }

        @Override
        public int size()
        {
            return entries.size();
        }

        /**
         * Reads the next length-prefixed record and advances the position.
         */
        private byte[] readNext()
        {
            try {
                readFully(castBuffer.array());
                int n = castBuffer.getInt(0);
                byte[] b = new byte[n];  // here should be able to use a pooled buffer because read data is ignored if readNextString doesn't call this method
                readFully(b);

                current++;

                return b;
            }
            catch (IOException ex) {
                throw Throwables.propagate(ex);
            }
        }

        /**
         * Fills {@code buffer} completely. A single InputStream.read call may
         * return fewer bytes than requested, so keep reading until the buffer
         * is full.
         *
         * @throws IOException if the stream ends before the buffer is full
         */
        private void readFully(byte[] buffer) throws IOException
        {
            int offset = 0;
            while (offset < buffer.length) {
                int n = stream.read(buffer, offset, buffer.length - offset);
                if (n < 0) {
                    throw new IOException("Unexpected end of file list data");
                }
                offset += n;
            }
        }

        private String readNextString()
        {
            return new String(readNext(), StandardCharsets.UTF_8);
        }
    }
}
package org.embulk.input.s3;
import com.google.common.base.Optional;
import org.embulk.config.Config;
import org.embulk.config.ConfigDefault;
import org.embulk.config.Task;
/**
 * Configuration unit describing an HTTP proxy, for use inside the configs of
 * Input/Output plugins.
 *
 * TODO: This unit will be moved to embulk/embulk-plugin-units.git.
 * TODO: Consider using @JsonProperty(defaultValue=...) in Jackson 2.6+.
 */
public interface HttpProxy
        extends Task
{
    /** Config key "host"; no default is declared. */
    @Config("host")
    String getHost();

    /** Config key "port"; absent by default. */
    @Config("port")
    @ConfigDefault("null")
    Optional<Integer> getPort();

    /** Config key "https"; defaults to true. */
    @Config("https")
    @ConfigDefault("true")
    boolean getHttps();

    /** Config key "user"; absent by default. */
    @Config("user")
    @ConfigDefault("null")
    Optional<String> getUser();

    /** Config key "password"; absent by default. */
    @Config("password")
    @ConfigDefault("null")
    Optional<String> getPassword();
}
package org.embulk.input.s3;
import org.embulk.config.Config;
import org.embulk.config.ConfigDefault;
import org.embulk.config.Task;
/**
 * Task mix-in that declares three retry-related configuration keys.
 * NOTE(review): how these values are consumed is not visible here — presumably
 * they parameterize a retry executor; confirm against the plugin that reads them.
 */
public interface RetrySupportPluginTask extends Task
{
// Config key "maximum_retries"; defaults to 7.
@Config("maximum_retries")
@ConfigDefault("7")
int getMaximumRetries();
// Config key "initial_retry_interval_millis"; defaults to 30000.
@Config("initial_retry_interval_millis")
@ConfigDefault("30000")
int getInitialRetryIntervalMillis();
// Config key "maximum_retry_interval_millis"; defaults to 480000.
@Config("maximum_retry_interval_millis")
@ConfigDefault("480000")
int getMaximumRetryIntervalMillis();
}
package org.embulk.input.s3;
import com.amazonaws.client.builder.AwsClientBuilder.EndpointConfiguration;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3ClientBuilder;
import com.google.common.base.Optional;
import org.embulk.config.Config;
import org.embulk.config.ConfigDefault;
import org.embulk.spi.Exec;
import org.slf4j.Logger;
/**
 * S3 file input plugin whose client can be pointed at an explicit endpoint or
 * region.
 */
public class S3FileInputPlugin
        extends AbstractS3FileInputPlugin
{
    /**
     * Adds the optional "endpoint" and "region" settings to the base task.
     */
    public interface S3PluginTask
            extends PluginTask
    {
        @Config("endpoint")
        @ConfigDefault("null")
        public Optional<String> getEndpoint();

        @Config("region")
        @ConfigDefault("null")
        public Optional<String> getRegion();
    }

    private static final Logger log = Exec.getLogger(S3FileInputPlugin.class);

    @Override
    protected Class<? extends PluginTask> getTaskClass()
    {
        return S3PluginTask.class;
    }

    /**
     * Builds the S3 client. Precedence: `endpoint` first, then `region`; when
     * neither is configured the endpoint `s3.amazonaws.com` is used.
     */
    @Override
    protected AmazonS3 newS3Client(PluginTask task)
    {
        final S3PluginTask s3Task = (S3PluginTask) task;
        final Optional<String> configuredEndpoint = s3Task.getEndpoint();
        final Optional<String> configuredRegion = s3Task.getRegion();
        final AmazonS3ClientBuilder clientBuilder = super.defaultS3ClientBuilder(s3Task);

        final boolean regionOnly = !configuredEndpoint.isPresent() && configuredRegion.isPresent();
        if (regionOnly) {
            clientBuilder.setRegion(configuredRegion.get());
            return clientBuilder.build();
        }

        if (configuredEndpoint.isPresent() && configuredRegion.isPresent()) {
            log.warn("Either configure endpoint or region, " +
                    "if both is specified only the endpoint will be in effect.");
        }

        // Why `s3.amazonaws.com` is the fallback when nothing is configured:
        //
        // With SDK 1.10.x an unconfigured client used `s3.amazonaws.com`, and
        // for pre-Signature-V4 the bucket's region was resolved on the server
        // (AWS) side. With SDK 1.11.x the region is computed on the client by
        // AwsRegionProvider and the endpoint becomes the region-specific
        // `<region>.s3.amazonaws.com`, which might be the wrong one.
        //
        // Pinning the old default keeps existing configurations working. The
        // side effect is that AwsRegionProvider is rendered useless. Note that
        // Signature-V4 does not work on either version without an explicit
        // region or endpoint, because the region (inferrable from the
        // endpoint) is needed for signing.
        final String effectiveEndpoint = configuredEndpoint.or("s3.amazonaws.com");
        clientBuilder.setEndpointConfiguration(new EndpointConfiguration(effectiveEndpoint, null));
        return clientBuilder.build();
    }
}
package org.embulk.input.http;
import com.google.common.base.Optional;
import org.junit.Test;
import java.util.List;
import static org.junit.Assert.assertEquals;
/**
 * Tests for {@link PagerOption#expand()}.
 */
public class TestPagerOption
{
    /** start=1, step=2, three pages with both "from" and "to" parameters. */
    @Test
    public void testExpandFromTo() throws Exception
    {
        List<List<QueryOption.Query>> dest = new PagerOption("from", Optional.of("to"), Optional.of(1), 3,
                Optional.of(2)).expand();
        assertEquals(3, dest.size());

        assertEquals(2, dest.get(0).size());
        assertQuery("from", "1", dest.get(0).get(0));
        assertQuery("to", "2", dest.get(0).get(1));

        assertEquals(2, dest.get(1).size());
        assertQuery("from", "3", dest.get(1).get(0));
        assertQuery("to", "4", dest.get(1).get(1));

        assertEquals(2, dest.get(2).size());
        assertQuery("from", "5", dest.get(2).get(0));
        assertQuery("to", "6", dest.get(2).get(1));
    }

    /** Absent start and step fall back to 0 and 1. */
    @Test
    public void testExpandFromToWithDefault() throws Exception
    {
        Optional<Integer> nullValue = Optional.absent();

        List<List<QueryOption.Query>> dest = new PagerOption("from", Optional.of("to"), nullValue, 2, nullValue)
                .expand();
        assertEquals(2, dest.size());

        assertEquals(2, dest.get(0).size());
        assertQuery("from", "0", dest.get(0).get(0));
        assertQuery("to", "0", dest.get(0).get(1));

        assertEquals(2, dest.get(1).size());
        assertQuery("from", "1", dest.get(1).get(0));
        assertQuery("to", "1", dest.get(1).get(1));
    }

    /** Without a "to" parameter each page carries a single query. */
    @Test
    public void testExpandPagenate() throws Exception
    {
        Optional<String> nullValue = Optional.absent();

        List<List<QueryOption.Query>> dest = new PagerOption("page", nullValue, Optional.of(1), 3,
                Optional.of(1)).expand();
        assertEquals(3, dest.size());

        assertEquals(1, dest.get(0).size());
        assertQuery("page", "1", dest.get(0).get(0));

        assertEquals(1, dest.get(1).size());
        assertQuery("page", "2", dest.get(1).get(0));

        assertEquals(1, dest.get(2).size());
        assertQuery("page", "3", dest.get(2).get(0));
    }

    /**
     * Checks the name and first value of a query. Expected values come first,
     * as JUnit's assertEquals requires for its failure messages to read correctly.
     */
    private static void assertQuery(String expectedName, String expectedValue, QueryOption.Query actual)
    {
        assertEquals(expectedName, actual.getName());
        assertEquals(expectedValue, actual.getValues()[0]);
    }
}
package org.embulk.input.http;
import com.google.common.base.Optional;
import com.google.common.collect.Lists;
import org.junit.Test;
import java.util.List;
import static org.junit.Assert.assertEquals;
/**
 * Unit tests for {@link ParamsOption#generateQueries(Optional)} called without a pager.
 *
 * NOTE(review): the QueryOption constructor arguments are presumably
 * (name, optional single value, optional value list, expand flag) -- confirm
 * against QueryOption.
 *
 * All assertions follow JUnit's assertEquals(expected, actual) order so that a
 * failure message reports the expected and actual values the right way round.
 */
public class TestParamsOption
{
    /** Two single-valued, non-expanding options yield one query set holding both. */
    @Test
    public void testUnexpandQueriesSinglePair() throws Exception
    {
        Optional<List<String>> nullValues = Optional.absent();
        QueryOption q1 = new QueryOption("test1", Optional.of("awasome1"), nullValues, false);
        QueryOption q2 = new QueryOption("test2", Optional.of("awasome2"), nullValues, false);
        ParamsOption paramsOption = new ParamsOption(Lists.newArrayList(q1, q2));
        Optional<PagerOption> pagerOption = Optional.absent();
        List<List<QueryOption.Query>> dest = paramsOption.generateQueries(pagerOption);
        assertEquals(1, dest.size());

        assertEquals(2, dest.get(0).size());
        assertEquals("test1", dest.get(0).get(0).getName());
        assertEquals("awasome1", dest.get(0).get(0).getValues()[0]);
        assertEquals("test2", dest.get(0).get(1).getName());
        assertEquals("awasome2", dest.get(0).get(1).getValues()[0]);
    }

    /** Two multi-valued, non-expanding options yield one query set; each query keeps all its values. */
    @Test
    public void testUnexpandQueriesExpandPair() throws Exception
    {
        Optional<String> nullValue = Optional.absent();
        List<String> values1 = Lists.newArrayList("a", "b");
        List<String> values2 = Lists.newArrayList("c", "d");
        QueryOption q1 = new QueryOption("test1", nullValue, Optional.of(values1), false);
        QueryOption q2 = new QueryOption("test2", nullValue, Optional.of(values2), false);
        ParamsOption paramsOption = new ParamsOption(Lists.newArrayList(q1, q2));
        Optional<PagerOption> pagerOption = Optional.absent();
        List<List<QueryOption.Query>> dest = paramsOption.generateQueries(pagerOption);
        assertEquals(1, dest.size());

        assertEquals(2, dest.get(0).size());
        assertEquals("test1", dest.get(0).get(0).getName());
        assertEquals("a", dest.get(0).get(0).getValues()[0]);
        assertEquals("b", dest.get(0).get(0).getValues()[1]);
        assertEquals("test2", dest.get(0).get(1).getName());
        assertEquals("c", dest.get(0).get(1).getValues()[0]);
        assertEquals("d", dest.get(0).get(1).getValues()[1]);
    }

    /** Two single-valued options with the expand flag set still yield a single query set. */
    @Test
    public void testExpandQueriesSinglePair() throws Exception
    {
        Optional<List<String>> nullValues = Optional.absent();
        QueryOption q1 = new QueryOption("test1", Optional.of("awasome1"), nullValues, true);
        QueryOption q2 = new QueryOption("test2", Optional.of("awasome2"), nullValues, true);
        ParamsOption paramsOption = new ParamsOption(Lists.newArrayList(q1, q2));
        Optional<PagerOption> pagerOption = Optional.absent();
        List<List<QueryOption.Query>> dest = paramsOption.generateQueries(pagerOption);
        assertEquals(1, dest.size());

        assertEquals(2, dest.get(0).size());
        assertEquals("test1", dest.get(0).get(0).getName());
        assertEquals("awasome1", dest.get(0).get(0).getValues()[0]);
        assertEquals("test2", dest.get(0).get(1).getName());
        assertEquals("awasome2", dest.get(0).get(1).getValues()[0]);
    }

    /**
     * Two two-valued options with the expand flag set yield four query sets,
     * expected in the order (a,c), (b,c), (a,d), (b,d).
     */
    @Test
    public void testExpandQueriesExpandPair() throws Exception
    {
        Optional<String> nullValue = Optional.absent();
        List<String> values1 = Lists.newArrayList("a", "b");
        List<String> values2 = Lists.newArrayList("c", "d");
        QueryOption q1 = new QueryOption("test1", nullValue, Optional.of(values1), true);
        QueryOption q2 = new QueryOption("test2", nullValue, Optional.of(values2), true);
        ParamsOption paramsOption = new ParamsOption(Lists.newArrayList(q1, q2));
        Optional<PagerOption> pagerOption = Optional.absent();
        List<List<QueryOption.Query>> dest = paramsOption.generateQueries(pagerOption);
        assertEquals(4, dest.size());

        assertEquals(2, dest.get(0).size());
        assertEquals("test1", dest.get(0).get(0).getName());
        assertEquals("a", dest.get(0).get(0).getValues()[0]);
        assertEquals("test2", dest.get(0).get(1).getName());
        assertEquals("c", dest.get(0).get(1).getValues()[0]);

        assertEquals(2, dest.get(1).size());
        assertEquals("test1", dest.get(1).get(0).getName());
        assertEquals("b", dest.get(1).get(0).getValues()[0]);
        assertEquals("test2", dest.get(1).get(1).getName());
        assertEquals("c", dest.get(1).get(1).getValues()[0]);

        assertEquals(2, dest.get(2).size());
        assertEquals("test1", dest.get(2).get(0).getName());
        assertEquals("a", dest.get(2).get(0).getValues()[0]);
        assertEquals("test2", dest.get(2).get(1).getName());
        assertEquals("d", dest.get(2).get(1).getValues()[0]);

        assertEquals(2, dest.get(3).size());
        assertEquals("test1", dest.get(3).get(0).getName());
        assertEquals("b", dest.get(3).get(0).getValues()[0]);
        assertEquals("test2", dest.get(3).get(1).getName());
        assertEquals("d", dest.get(3).get(1).getValues()[0]);
    }
}
package org.embulk.input.http;
import com.google.common.base.Optional;
import com.google.common.collect.Lists;
import org.junit.Test;
import java.util.List;
import static org.junit.Assert.assertEquals;
/**
 * Unit tests for {@link QueryOption#expand()}.
 *
 * NOTE(review): the QueryOption constructor arguments are presumably
 * (name, optional single value, optional value list, expand flag) -- confirm
 * against QueryOption.
 *
 * All assertions follow JUnit's assertEquals(expected, actual) order so that a
 * failure message reports the expected and actual values the right way round.
 */
public class TestQueryOption
{
    /** A single value without expansion yields one query carrying that value. */
    @Test
    public void testUnexpandSingleValue() throws Exception
    {
        Optional<List<String>> nullValues = Optional.absent();
        QueryOption config = new QueryOption("test", Optional.of("awesome"), nullValues, false);
        List<QueryOption.Query> dest = config.expand();
        assertEquals(1, dest.size());
        assertEquals("test", dest.get(0).getName());
        assertEquals(1, dest.get(0).getValues().length);
        assertEquals("awesome", dest.get(0).getValues()[0]);
    }

    /** A value list without expansion yields one query carrying all values. */
    @Test
    public void testUnexpandMultiValue() throws Exception
    {
        Optional<String> nullValue = Optional.absent();
        List<String> values = Lists.newArrayList("a", "b", "c");
        QueryOption config = new QueryOption("test", nullValue, Optional.of(values), false);
        List<QueryOption.Query> dest = config.expand();
        assertEquals(1, dest.size());
        assertEquals("test", dest.get(0).getName());
        assertEquals(3, dest.get(0).getValues().length);
        assertEquals("a", dest.get(0).getValues()[0]);
        assertEquals("b", dest.get(0).getValues()[1]);
        assertEquals("c", dest.get(0).getValues()[2]);
    }

    /** A single plain value with the expand flag set still yields one query. */
    @Test
    public void testExpandSingleValue() throws Exception
    {
        Optional<List<String>> nullValues = Optional.absent();
        QueryOption config = new QueryOption("test", Optional.of("awesome"), nullValues, true);
        List<QueryOption.Query> dest = config.expand();
        assertEquals(1, dest.size());
        assertEquals("test", dest.get(0).getName());
        assertEquals("awesome", dest.get(0).getValues()[0]);
    }

    /** A value list with the expand flag set yields one single-valued query per value. */
    @Test
    public void testExpandMultiValue() throws Exception
    {
        Optional<String> nullValue = Optional.absent();
        List<String> values = Lists.newArrayList("a", "b", "c");
        QueryOption config = new QueryOption("test", nullValue, Optional.of(values), true);
        List<QueryOption.Query> dest = config.expand();
        assertEquals(3, dest.size());

        assertEquals("test", dest.get(0).getName());
        assertEquals(1, dest.get(0).getValues().length);
        assertEquals("a", dest.get(0).getValues()[0]);

        assertEquals(1, dest.get(1).getValues().length);
        assertEquals("test", dest.get(1).getName());
        assertEquals("b", dest.get(1).getValues()[0]);

        assertEquals(1, dest.get(2).getValues().length);
        assertEquals("test", dest.get(2).getName());
        assertEquals("c", dest.get(2).getValues()[0]);
    }

    /** expand() must reject an option that has neither a value nor a value list. */
    @Test(expected = IllegalArgumentException.class)
    public void testExpandRaisesExceptionWhenBothValuesAreNull() throws Exception
    {
        Optional<List<String>> nullValues = Optional.absent();
        Optional<String> nullValue = Optional.absent();
        QueryOption config = new QueryOption("test", nullValue, nullValues, false);
        config.expand();
    }

    /** Without the expand flag a brace expression is passed through literally. */
    @Test
    public void testUnExpandBrace() throws Exception
    {
        Optional<List<String>> nullValues = Optional.absent();
        QueryOption config = new QueryOption("test", Optional.of("{awesome1,awesome2,awesome3}"), nullValues, false);
        List<QueryOption.Query> dest = config.expand();
        assertEquals(1, dest.size());
        assertEquals("test", dest.get(0).getName());
        assertEquals(1, dest.get(0).getValues().length);
        assertEquals("{awesome1,awesome2,awesome3}", dest.get(0).getValues()[0]);
    }

    /** With the expand flag a brace expression is split into one query per alternative. */
    @Test
    public void testExpandBrace() throws Exception
    {
        Optional<List<String>> nullValues = Optional.absent();
        QueryOption config = new QueryOption("test", Optional.of("{awesome1,awesome2,awesome3}"), nullValues, true);
        List<QueryOption.Query> dest = config.expand();
        assertEquals(3, dest.size());

        assertEquals("test", dest.get(0).getName());
        assertEquals(1, dest.get(0).getValues().length);
        assertEquals("awesome1", dest.get(0).getValues()[0]);

        assertEquals("test", dest.get(1).getName());
        assertEquals(1, dest.get(1).getValues().length);
        assertEquals("awesome2", dest.get(1).getValues()[0]);

        assertEquals(1, dest.get(2).getValues().length);
        assertEquals("test", dest.get(2).getName());
        assertEquals("awesome3", dest.get(2).getValues()[0]);
    }

    /** Backslash-escaped ',', '{' and '}' inside a brace expression are kept as literal characters. */
    @Test
    public void testExpandEscapedBrace() throws Exception
    {
        Optional<List<String>> nullValues = Optional.absent();
        QueryOption config = new QueryOption("test", Optional.of("{awe\\,some1,awes\\{ome2,awes\\}ome3}"), nullValues, true);
        List<QueryOption.Query> dest = config.expand();

        assertEquals("test", dest.get(0).getName());
        assertEquals(1, dest.get(0).getValues().length);
        assertEquals("awe,some1", dest.get(0).getValues()[0]);

        assertEquals("test", dest.get(1).getName());
        assertEquals(1, dest.get(1).getValues().length);
        assertEquals("awes{ome2", dest.get(1).getValues()[0]);

        assertEquals("test", dest.get(2).getName());
        assertEquals(1, dest.get(2).getValues().length);
        assertEquals("awes}ome3", dest.get(2).getValues()[0]);
    }
}
package org.embulk.input.s3;
import com.amazonaws.AmazonServiceException;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.model.GetObjectMetadataRequest;
import com.amazonaws.services.s3.model.ListObjectsRequest;
import com.amazonaws.services.s3.model.ObjectListing;
import com.amazonaws.services.s3.model.ObjectMetadata;
import com.google.common.base.Optional;
import org.apache.http.HttpStatus;
import org.embulk.EmbulkTestRuntime;
import org.embulk.spi.util.RetryExecutor;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import static org.mockito.Matchers.any;
import static org.mockito.Mockito.doReturn;
import static org.mockito.Mockito.doThrow;
import static org.mockito.Mockito.mock;
public class TestAbstractS3FileInputPlugin
{
private static RetryExecutor retryExecutor()
{
return RetryExecutor.retryExecutor()
.withInitialRetryWait(0)
.withMaxRetryWait(0);
}
private static AbstractS3FileInputPlugin dummyS3Plugin()
{
return new AbstractS3FileInputPlugin()
{
@Override
protected Class<? extends PluginTask> getTaskClass()
{
return PluginTask.class;
}
};
}
private static class SomeException extends RuntimeException
{
}
@Rule
public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
private AmazonS3 client;
@Before
public void createResources()
{
client = mock(AmazonS3.class);
}
@Test
public void listS3FilesByPrefix()
{
doReturn(new ObjectListing()).when(client).listObjects(any(ListObjectsRequest.class));
FileList.Builder builder = new FileList.Builder();
dummyS3Plugin().listS3FilesByPrefix(builder, client, "some_bucket", "some_prefix", Optional.of("last_path"), true);
}
@Test
public void listS3FileByPrefix_with_retry()
{
doThrow(new RuntimeException()).doReturn(new ObjectListing())
.when(client).listObjects(any(ListObjectsRequest.class));
FileList.Builder builder = new FileList.Builder();
dummyS3Plugin().listS3FilesByPrefix(
builder, client, "some_bucket", "some_prefix", Optional.of("last_path"), true,
retryExecutor().withRetryLimit(1));
}
@Test(expected = SomeException.class)
public void listS3FileByPrefix_on_retry_gave_up_should_throw_the_original_exception()
{
doThrow(new SomeException()).doReturn(new ObjectListing())
.when(client).listObjects(any(ListObjectsRequest.class));
FileList.Builder builder = new FileList.Builder();
dummyS3Plugin().listS3FilesByPrefix(
builder, client, "some_bucket", "some_prefix", Optional.of("last_path"), true,
retryExecutor().withRetryLimit(0));
}
@Test(expected = AmazonServiceException.class)
public void listS3FileByPrefix_on_retry_gave_up_should_throw_the_original_exception_in_forbidden_code()
{
AmazonServiceException exception = new AmazonServiceException("Forbidden exception");
exception.setStatusCode(HttpStatus.SC_FORBIDDEN);
exception.setErrorType(AmazonServiceException.ErrorType.Client);
doThrow(exception).doReturn(new ObjectListing())
.when(client).listObjects(any(ListObjectsRequest.class));
FileList.Builder builder = new FileList.Builder();
dummyS3Plugin().listS3FilesByPrefix(
builder, client, "some_bucket", "some_prefix", Optional.of("last_path"), true,
retryExecutor().withRetryLimit(1));
}
@Test(expected = AmazonServiceException.class)
public void listS3FileByPrefix_on_retry_gave_up_should_throw_the_original_exception_in_methodnotallow_code()
{
AmazonServiceException exception = new AmazonServiceException("method not allow exception");
exception.setStatusCode(HttpStatus.SC_METHOD_NOT_ALLOWED);
exception.setErrorType(AmazonServiceException.ErrorType.Client);
doThrow(exception).doReturn(new ObjectListing())
.when(client).listObjects(any(ListObjectsRequest.class));
FileList.Builder builder = new FileList.Builder();
dummyS3Plugin().listS3FilesByPrefix(
builder, client, "some_bucket", "some_prefix", Optional.of("last_path"), true,
retryExecutor().withRetryLimit(1));
}
@Test(expected = AmazonServiceException.class)
public void listS3FileByPrefix_on_retry_gave_up_should_throw_the_original_exception_in_expiredToken_code()
{
AmazonServiceException exception = new AmazonServiceException("expired token exception");
exception.setStatusCode(HttpStatus.SC_BAD_REQUEST);
exception.setErrorCode("ExpiredToken");
exception.setErrorType(AmazonServiceException.ErrorType.Client);
doThrow(exception).doReturn(new ObjectListing())
.when(client).listObjects(any(ListObjectsRequest.class));
FileList.Builder builder = new FileList.Builder();
dummyS3Plugin().listS3FilesByPrefix(
builder, client, "some_bucket", "some_prefix", Optional.of("last_path"), true,
retryExecutor().withRetryLimit(1));
}
@Test
public void addS3DirectObject()
{
doReturn(new ObjectMetadata()).when(client).getObjectMetadata(any(GetObjectMetadataRequest.class));
FileList.Builder builder = new FileList.Builder().pathMatchPattern("");
dummyS3Plugin().addS3DirectObject(builder, client, "some_bucket", "some_prefix");
}
@Test
public void addS3DirectObject_with_retry()
{
doThrow(new RuntimeException()).doReturn(new ObjectMetadata())
.when(client).getObjectMetadata(any(GetObjectMetadataRequest.class));
FileList.Builder builder = new FileList.Builder().pathMatchPattern("");
dummyS3Plugin().addS3DirectObject(
builder, client, "some_bucket", "some_prefix",
retryExecutor());
}
@Test(expected = SomeException.class)
public void addS3DirectObject_on_retry_gave_up_should_throw_original_exception()
{
doThrow(new SomeException()).doReturn(new ObjectMetadata())
.when(client).getObjectMetadata(any(GetObjectMetadataRequest.class));
FileList.Builder builder = new FileList.Builder().pathMatchPattern("");
dummyS3Plugin().addS3DirectObject(
builder, client, "some_bucket", "some_prefix",
retryExecutor().withRetryLimit(0));
}
}
package org.embulk.input.s3;
import com.amazonaws.auth.AWSStaticCredentialsProvider;
import com.amazonaws.auth.BasicAWSCredentials;
import com.amazonaws.auth.BasicSessionCredentials;
import com.amazonaws.auth.policy.Policy;
import com.amazonaws.auth.policy.Resource;
import com.amazonaws.auth.policy.Statement;
import com.amazonaws.auth.policy.actions.S3Actions;
import com.amazonaws.services.securitytoken.AWSSecurityTokenService;
import com.amazonaws.services.securitytoken.AWSSecurityTokenServiceClientBuilder;
import com.amazonaws.services.securitytoken.model.Credentials;
import com.amazonaws.services.securitytoken.model.GetFederationTokenRequest;
import com.amazonaws.services.securitytoken.model.GetFederationTokenResult;
import org.embulk.EmbulkTestRuntime;
import org.embulk.config.ConfigDiff;
import org.embulk.config.ConfigSource;
import org.embulk.input.s3.TestS3FileInputPlugin.Control;
import org.embulk.spi.FileInputRunner;
import org.embulk.spi.TestPageBuilderReader;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Rule;
import org.junit.Test;
import static org.embulk.input.s3.TestS3FileInputPlugin.assertRecords;
import static org.embulk.input.s3.TestS3FileInputPlugin.parserConfig;
import static org.embulk.input.s3.TestS3FileInputPlugin.schemaConfig;
import static org.junit.Assert.assertEquals;
import static org.junit.Assume.assumeNotNull;
/**
 * Integration tests for the {@code auth_method} options of the S3 input plugin.
 *
 * The tests talk to a real bucket and are skipped unless the environment
 * variables listed on {@link #initializeConstantVariables()} are set.
 */
public class TestAwsCredentials
{
    private static String EMBULK_S3_TEST_BUCKET;
    private static String EMBULK_S3_TEST_ACCESS_KEY_ID;
    private static String EMBULK_S3_TEST_SECRET_ACCESS_KEY;
    private static final String EMBULK_S3_TEST_PATH_PREFIX = "embulk_input_s3_test";

    /*
     * This test case requires environment variables:
     *   EMBULK_S3_TEST_BUCKET
     *   EMBULK_S3_TEST_ACCESS_KEY_ID
     *   EMBULK_S3_TEST_SECRET_ACCESS_KEY
     * If the variables are not set, the test case is skipped.
     */
    @BeforeClass
    public static void initializeConstantVariables()
    {
        EMBULK_S3_TEST_BUCKET = System.getenv("EMBULK_S3_TEST_BUCKET");
        EMBULK_S3_TEST_ACCESS_KEY_ID = System.getenv("EMBULK_S3_TEST_ACCESS_KEY_ID");
        EMBULK_S3_TEST_SECRET_ACCESS_KEY = System.getenv("EMBULK_S3_TEST_SECRET_ACCESS_KEY");
        assumeNotNull(EMBULK_S3_TEST_BUCKET, EMBULK_S3_TEST_ACCESS_KEY_ID, EMBULK_S3_TEST_SECRET_ACCESS_KEY);
    }

    @Rule
    public EmbulkTestRuntime runtime = new EmbulkTestRuntime();

    private ConfigSource config;
    private FileInputRunner runner;
    private TestPageBuilderReader.MockPageOutput output;

    /** Builds the base config (without any auth settings), the runner and the mock output. */
    @Before
    public void createResources()
    {
        config = runtime.getExec().newConfigSource()
                .set("type", "s3")
                .set("bucket", EMBULK_S3_TEST_BUCKET)
                .set("path_prefix", EMBULK_S3_TEST_PATH_PREFIX)
                .set("parser", parserConfig(schemaConfig()));
        runner = new FileInputRunner(runtime.getInstance(S3FileInputPlugin.class));
        output = new TestPageBuilderReader.MockPageOutput();
    }

    /** Runs the input with the given config and checks the reported last_path and the records read. */
    private void doTest(ConfigSource config)
    {
        ConfigDiff configDiff = runner.transaction(config, new Control(runner, output));
        assertEquals(EMBULK_S3_TEST_PATH_PREFIX + "/sample_01.csv", configDiff.get(String.class, "last_path"));
        assertRecords(config, output);
    }

    /**
     * Puts a system property back to the value it had before a test changed it.
     *
     * @param key           name of the system property
     * @param originalValue value captured before the test, or null if the property was unset
     */
    private static void restoreSystemProperty(String key, String originalValue)
    {
        if (originalValue == null) {
            // The property did not exist before the test: remove it so the test
            // credentials do not leak into tests that run later in the same JVM.
            System.clearProperty(key);
        }
        else {
            System.setProperty(key, originalValue);
        }
    }

    @Test
    public void useBasic()
    {
        ConfigSource config = this.config.deepCopy()
                .set("auth_method", "basic")
                .set("access_key_id", EMBULK_S3_TEST_ACCESS_KEY_ID)
                .set("secret_access_key", EMBULK_S3_TEST_SECRET_ACCESS_KEY);
        doTest(config);
    }

    @Test
    public void useEnv()
    {
        // TODO
    }

    @Test
    public void useInstance()
    {
        // TODO
    }

    @Test
    public void useProfile()
    {
        // TODO
    }

    /**
     * Supplies credentials through the aws.accessKeyId / aws.secretKey system
     * properties and restores both properties afterwards.
     */
    @Test
    public void useProperties()
    {
        String origAccessKeyId = System.getProperty("aws.accessKeyId");
        String origSecretKey = System.getProperty("aws.secretKey");
        try {
            ConfigSource config = this.config.deepCopy().set("auth_method", "properties");
            System.setProperty("aws.accessKeyId", EMBULK_S3_TEST_ACCESS_KEY_ID);
            System.setProperty("aws.secretKey", EMBULK_S3_TEST_SECRET_ACCESS_KEY);
            doTest(config);
        }
        finally {
            // Each property is restored from its own saved value; the secret key
            // was previously overwritten with the saved access key id.
            restoreSystemProperty("aws.accessKeyId", origAccessKeyId);
            restoreSystemProperty("aws.secretKey", origSecretKey);
        }
    }

    @Test
    public void useAnonymous()
    {
        // TODO
    }

    @Test
    public void useSession()
    {
        BasicSessionCredentials sessionCredentials = getSessionCredentials();
        ConfigSource config = this.config.deepCopy()
                .set("auth_method", "session")
                .set("access_key_id", sessionCredentials.getAWSAccessKeyId())
                .set("secret_access_key", sessionCredentials.getAWSSecretKey())
                .set("session_token", sessionCredentials.getSessionToken());
        doTest(config);
    }

    /**
     * Requests a federation token from STS using the test access key, with a
     * policy allowing ListObjects and GetObject on the test bucket and prefix,
     * and wraps the returned credentials as session credentials.
     */
    private static BasicSessionCredentials getSessionCredentials()
    {
        AWSSecurityTokenService stsClient = AWSSecurityTokenServiceClientBuilder.standard().withCredentials(
                new AWSStaticCredentialsProvider(new BasicAWSCredentials(EMBULK_S3_TEST_ACCESS_KEY_ID, EMBULK_S3_TEST_SECRET_ACCESS_KEY))
        ).build();

        GetFederationTokenRequest getFederationTokenRequest = new GetFederationTokenRequest();
        getFederationTokenRequest.setDurationSeconds(7200);
        getFederationTokenRequest.setName("dummy");

        Policy policy = new Policy().withStatements(new Statement(Statement.Effect.Allow)
                .withActions(S3Actions.ListObjects, S3Actions.GetObject)
                .withResources(
                        new Resource("arn:aws:s3:::" + EMBULK_S3_TEST_BUCKET + "/" + EMBULK_S3_TEST_PATH_PREFIX + "/*"),
                        new Resource("arn:aws:s3:::" + EMBULK_S3_TEST_BUCKET)));
        getFederationTokenRequest.setPolicy(policy.toJson());

        GetFederationTokenResult federationTokenResult = stsClient.getFederationToken(getFederationTokenRequest);
        Credentials sessionCredentials = federationTokenResult.getCredentials();

        return new BasicSessionCredentials(
                sessionCredentials.getAccessKeyId(),
                sessionCredentials.getSecretAccessKey(),
                sessionCredentials.getSessionToken());
    }
}
package org.embulk.input.s3;
import org.embulk.EmbulkTestRuntime;
import org.embulk.spi.util.RetryExecutor;
import org.embulk.spi.util.RetryExecutor.RetryGiveupException;
import org.junit.Rule;
import org.junit.Test;
import java.io.IOException;
import java.util.concurrent.Callable;
import static java.lang.String.format;
import static org.msgpack.core.Preconditions.checkArgument;
/**
 * Tests for {@link DefaultRetryable}, both when driven by a {@link RetryExecutor}
 * directly and through its own executeWith / executeWithCheckedException entry points.
 */
public class TestDefaultRetryable
{
    @Rule
    public EmbulkTestRuntime runtime = new EmbulkTestRuntime();  // require for DefaultRetryable's logger

    /** Retry executor with both retry waits set to 0; callers set the retry limit. */
    private static RetryExecutor retryExecutor()
    {
        RetryExecutor executor = RetryExecutor.retryExecutor();
        return executor.withInitialRetryWait(0).withMaxRetryWait(0);
    }

    /**
     * A Callable that fails a fixed number of times before succeeding.
     *
     * While the number of past calls is below the target it throws either the
     * exception supplied through {@link #with(Exception)} or, if none was
     * supplied, itself. After that it returns null.
     */
    private static class Deny extends RuntimeException implements Callable
    {
        private final int failuresBeforeSuccess;
        private int callCount = 0;
        private Exception substitute;

        Deny(int targetCalls)
        {
            super(format("Try harder! (Will pass after %d calls)", targetCalls));
            checkArgument(targetCalls >= 0);
            this.failuresBeforeSuccess = targetCalls;
        }

        static Deny until(int calls)
        {
            return new Deny(calls);
        }

        Deny with(Exception exception)
        {
            this.substitute = exception;
            return this;
        }

        @Override
        public Object call() throws Exception
        {
            // Decide before counting this call, then count it in every case.
            boolean mustFail = callCount < failuresBeforeSuccess;
            callCount++;
            if (!mustFail) {
                return null;
            }
            throw substitute != null ? substitute : this;
        }
    }

    @Test
    @SuppressWarnings("unchecked")
    public void guarantee_retry_attempts_just_like_Retryable() throws Exception
    {
        // Each row is {retry limit, number of failures before success}.
        int[][] cases = {{0, 0}, {1, 1}, {2, 1}, {3, 2}};
        for (int[] c : cases) {
            retryExecutor()
                    .withRetryLimit(c[0])
                    .run(new DefaultRetryable(Deny.until(c[1])));
        }
    }

    @Test(expected = RetryGiveupException.class)
    @SuppressWarnings("unchecked")
    public void fail_after_exceeding_attempts_just_like_Retryable() throws Exception
    {
        RetryExecutor threeRetries = retryExecutor().withRetryLimit(3);
        threeRetries.run(new DefaultRetryable(Deny.until(4)));
    }

    @Test(expected = Deny.class)
    @SuppressWarnings("unchecked")
    public void execute_should_unwrap_RetryGiveupException() throws Exception
    {
        DefaultRetryable retryable = new DefaultRetryable(Deny.until(4));
        retryable.executeWith(retryExecutor().withRetryLimit(3));
    }

    @Test(expected = RuntimeException.class)
    @SuppressWarnings("unchecked")
    public void execute_should_unwrap_RetryGiveupException_but_rewrap_checked_exception_in_a_RuntimeException()
    {
        Deny failingWithChecked = Deny.until(4).with(new Exception("A checked exception"));
        new DefaultRetryable(failingWithChecked)
                .executeWith(retryExecutor().withRetryLimit(3));
    }

    @Test(expected = IOException.class)
    public void executeAndPropagateAsIs_should_leave_original_exception_unwrapped() throws IOException
    {
        RetryExecutor threeRetries = retryExecutor().withRetryLimit(3);
        // The explicit <Object> type argument for the operation's return type is required:
        // without it, javac (at least on 1.8) fails to infer the X exception type parameter.
        new DefaultRetryable<Object>() {
            @Override
            public Object call() throws IOException
            {
                throw new IOException();
            }
        }.executeWithCheckedException(threeRetries, IOException.class);
    }

    @Test(expected = IllegalStateException.class)
    public void execute_without_an_implementation_should_throw_an_IllegalStateException()
    {
        new DefaultRetryable().executeWith(retryExecutor());
    }
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment