Commit e6bea615 authored by Roque's avatar Roque

Allow ebulk to run in "always yes" mode

See merge request !2
parents 452e2f8d 04090942
......@@ -44,6 +44,7 @@ UPDATE="U"
RESUME="R"
DOWNLOAD="D"
ASK="A"
ALWAYS_YES="false"
# load data lake url from file if exists
if [ -f "$DATA_LAKE_URL_FILE" ]; then
......@@ -61,6 +62,7 @@ function helpReadme {
}
function checkParameters {
re='^[_A-Za-z.0-9-]*$'
if [ ! -f $TEMPLATE_FILE ]; then
echo
echo -e "${ORANGE}[ERROR] File '$TEMPLATE_FILE' not found!${NC}" >&2; return 1
......@@ -90,6 +92,19 @@ function checkParameters {
fi
fi
EBULK_DATASET_FILE="$DATASET_DIR$EBULK_DATASET_FILE_NAME"
if ! [[ $REFERENCE =~ $re ]] ; then
if [ "$REFERENCE" = "." ] && [[ -z "$STORAGE" ]] ; then
echo
echo -e "${ORANGE}[ERROR] You are not in a dataset directory ${GREEN}'$REFERENCE'${ORANGE}.${NC}"
echo
else
echo
echo -e "${ORANGE}[ERROR] Error in argument: invalid dataset name ${GREEN}'$REFERENCE'${ORANGE}.${NC}"
echo -e "${ORANGE}[ERROR] Only alphanumerics, dots ( . ), underscores ( _ ) and hyphens ( - ) are allowed.${NC}"
echo
fi
helpReadme >&2; return 1
fi
if [[ $DATASET_DIR != $REFERENCE ]]; then
if [ "$REFERENCE" = "." ] ; then
REFERENCE=$(basename "$DATASET_DIR")
......@@ -123,7 +138,6 @@ function checkParameters {
if [ "$DATA_SET" == "." ] ; then
DATA_SET=$(basename $(pwd))
fi
re='^[_A-Za-z.0-9-]*$'
if ! [[ $DATA_SET =~ $re ]] ; then
if [ "$DATA_SET" = "." ] && [[ -z "$STORAGE" ]] ; then
echo
......@@ -280,6 +294,7 @@ function updateConfigFile {
ING_URL=\"$ING_URL\"
STORAGE=\"$STORAGE\"
STATUS=\"$STATUS\"
ALWAYS_YES=\"$ALWAYS_YES\"
S3_BUCKET=\"$S3_BUCKET\"
S3_PREFIX=\"$S3_PREFIX\"
......@@ -603,6 +618,8 @@ while [ "$1" != "" ]; do
-d | --directory ) shift
DATASET_DIR=$1
;;
-y | --yes ) ALWAYS_YES="true"
;;
-s | --storage ) shift
STORAGE=$1
;;
......@@ -746,6 +763,9 @@ case $OPERATION in
echo
fi
echo "### DATASET DOWNLOAD ###"
if [ "$ALWAYS_YES" = "true" ] ; then
echo "[always yes mode]"
fi
echo
if [ "$DISCARD_CHANGES" != "" ] ; then
DISCARD_CHANGES_FILE="$DATASET_DIR$DISCARD_CHANGES_FILE_NAME"
......@@ -755,8 +775,10 @@ case $OPERATION in
echo -e "** The dataset will be downloaded in the specified directory: $DATASET_DIR"
fi
echo
read -n 1 -s -r -p "Press any key to continue"
echo
if [ "$ALWAYS_YES" = "false" ] ; then
read -n 1 -s -r -p "Press any key to continue"
echo
fi
runProcess
;;
push)
......@@ -795,12 +817,17 @@ case $OPERATION in
esac
fi
echo "### DATASET INGESTION ###"
echo
echo -e "** The tool will look for dataset files in the specified $MESSAGE"
echo -e "** Please make sure to put your dataset files there for ingestion."
echo
read -n 1 -s -r -p "Press any key to continue"
echo
if [ "$ALWAYS_YES" = "true" ] ; then
echo "[always yes mode]"
else
echo
echo -e "** The tool will look for dataset files in the specified $MESSAGE"
echo -e "** Please make sure to put your dataset files there for ingestion."
echo
read -n 1 -s -r -p "Press any key to continue"
fi
echo
runProcess
;;
esac
......
......@@ -8,6 +8,7 @@ in:
chunk_size: $CHUNK
output_path: $DATASET_DIR
tool_dir: $TOOL_DIR
always_yes: $ALWAYS_YES
out:
type: fif
......
exec:
exec:
max_threads: 1
min_output_tasks: 1
in:
......@@ -10,8 +10,9 @@ in:
erp5_url: $DOWN_URL
tool_dir: $TOOL_DIR
status: $STATUS
always_yes: $ALWAYS_YES
out:
out:
type: wendelin
erp5_url: $ING_URL
type_input: "filesystem"
......
......@@ -4,6 +4,7 @@
# PLEASE FILL THE 'IN' SECTION ACCORDING TO YOUR PLUGIN
in:
always_yes: $ALWAYS_YES
# FOR EXAMPLE CSV FILES
# type: file
......@@ -25,13 +26,13 @@ in:
chunk_size: $CHUNK
storage: $STORAGE
out:
out:
type: wendelin
erp5_url: $ING_URL
tool_dir: $TOOL_DIR
data_set: $DATA_SET
erp5_base_url: $DOWN_URL
exec:
exec:
max_threads: 1
min_output_tasks: 1
......@@ -8,6 +8,7 @@ in:
user: $FTP_USER
password: $FTP_PASSWORD
path_prefix: $FTP_PATH
always_yes: $ALWAYS_YES
#ssl_verify: false
#port: 21
......@@ -20,14 +21,14 @@ in:
chunk_size: $CHUNK
storage: $STORAGE
out:
out:
type: wendelin
erp5_url: $ING_URL
tool_dir: $TOOL_DIR
data_set: $DATA_SET
erp5_base_url: $DOWN_URL
exec:
exec:
max_threads: 1
min_output_tasks: 1
......@@ -6,6 +6,7 @@ in:
type: http
url: $HTTP_URL
method: $HTTP_METHOD
always_yes: $ALWAYS_YES
# basic_auth:
# user: MyUser
# password: MyPassword
......@@ -22,14 +23,14 @@ in:
chunk_size: $CHUNK
storage: $STORAGE
out:
out:
type: wendelin
erp5_url: $ING_URL
tool_dir: $TOOL_DIR
data_set: $DATA_SET
erp5_base_url: $DOWN_URL
exec:
exec:
max_threads: 1
min_output_tasks: 1
......@@ -9,12 +9,13 @@ in:
access_key_id: $S3_ACCESS_KEY
secret_access_key: $S3_SECRET_KEY
auth_method: $S3_AUTH_METHOD
# endpoint:
# region:
# path_match_pattern:
always_yes: $ALWAYS_YES
# endpoint:
# region:
# path_match_pattern:
# http_proxy:
# host:
# port:
# host:
# port:
# PLEASE LEAVE THE SECTIONS BELOW AS THEY ARE (unless you know what you are doing)
......@@ -26,14 +27,14 @@ in:
chunk_size: $CHUNK
storage: $STORAGE
out:
out:
type: wendelin
erp5_url: $ING_URL
tool_dir: $TOOL_DIR
data_set: $DATA_SET
erp5_base_url: $DOWN_URL
exec:
exec:
max_threads: 1
min_output_tasks: 1
......@@ -60,11 +60,15 @@ module Embulk
if not conflicts.empty?
puts
@logger.warn("Some of your local files already exist in remote dataset.", print=TRUE)
@logger.warn("You may want to cancel this partial ingestion and download the full dataset to make local changes.", print=TRUE)
if task['always_yes_mode'] != "true"
@logger.warn("You may want to cancel this partial ingestion and download the full dataset to make local changes.", print=TRUE)
end
puts
@logger.warn("Current ingestion WILL OVERWRITE the following files in remote dataset:", print=TRUE)
puts "** press key **"
option = gets
if task['always_yes_mode'] != "true"
puts "** press key **"
option = gets
end
print_short = conflicts.length > 500
@dataset_utils.showChangesList(conflicts, "", print_short, status=DatasetUtils::OVERWRITE)
else
......@@ -85,6 +89,7 @@ module Embulk
if task['chunk_size'] == 0
task['chunk_size'] = DatasetUtils::CHUNK_SIZE
end
task['always_yes_mode'] = config.param('always_yes', :string)
@data_set = task['data_set']
@dataset_utils = DatasetUtils.new("")
paths = config.param('path_prefix', :array)
......@@ -133,14 +138,14 @@ module Embulk
else
if not @dataset_utils.partialIngestionFileExist()
@logger.info("Checking local dataset...", print=TRUE)
if not @dataset_utils.reportUpToDate(data_stream_dict, @data_set)
if not @dataset_utils.reportUpToDate(data_stream_dict, @data_set) and task['always_yes_mode'] != "true"
puts
@logger.error("Your current dataset is outdated. Please, run a download to update it before ingest your changes.", print=TRUE)
puts
@logger.abortExecution(error=FALSE)
end
end
end
end
end
end
@logger.info("Supplier: #{task['supplier']}")
@logger.info("Dataset name: #{task['data_set']}")
......@@ -153,12 +158,14 @@ module Embulk
end
self.status(task, push=TRUE)
@logger.info("Continue with ingestion? (y/n)", print=TRUE)
option = gets
option = option.chomp
if option == "n"
@logger.info("Ingestion cancelled by user.", print=TRUE)
@logger.abortExecution()
if task['always_yes_mode'] != "true"
@logger.info("Continue with ingestion? (y/n)", print=TRUE)
option = gets
option = option.chomp
if option == "n"
@logger.info("Ingestion cancelled by user.", print=TRUE)
@logger.abortExecution()
end
end
if not @dataset_utils.reportFileExist()
@dataset_utils.createReportFile()
......
......@@ -31,6 +31,14 @@ module Embulk
end
end
# Prepares a non-interactive ("always yes" mode) download by discarding all
# local bookkeeping for any previous/in-progress operation: the split-operation
# control file, the split-operation file, and the completed marker are deleted,
# then a fresh report file is created so the download starts from a clean state.
# Warns the user that the download will overwrite the previous local dataset.
# NOTE(review): assumes @dataset_utils and @logger are already-initialized
# class-level state — confirm against the surrounding module's setup code.
def self.alwaysYesDownload()
@dataset_utils.deleteSplitOperationControlFile()
@dataset_utils.deleteSplitOperationFile()
@dataset_utils.deleteCompletedFile()
@dataset_utils.createReportFile()
@logger.info("[always yes mode] Download operation will overwrite previous local dataset.", print=TRUE)
end
def self.askUserForAction(task, action, show_message)
option = @dataset_utils.getConfiguration(action, task['tool_dir'])
valid_option = option != DatasetUtils::OPTION_ABORT ? TRUE : FALSE
......@@ -104,7 +112,8 @@ module Embulk
'data_set' => @data_set,
'chunk_size' => DatasetUtils::CHUNK_SIZE + 10,
'output_path' => @output_path,
'tool_dir' => @tool_dir
'tool_dir' => @tool_dir,
'always_yes_mode' => config.param('always_yes', :string)
}
if task['chunk_size'] == 0
task['chunk_size'] = DatasetUtils::CHUNK_SIZE
......@@ -158,10 +167,18 @@ module Embulk
task['data_streams'] = @dataset_utils.getRemoteFileListForDiscardLocalChanges(task['data_streams'], @data_set,
check_changes=FALSE, changes=local_changes)
else
self.askUserForAction(task, action=DatasetUtils::OPTION_UPDATE, show_message=TRUE)
if task['always_yes_mode'] != "true"
self.askUserForAction(task, action=DatasetUtils::OPTION_UPDATE, show_message=TRUE)
else
self.alwaysYesDownload()
end
end
elsif not @dataset_utils.partialIngestionFileExist()
self.askUserForAction(task, action=DatasetUtils::OPTION_RESUME, show_message=TRUE)
if task['always_yes_mode'] != "true"
self.askUserForAction(task, action=DatasetUtils::OPTION_RESUME, show_message=TRUE)
else
self.alwaysYesDownload()
end
else
if @dataset_utils.discardChangesFileExist()
puts
......
......@@ -39,5 +39,6 @@ options:
-s, --storage <storage> Uses the selected input storage from this set: [http, ftp, s3]
-cs, --custom-storage Allows user to set a new input storage
-a, --advanced			Allows editing the Embulk configuration file of the input storage
-y, --yes				Enables "always yes" mode: ebulk runs assuming yes to all user prompts
-dc, --discard-changes Discards local changes by checking the remote dataset
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment