Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
O
olapy
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Labels
Merge Requests
2
Merge Requests
2
Analytics
Analytics
Repository
Value Stream
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Commits
Open sidebar
nexedi
olapy
Commits
12be070e
Commit
12be070e
authored
May 12, 2017
by
mouadh
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
split execute into db, csv and config_file execute
parent
1bf75ab7
Changes
4
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
246 additions
and
227 deletions
+246
-227
olapy/core/mdx/executor/execute.py
olapy/core/mdx/executor/execute.py
+15
-227
olapy/core/mdx/executor/execute_config_file.py
olapy/core/mdx/executor/execute_config_file.py
+132
-0
olapy/core/mdx/executor/execute_csv_files.py
olapy/core/mdx/executor/execute_csv_files.py
+47
-0
olapy/core/mdx/executor/execute_db.py
olapy/core/mdx/executor/execute_db.py
+52
-0
No files found.
olapy/core/mdx/executor/execute.py
View file @
12be070e
This diff is collapsed.
Click to expand it.
olapy/core/mdx/executor/execute_config_file.py
0 → 100644
View file @
12be070e
from
..tools.connection
import
MyDB
import
pandas.io.sql
as
psql
def _load_table_config_file(executer_instance, cube_obj):
    """
    Load the cube's dimension tables as declared in the configuration file.

    :param executer_instance: executor whose ``facts`` and
        ``dimension_display_name`` attributes are updated in place
    :param cube_obj: cubes object parsed from the configuration file
    :return: tables dict with table name as key and DataFrame as value
    """
    loaded = {}
    # just one facts table right now
    executer_instance.facts = cube_obj.facts[0].table_name
    db = MyDB(db=executer_instance.cube)
    for dim_table in cube_obj.dimensions:
        frame = psql.read_sql_query(
            "SELECT * FROM {0}".format(dim_table.name), db.connection)
        # strip technical key columns (names ending with '_id')
        kept_columns = [
            col for col in frame.columns if not col.lower().endswith('_id')
        ]
        loaded[dim_table.name] = frame[kept_columns]
    # expose each table under its display name when one is configured
    for dimension in cube_obj.dimensions:
        has_distinct_display = (dimension.displayName and dimension.name
                                and dimension.displayName != dimension.name)
        if has_distinct_display:
            loaded[dimension.displayName] = \
                loaded[dimension.name][dimension.columns]
            executer_instance.dimension_display_name.append(dimension.name)
    return loaded
def _construct_star_schema_config_file(executer_instance, cubes_obj):
    """
    Construct star schema DataFrame from configuration file.

    The facts table is loaded, then each dimension referenced in the
    facts keys mapping is joined onto it.

    :param executer_instance: executor whose ``facts`` and ``measures``
        attributes are updated in place
    :param cubes_obj: cubes object parsed from the configuration file
    :return: star schema DataFrame
    """
    executer_instance.facts = cubes_obj.facts[0].table_name
    db = MyDB(db=executer_instance.cube)
    # load facts table
    fusion = psql.read_sql_query(
        "SELECT * FROM {0}".format(executer_instance.facts), db.connection)
    for fact_key, dimension_and_key in cubes_obj.facts[0].keys.items():
        # 'dimension.column' -> [0] is the table to join, [1] the join column
        # (split once instead of twice per iteration)
        parts = dimension_and_key.split('.')
        df = psql.read_sql_query(
            "SELECT * FROM {0}".format(parts[0]), db.connection)
        fusion = fusion.merge(df, left_on=fact_key, right_on=parts[1])
        # TODO CHOSE BETWEEN THOSES DF
        # if separated dimensions
        # fusion = fusion.merge(df, left_on=fact_key,right_on=dimension_and_key.split('.')[1])
        # TODO CHOSE BETWEEN THOSES DF
        # if facts contains all dimensions
        # fusion = facts
    # measures in config-file only
    if cubes_obj.facts[0].measures:
        executer_instance.measures = cubes_obj.facts[0].measures
    return fusion
def _construct_web_star_schema_config_file(executer_instance, cubes_obj):
    """
    Construct star schema DataFrame from a web-mode configuration file.

    :param executer_instance: executor whose ``facts`` and ``measures``
        attributes are updated in place
    :param cubes_obj: cubes object parsed from the configuration file
    :return: star schema DataFrame restricted to the configured columns
    """
    all_columns = []
    executer_instance.facts = cubes_obj.facts[0].table_name
    db = MyDB(db=executer_instance.cube)
    # load facts table
    # measures in config-file only
    if cubes_obj.facts[0].measures:
        executer_instance.measures = cubes_obj.facts[0].measures
        all_columns += cubes_obj.facts[0].measures
    fusion = psql.read_sql_query(
        "SELECT * FROM {0}".format(executer_instance.facts), db.connection)
    tables = {}
    for table in cubes_obj.tables:
        tab = psql.read_sql_query(
            "SELECT * FROM {0}".format(table.name), db.connection)
        # keep only the configured columns; fall back to all columns when
        # the config declares none (fix: bare except narrowed to Exception)
        try:
            if table.columns:
                tab = tab[table.columns]
        except Exception:
            print("table columns doesn't exist")
            print('pass with all columns')
        # apply configured column renames; keep names unchanged on failure
        try:
            if table.new_names:
                tab = tab.rename(columns=table.new_names)
        except Exception:
            print("verify your old and new columns names")
            print('pass with no change')
        all_columns += list(tab.columns)
        tables.update({table.name: tab})
    for fact_key, dimension_and_key in cubes_obj.facts[0].keys.items():
        # split once; [0] is the dimension table, [1] the join column
        parts = dimension_and_key.split('.')
        dimension_name = parts[0]
        # reuse the pre-filtered table when configured, else load it whole
        if dimension_name in tables:
            df = tables[dimension_name]
        else:
            df = psql.read_sql_query(
                "SELECT * FROM {0}".format(dimension_name), db.connection)
        fusion = fusion.merge(df, left_on=fact_key, right_on=parts[1])
    # NOTE(review): this filter drops ANY column whose name ends in 'id'
    # (e.g. 'valid'), not only key columns -- kept as-is to preserve behavior.
    return fusion[[column for column in all_columns if 'id' != column[-2:]]]
olapy/core/mdx/executor/execute_csv_files.py
0 → 100644
View file @
12be070e
import
os
import
pandas
as
pd
def
_load_tables_csv_files
(
executer_instance
):
"""
Load tables from csv files.
:return: tables dict with table name as key and dataframe as value
"""
tables
=
{}
cube
=
executer_instance
.
get_cube
()
for
file
in
os
.
listdir
(
cube
):
# to remove file extension ".csv"
table_name
=
os
.
path
.
splitext
(
file
)[
0
]
value
=
pd
.
read_csv
(
os
.
path
.
join
(
cube
,
file
),
sep
=
executer_instance
.
sep
)
tables
[
table_name
]
=
value
[[
col
for
col
in
value
.
columns
if
col
.
lower
()[
-
3
:]
!=
'_id'
]]
return
tables
def
_construct_star_schema_csv_files
(
executer_instance
):
"""
Construct star schema DataFrame from csv files.
:param cube_name: cube name (folder name)
:return: star schema DataFrame
"""
cube
=
executer_instance
.
get_cube
()
# loading facts table
fusion
=
pd
.
read_csv
(
os
.
path
.
join
(
cube
,
executer_instance
.
facts
+
'.csv'
),
sep
=
executer_instance
.
sep
)
for
file_name
in
os
.
listdir
(
cube
):
try
:
fusion
=
fusion
.
merge
(
pd
.
read_csv
(
os
.
path
.
join
(
cube
,
file_name
),
sep
=
executer_instance
.
sep
))
except
:
print
(
'No common column'
)
pass
return
fusion
olapy/core/mdx/executor/execute_db.py
0 → 100644
View file @
12be070e
from
..tools.connection
import
MyDB
import
pandas.io.sql
as
psql
def _load_tables_db(executer_instance):
    """
    Load tables from database.

    All tables of the 'public' schema are read; technical key columns
    whose name ends with '_id' are dropped.

    :param executer_instance: executor providing ``cube`` (the database name)
    :return: tables dict with table name as key and dataframe as value
    """
    tables = {}
    db = MyDB(db=executer_instance.cube)
    cursor = db.connection.cursor()
    cursor.execute("""SELECT table_name FROM information_schema.tables
                      WHERE table_schema = 'public'""")
    # fetch all names first so the cursor can be released before the
    # (potentially long) per-table reads
    table_names = [row[0] for row in cursor.fetchall()]
    cursor.close()  # fix: cursor was never closed (resource leak)
    for table_name in table_names:
        value = psql.read_sql_query(
            'SELECT * FROM "{0}" '.format(table_name), db.connection)
        tables[table_name] = value[[
            col for col in value.columns if col.lower()[-3:] != '_id'
        ]]
    return tables
def _construct_star_schema_db(executer_instance):
    """
    Construct star schema DataFrame from database.

    The facts table is loaded first, then every table of the 'public'
    schema is merged into it on their common columns; tables sharing no
    column with the accumulated frame are skipped.

    :param executer_instance: executor providing ``cube`` (the database
        name) and ``facts`` (the facts table name)
    :return: star schema DataFrame
    """
    db = MyDB(db=executer_instance.cube)
    # load facts table
    fusion = psql.read_sql_query(
        'SELECT * FROM "{0}" '.format(executer_instance.facts), db.connection)
    cursor = db.connection.cursor()
    cursor.execute("""SELECT table_name FROM information_schema.tables
                      WHERE table_schema = 'public'""")
    table_names = [row[0] for row in cursor.fetchall()]
    cursor.close()  # fix: cursor was never closed (resource leak)
    for db_table_name in table_names:
        # best-effort join (fix: bare except narrowed to Exception,
        # redundant `pass` removed)
        try:
            # fix: quote the identifier, consistently with the facts query
            # above and with _load_tables_db, so mixed-case table names
            # returned by information_schema are resolved correctly
            fusion = fusion.merge(
                psql.read_sql_query(
                    'SELECT * FROM "{0}" '.format(db_table_name),
                    db.connection))
        except Exception:
            print('No common column')
    return fusion
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment