Commit 12434484 authored by Jérome Perrin

tmp: replace the MdxEngine executor with the simple wendelin example

Based on https://github.com/abilian/olapy/tree/simple_example_wendelin,
but using numpy arrays directly for simplicity.
parent 3911da79
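
For context, a minimal sketch of how the new module below is meant to be used. The absolute import path is an assumption inferred from the relative imports in the diff; only loard_tables and merge come from the diff itself:

    # hypothetical usage sketch; import path inferred from the diff below
    from olapy.mdx.wendelin_olapy.wendelin_integration import loard_tables, merge

    tables = loard_tables()   # {'Facts': DataFrame, 'Dim': DataFrame}
    star_schema = merge()     # facts joined to the dimension on 'id', 'id' dropped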
@@ -77,60 +77,10 @@ class MdxEngine:
    @classmethod
    def get_cubes_names(cls):
        """:return: list of cube names that exist in the cubes folder (under ~/olapy-data/cubes) and in the postgres database (if connected)."""
        # get csv file folders (cubes)
        # toxworkdir does not expanduser properly under tox
        if 'OLAPY_PATH' in os.environ:
            home_directory = os.environ.get('OLAPY_PATH')
        elif cls.DATA_FOLDER is not None:
            home_directory = os.path.dirname(cls.DATA_FOLDER)
        elif RUNNING_TOX:
            home_directory = os.environ.get('HOME_DIR')
        else:
            home_directory = expanduser("~")
        olapy_data_location = os.path.join(home_directory, 'olapy-data')
        # surrounded with try/except and pass so we continue getting cubes from the
        # other sources (db, csv, ...) without interruption
        cubes_location = os.path.join(olapy_data_location, cls.CUBE_FOLDER)
        try:
            MdxEngine.csv_files_cubes = [
                file for file in os.listdir(cubes_location)
                if os.path.isdir(os.path.join(cubes_location, file))
            ]
        except OSError as error:
            print('Error opening %s: %s' % (error.filename, error.strerror))
            print('no csv folders')
        # get postgres databases
        # surrounded with try/except and pass so we continue getting cubes from the
        # other sources (db, csv, ...) without interruption
        try:
            db = MyDB(db_config_file_path=olapy_data_location)
            # TODO this works only with postgres
            result = db.engine.execute(
                'SELECT datname FROM pg_database WHERE datistemplate = false;')
            available_tables = result.fetchall()
            MdxEngine.postgres_db_cubes = [
                database[0] for database in available_tables
            ]
        except Exception as error:
            print('Error listing databases: %s' % error)
            print('no database connection')
        return MdxEngine.csv_files_cubes + MdxEngine.postgres_db_cubes
        # wendelin
        # (we need to return at least one cube)
        return ["ERP5", ]

    def _get_default_cube_directory(self):
@@ -163,26 +113,9 @@ class MdxEngine:
        :return: dict with table names as keys and DataFrames as values
        """
        config_file_parser = ConfigParser(self.cube_path)
        tables = {}
        if self.client == 'excel' and config_file_parser.config_file_exist(
                client_type=self.client
        ) and self.cube in config_file_parser.get_cubes_names(
                client_type=self.client):
            # for web (config file) we need only star_schema_dataframes, not all tables
            for cubes in config_file_parser.construct_cubes():
                # TODO handle cubes.source == 'csv'
                if cubes.source == 'postgres':
                    tables = _load_table_config_file(self, cubes)
        elif self.cube in self.csv_files_cubes:
            tables = _load_tables_csv_files(self)
        elif self.cube in self.postgres_db_cubes:
            tables = _load_tables_db(self)
        # wendelin
        from ...mdx.wendelin_olapy.wendelin_integration import loard_tables
        tables = loard_tables()
        return tables

    def get_measures(self):
@@ -215,32 +148,10 @@ class MdxEngine:
        :param cube_name: cube name for which we want to generate a star schema model
        :return: star schema DataFrame
        """
        fusion = None
        config_file_parser = ConfigParser(self.cube_path)
        if config_file_parser.config_file_exist(
                self.client
        ) and self.cube in config_file_parser.get_cubes_names(
                client_type=self.client):
            for cubes in config_file_parser.construct_cubes(self.client):
                # TODO handle cubes.source == 'csv'
                if cubes.source == 'postgres':
                    # TODO one config file (try to merge dimensions between them in the web part)
                    if self.client == 'web':
                        fusion = _construct_web_star_schema_config_file(self, cubes)
                    else:
                        fusion = _construct_star_schema_config_file(self, cubes)
        elif self.cube in self.csv_files_cubes:
            fusion = _construct_star_schema_csv_files(self)
        elif self.cube in self.postgres_db_cubes:
            fusion = _construct_star_schema_db(self)
        return fusion[[
            col for col in fusion.columns if col.lower()[-3:] != '_id'
        ]]
        # wendelin
        from ...mdx.wendelin_olapy.wendelin_integration import merge
        fusion = merge()
        return fusion

    def get_all_tables_names(self, ignore_fact=False):
        """
        ...

mdx/wendelin_olapy/wendelin_integration.py (new file):
import pandas as pd
import numpy as np

def mock_big_array():
    # mock a facts table
    f = np.array([
        [1, 1000],
        [2, 5222],
        [3, 20000],
        [4, 66666],
        [5, 888888],
        [6, 777777],
        [7, 333333],
        [8, 1111111]
    ])
    # mock a dimension
    d = np.array([
        [1, 'data1', 'data8'],
        [2, 'data4', 'data9'],
        [3, 'data3', 'data9'],
        [4, 'data4', 'data5'],
        [5, 'data3', 'data2'],
        [6, 'data4', 'data5'],
        [7, 'data1', 'data2'],
        [8, 'data4', 'data5']
    ])
    # note: numpy coerces the mixed int/str dimension array to strings,
    # hence the astype(int) on the 'id' column in to_dataframe below
    return {'Facts': f,
            'dimension': d}
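
# A quick sanity check of the mocked shapes, derived from the arrays above:
#   mock_big_array()['Facts'].shape     -> (8, 2)
#   mock_big_array()['dimension'].shape -> (8, 3)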

def to_dataframe(ndarray, dim_type='dimension'):
    """Build a DataFrame from a 2D array whose first column is the row id.

    Generated column names are ['id', 'measure0', 'measure1', ...] for facts
    and ['id', 'column0', 'column1', ...] for dimensions.

    :param ndarray: 2D numpy array, first column holding the id
    :param dim_type: 'facts' or 'dimension'
    :return: pandas DataFrame with an integer 'id' column
    """
    if dim_type == 'facts':
        column_name = 'measure'
    else:
        column_name = 'column'
    # generate dataframe column names: ['id', 'column0', 'column1', ...]
    columns = ['id'] + [column_name + str(idx)
                        for idx in range(ndarray.shape[1] - 1)]
    df = pd.DataFrame(data=ndarray, columns=columns)
    # convert the id column to integer (numpy stored it as a string
    # when the array also contains string data)
    df[['id']] = df[['id']].astype(int)
    return df
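
# Example column layouts, derived from mock_big_array above:
#   >>> to_dataframe(mock_big_array()['Facts'], dim_type='facts').columns.tolist()
#   ['id', 'measure0']
#   >>> to_dataframe(mock_big_array()['dimension']).columns.tolist()
#   ['id', 'column0', 'column1']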

def loard_tables():
    ndarrays = mock_big_array()
    facts = to_dataframe(ndarrays['Facts'], dim_type='facts')
    dim = to_dataframe(ndarrays['dimension'])
    # remove the id column from both tables
    facts = facts[[col for col in facts.columns if col != 'id']]
    dim = dim[[col for col in dim.columns if col != 'id']]
    return {'Facts': facts,
            'Dim': dim}

def merge():
    # TODO: merge directly from loard_tables() above
    # (not possible as is, since loard_tables() drops the 'id' join column)
    ndarrays = mock_big_array()
    dim = to_dataframe(ndarrays['dimension'])
    facts = to_dataframe(ndarrays['Facts'], dim_type='facts')
    # join facts to the dimension on 'id', then drop the id column
    df = facts.merge(dim, on='id')
    return df[[col for col in df.columns if col != 'id']]
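
# Example: the star schema keeps only measure and dimension columns:
#   >>> merge().columns.tolist()
#   ['measure0', 'column0', 'column1']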

# Then use loard_tables() and merge() directly in olapy's execute module:
#
# line 152: loard_tables() -> get all dimensions and facts
# line 207: get_star_schema_dataframe() -> return the merged dataframe