Commit 1fb79bb3 authored by mouadh's avatar mouadh

Merge branch 'update_excel_config_file'

# Conflicts:
#	requirements.txt
parents f4fd1e91 c85f307c
<?xml version="1.0" encoding="UTF-8"?> <?xml version="1.0" encoding="UTF-8"?>
<!-- this config file will be deleted ASAP -->
<olapy> <olapy>
<database> <database>
...@@ -8,6 +8,7 @@ ...@@ -8,6 +8,7 @@
<user_name>postgres</user_name> <user_name>postgres</user_name>
<password>root</password> <password>root</password>
<host>localhost</host> <host>localhost</host>
<port>5432</port>
</database> </database>
......
...@@ -33,12 +33,14 @@ class MdxEngine: ...@@ -33,12 +33,14 @@ class MdxEngine:
:param sep: separator in the csv files :param sep: separator in the csv files
""" """
# DATA_FOLDER is used by olapy web (Flask instance_path):
# it lets us resolve the olapy-data path from instance_path instead of 'expanduser'
DATA_FOLDER = None
CUBE_FOLDER = "cubes" CUBE_FOLDER = "cubes"
# (before instantiate MdxEngine I need to access cubes information) # (before instantiate MdxEngine I need to access cubes information)
csv_files_cubes = [] csv_files_cubes = []
postgres_db_cubes = [] postgres_db_cubes = []
# to show just config file's dimensions # to show just config file's dimensions
dimension_display_name = []
def __init__(self, def __init__(self,
cube_name, cube_name,
...@@ -64,8 +66,8 @@ class MdxEngine: ...@@ -64,8 +66,8 @@ class MdxEngine:
self.client = client_type self.client = client_type
self.tables_loaded = self.load_tables() self.tables_loaded = self.load_tables()
# all measures # all measures
self.load_star_schema_dataframe = self.get_star_schema_dataframe()
self.measures = self.get_measures() self.measures = self.get_measures()
self.load_star_schema_dataframe = self.get_star_schema_dataframe()
self.tables_names = self._get_tables_name() self.tables_names = self._get_tables_name()
# default measure is the first one # default measure is the first one
self.selected_measures = [self.measures[0]] self.selected_measures = [self.measures[0]]
...@@ -75,32 +77,42 @@ class MdxEngine: ...@@ -75,32 +77,42 @@ class MdxEngine:
""":return: list cubes name that exists in cubes folder (under ~/olapy-data/cubes) and postgres database (if connected).""" """:return: list cubes name that exists in cubes folder (under ~/olapy-data/cubes) and postgres database (if connected)."""
# get csv files folders (cubes) # get csv files folders (cubes)
# toxworkdir does not expanduser properly under tox # toxworkdir does not expanduser properly under tox
if RUNNING_TOX:
# surrounded with try/except and pass so we keep getting cubes from the other
# sources (db, csv...) without interruption
if cls.DATA_FOLDER is not None:
home_directory = os.path.dirname(cls.DATA_FOLDER)
elif RUNNING_TOX:
home_directory = os.environ.get('HOME_DIR') home_directory = os.environ.get('HOME_DIR')
else: else:
home_directory = expanduser("~") home_directory = expanduser("~")
location = os.path.join(home_directory, 'olapy-data', cls.CUBE_FOLDER) location = os.path.join(home_directory, 'olapy-data', cls.CUBE_FOLDER)
# surrended with try, except and PASS so we continue getting cubes from different
# sources (db, csv...) without interruption
try: try:
MdxEngine.csv_files_cubes = [ MdxEngine.csv_files_cubes = [
file for file in os.listdir(location) file for file in os.listdir(location)
if os.path.isdir(os.path.join(location, file)) if os.path.isdir(os.path.join(location, file))
] ]
except Exception: except Exception:
print('no csv folders') print('no csv folders')
pass pass
# get postgres databases # get postgres databases
# surrended with try, except and PASS so we continue getting cubes from different
# sources (db, csv...) without interruption
try: try:
db = MyDB() db = MyDB(db_config_file_path=cls.DATA_FOLDER)
cursor = db.connection.cursor() # TODO this work only with postgres
cursor.execute("""SELECT datname FROM pg_database result = db.engine.execute('SELECT datname FROM pg_database WHERE datistemplate = false;')
WHERE datistemplate = false;""") available_tables = result.fetchall()
# cursor.execute("""SELECT datname FROM pg_database
# WHERE datistemplate = false;""")
MdxEngine.postgres_db_cubes = [ MdxEngine.postgres_db_cubes = [
database[0] for database in cursor.fetchall() database[0] for database in available_tables
] ]
except Exception: except Exception:
...@@ -112,12 +124,19 @@ class MdxEngine: ...@@ -112,12 +124,19 @@ class MdxEngine:
def _get_default_cube_directory(self): def _get_default_cube_directory(self):
# toxworkdir does not expanduser properly under tox # toxworkdir does not expanduser properly under tox
if RUNNING_TOX: if 'OLAPY_PATH' in os.environ:
home_directory = os.environ.get('OLAPY_PATH')
elif MdxEngine.DATA_FOLDER is not None:
home_directory = MdxEngine.DATA_FOLDER
elif RUNNING_TOX:
home_directory = os.environ.get('HOME_DIR') home_directory = os.environ.get('HOME_DIR')
else: else:
home_directory = expanduser("~") home_directory = expanduser("~")
return os.path.join(home_directory, 'olapy-data', self.cube_folder) if 'olapy-data' not in home_directory:
home_directory = os.path.join(home_directory, 'olapy-data')
return os.path.join(home_directory, self.cube_folder)
def _get_tables_name(self): def _get_tables_name(self):
""" """
...@@ -135,6 +154,7 @@ class MdxEngine: ...@@ -135,6 +154,7 @@ class MdxEngine:
""" """
config_file_parser = ConfigParser(self.cube_path) config_file_parser = ConfigParser(self.cube_path)
tables = {} tables = {}
if config_file_parser.config_file_exist( if config_file_parser.config_file_exist(
) and self.cube in config_file_parser.get_cubes_names( ) and self.cube in config_file_parser.get_cubes_names(
) and self.client != 'web': ) and self.client != 'web':
...@@ -155,6 +175,20 @@ class MdxEngine: ...@@ -155,6 +175,20 @@ class MdxEngine:
def get_measures(self): def get_measures(self):
""":return: all numerical columns in facts table.""" """:return: all numerical columns in facts table."""
# col.lower()[-2:] != 'id' to ignore any id column # col.lower()[-2:] != 'id' to ignore any id column
# if web get measures from config file
config_file_parser = ConfigParser(self.cube_path)
if self.client == 'web' and config_file_parser.config_file_exist('web'):
for cubes in config_file_parser.construct_cubes(self.client):
# TODO temp
# update facts name
self.facts = cubes.facts[0].table_name
if cubes.facts[0].measures:
return cubes.facts[0].measures
return [ return [
col col
for col in self.tables_loaded[self.facts].select_dtypes( for col in self.tables_loaded[self.facts].select_dtypes(
...@@ -169,7 +203,6 @@ class MdxEngine: ...@@ -169,7 +203,6 @@ class MdxEngine:
:return: star schema DataFrame :return: star schema DataFrame
""" """
fusion = None fusion = None
config_file_parser = ConfigParser(self.cube_path) config_file_parser = ConfigParser(self.cube_path)
if config_file_parser.config_file_exist( if config_file_parser.config_file_exist(
self.client self.client
...@@ -213,6 +246,8 @@ class MdxEngine: ...@@ -213,6 +246,8 @@ class MdxEngine:
:return: path to the cube :return: path to the cube
""" """
if MdxEngine.DATA_FOLDER is not None:
return os.path.join(MdxEngine.DATA_FOLDER, MdxEngine.CUBE_FOLDER, self.cube)
return os.path.join(self.cube_path, self.cube) return os.path.join(self.cube_path, self.cube)
# TODO temporary function # TODO temporary function
......
from __future__ import absolute_import, division, print_function from __future__ import absolute_import, division, print_function
from ..tools.mem_bench import memory_usage
from ..tools.connection import MyDB from ..tools.connection import MyDB
import pandas.io.sql as psql import pandas.io.sql as psql
import os
def _load_table_config_file(executer_instance, cube_obj): def _load_table_config_file(executer_instance, cube_obj):
...@@ -15,22 +17,31 @@ def _load_table_config_file(executer_instance, cube_obj): ...@@ -15,22 +17,31 @@ def _load_table_config_file(executer_instance, cube_obj):
# just one facts table right now # just one facts table right now
executer_instance.facts = cube_obj.facts[0].table_name executer_instance.facts = cube_obj.facts[0].table_name
db = MyDB(db=executer_instance.cube) db = MyDB(db_config_file_path=os.path.dirname(executer_instance.cube_path), db=executer_instance.cube)
memory_usage("1 - before executing query //// _load_table_config_file")
for dimension in cube_obj.dimensions:
for table in cube_obj.dimensions: df = psql.read_sql_query("SELECT * FROM {0}".format(dimension.name),
value = psql.read_sql_query("SELECT * FROM {0}".format(table.name), db.engine)
db.connection) # only certain columns
if dimension.columns.keys():
df = df[dimension.columns.keys()]
tables[table.name] = value[[ # change table display name
col for col in value.columns if col.lower()[-3:] != '_id' if dimension.displayName:
table_name = dimension.displayName
else:
table_name = dimension.name
# rename columns if value not None
df.rename(columns=(dict((k, v) for k, v in dimension.columns.items() if v)), inplace=True)
tables[table_name] = df[[
col for col in df.columns if col.lower()[-2:] != 'id'
]] ]]
# update table display name memory_usage("2 - after query, before fetchall /////// _load_table_config_file")
for dimension in cube_obj.dimensions:
if dimension.displayName and dimension.name and dimension.displayName != dimension.name:
tables[dimension.displayName] = tables[dimension.name][
dimension.columns]
executer_instance.dimension_display_name.append(dimension.name)
return tables return tables
...@@ -44,22 +55,76 @@ def _construct_star_schema_config_file(executer_instance, cubes_obj): ...@@ -44,22 +55,76 @@ def _construct_star_schema_config_file(executer_instance, cubes_obj):
:return: star schema DataFrame :return: star schema DataFrame
""" """
executer_instance.facts = cubes_obj.facts[0].table_name executer_instance.facts = cubes_obj.facts[0].table_name
db = MyDB(db=executer_instance.cube) db = MyDB(db_config_file_path=os.path.dirname(executer_instance.cube_path), db=executer_instance.cube)
# load facts table # load facts table
memory_usage("1 - before executing query //// _construct_star_schema_config_file")
fusion = psql.read_sql_query( fusion = psql.read_sql_query(
"SELECT * FROM {0}".format(executer_instance.facts), db.connection) "SELECT * FROM {0}".format(executer_instance.facts), db.engine)
for fact_key, dimension_and_key in cubes_obj.facts[0].keys.items(): for fact_key, dimension_and_key in cubes_obj.facts[0].keys.items():
df = psql.read_sql_query( df = psql.read_sql_query(
"SELECT * FROM {0}".format(dimension_and_key.split('.')[0]), "SELECT * FROM {0}".format(dimension_and_key.split('.')[0]),
db.connection) db.engine)
for dimension in cubes_obj.dimensions:
if dimension_and_key.split('.')[0] == dimension.name:
df.rename(columns=dimension.columns, inplace=True)
# todo test with this
fusion = fusion.merge( fusion = fusion.merge(
df, left_on=fact_key, right_on=dimension_and_key.split('.')[1]) df, left_on=fact_key, right_on=dimension_and_key.split('.')[1])
# TODO CHOSE BETWEEN THOSES DF # TODO CHOSE BETWEEN THOSES DF
# if separated dimensions # fusion = fusion.merge(
# fusion = fusion.merge(df, left_on=fact_key,right_on=dimension_and_key.split('.')[1]) # df, left_on=fact_key, right_on=dimension_and_key.split('.')[1], how='left',
# # remove suffixe from dimension and keep the same column name for facts
# suffixes=('', '_y'))
memory_usage("2 - after query, before fetchall /////// _construct_star_schema_config_file")
# TODO CHOSE BETWEEN THOSES DF
# if separated dimensions
# fusion = fusion.merge(df, left_on=fact_key,right_on=dimension_and_key.split('.')[1])
# TODO CHOSE BETWEEN THOSES DF
# if facts contains all dimensions
# fusion = facts
# measures in config-file only
if cubes_obj.facts[0].measures:
executer_instance.measures = cubes_obj.facts[0].measures
return fusion
def _construct_star_schema_config_file_OLD(executer_instance, cubes_obj):
"""
Construct star schema DataFrame from configuration file.
:param cube_name: cube name (or database name)
:param cubes_obj: cubes object
:return: star schema DataFrame
"""
executer_instance.facts = cubes_obj.facts[0].table_name
db = MyDB(db=executer_instance.cube)
# load facts table
memory_usage("1 - before executing query //// _construct_star_schema_config_file")
fusion = psql.read_sql_query(
"SELECT * FROM {0}".format(executer_instance.facts), db.engine)
for fact_key, dimension_and_key in cubes_obj.facts[0].keys.items():
df = psql.read_sql_query(
"SELECT * FROM {0}".format(dimension_and_key.split('.')[0]),
db.engine)
fusion = fusion.merge(
df, left_on=fact_key, right_on=dimension_and_key.split('.')[1])
memory_usage("2 - after query, before fetchall /////// _construct_star_schema_config_file")
# TODO CHOSE BETWEEN THOSES DF
# if separated dimensions
# fusion = fusion.merge(df, left_on=fact_key,right_on=dimension_and_key.split('.')[1])
# TODO CHOSE BETWEEN THOSES DF # TODO CHOSE BETWEEN THOSES DF
# if facts contains all dimensions # if facts contains all dimensions
...@@ -83,20 +148,24 @@ def _construct_web_star_schema_config_file(executer_instance, cubes_obj): ...@@ -83,20 +148,24 @@ def _construct_web_star_schema_config_file(executer_instance, cubes_obj):
all_columns = [] all_columns = []
executer_instance.facts = cubes_obj.facts[0].table_name executer_instance.facts = cubes_obj.facts[0].table_name
db = MyDB(db=executer_instance.cube) db = MyDB(db_config_file_path=os.path.dirname(executer_instance.cube_path), db=executer_instance.cube)
# load facts table # load facts table
if cubes_obj.facts[0].columns: if cubes_obj.facts[0].columns:
all_columns += cubes_obj.facts[0].columns all_columns += cubes_obj.facts[0].columns
memory_usage("1 - before executing query //// 1111 _construct_web_star_schema_config_file ")
fusion = psql.read_sql_query( fusion = psql.read_sql_query(
"SELECT * FROM {0}".format(executer_instance.facts), db.connection) "SELECT * FROM {0}".format(executer_instance.facts), db.engine)
memory_usage("2 - after query, before fetchall /////// 222222222222 _construct_star_schema_config_file")
tables = {} tables = {}
memory_usage("1 - before executing query //// 3333333333 _construct_web_star_schema_config_file ")
for table in cubes_obj.tables: for table in cubes_obj.tables:
tab = psql.read_sql_query("SELECT * FROM {0}".format(table.name), tab = psql.read_sql_query("SELECT * FROM {0}".format(table.name),
db.connection) db.engine)
try: try:
if table.columns: if table.columns:
...@@ -117,11 +186,14 @@ def _construct_web_star_schema_config_file(executer_instance, cubes_obj): ...@@ -117,11 +186,14 @@ def _construct_web_star_schema_config_file(executer_instance, cubes_obj):
all_columns += list(tab.columns) all_columns += list(tab.columns)
tables.update({table.name: tab}) tables.update({table.name: tab})
memory_usage("2 - after query, before fetchall /////// 44444444 _construct_star_schema_config_file")
# measures in config-file only # measures in config-file only
if cubes_obj.facts[0].measures: if cubes_obj.facts[0].measures:
executer_instance.measures = cubes_obj.facts[0].measures executer_instance.measures = cubes_obj.facts[0].measures
all_columns += cubes_obj.facts[0].measures all_columns += cubes_obj.facts[0].measures
memory_usage("1 - before executing query //// 55555555 _construct_web_star_schema_config_file ")
for fact_key, dimension_and_key in cubes_obj.facts[0].keys.items(): for fact_key, dimension_and_key in cubes_obj.facts[0].keys.items():
dimension_name = dimension_and_key.split('.')[0] dimension_name = dimension_and_key.split('.')[0]
if dimension_name in tables.keys(): if dimension_name in tables.keys():
...@@ -129,13 +201,15 @@ def _construct_web_star_schema_config_file(executer_instance, cubes_obj): ...@@ -129,13 +201,15 @@ def _construct_web_star_schema_config_file(executer_instance, cubes_obj):
else: else:
df = psql.read_sql_query( df = psql.read_sql_query(
"SELECT * FROM {0}".format(dimension_and_key.split('.')[0]), "SELECT * FROM {0}".format(dimension_and_key.split('.')[0]),
db.connection) db.engine)
# TODO check merge (how) # TODO check merge (how)
fusion = fusion.merge( fusion = fusion.merge(
df, left_on=fact_key, right_on=dimension_and_key.split('.')[1], how='left', df, left_on=fact_key, right_on=dimension_and_key.split('.')[1], how='left',
# remove suffixe from dimension and keep the same column name for facts # remove suffixe from dimension and keep the same column name for facts
suffixes=('', '_y')) suffixes=('', '_y'))
memory_usage("2 - after query, before fetchall /////// 6666666666 _construct_star_schema_config_file")
return fusion[[column for column in all_columns if 'id' != column[-2:]]] return fusion[[column for column in all_columns if 'id' != column[-2:]]]
from __future__ import absolute_import, division, print_function from __future__ import absolute_import, division, print_function
from sqlalchemy import inspect
import pandas as pd
from ..tools.mem_bench import memory_usage
from ..tools.connection import MyDB from ..tools.connection import MyDB
import pandas.io.sql as psql import pandas.io.sql as psql
class StringFolder(object):
    """
    Class that will fold strings. See 'fold_string'.

    Folding maps every string with a given value onto one shared object,
    so repeated values coming out of a query are stored only once.
    This object may be safely deleted or go out of scope when
    strings have been folded.
    """

    # Resolve the text type and the interning function once, at class
    # creation, so the per-value hot path does no compatibility checks.
    try:
        # Python 2: str and unicode both derive from basestring and
        # intern() is a builtin.
        _text_types = basestring  # noqa: F821
        _intern = staticmethod(intern)  # noqa: F821
    except NameError:
        # Python 3: both builtins were removed; str is the only text type
        # and interning lives in sys.intern.
        from sys import intern as _sys_intern
        _text_types = str
        _intern = staticmethod(_sys_intern)
        del _sys_intern

    def __init__(self):
        # Unicode values that cannot be turned into a byte string
        # (Python 2 only) are shared through this map instead of intern().
        self.unicode_map = {}

    def fold_string(self, s):
        """
        Given a string (or unicode) parameter s, return a string object
        that has the same value as s (and may be s). For all objects
        with a given value, the same object will be returned. For unicode
        objects that can be coerced to a string with the same value, a
        string object will be returned.

        If s is not a string or unicode object, it is returned unchanged
        (on Python 3 this includes bytes).

        :param s: a string or unicode object.
        :return: a string or unicode object.
        """
        # If s is not a string or unicode object, return it unchanged
        if not isinstance(s, self._text_types):
            return s

        # If s is already a string, then str() has no effect.
        # If s is Unicode (Python 2), try to encode it as a string and
        # intern it; when it can't be encoded, str() raises
        # UnicodeEncodeError and we fall back to the map below.
        try:
            return self._intern(str(s))
        except UnicodeEncodeError:
            # Fall through and handle s as Unicode
            pass

        # Look up the unicode value in the map and return
        # the object from the map. If there is no matching entry,
        # store this unicode object in the map and return it.
        return self.unicode_map.setdefault(s, s)
def string_folding_wrapper(results):
    """
    Generator over ``results`` that yields each row as a tuple whose
    values went through ``StringFolder.fold_string``.

    ``results`` must expose ``keys()`` and yield rows that can be indexed
    by those keys; the tuple values follow the order of ``keys()``.
    """
    # Column order is read once and reused for every row.
    column_names = results.keys()
    # One folder for the whole result set, so equal values share an object.
    fold = StringFolder().fold_string
    for record in results:
        yield tuple(fold(record[name]) for name in column_names)
# TODO try pandas.read_sql_table and pandas.read_sql
def _load_tables_db(executer_instance): def _load_tables_db(executer_instance):
""" """
Load tables from database. Load tables from database.
...@@ -11,18 +71,40 @@ def _load_tables_db(executer_instance): ...@@ -11,18 +71,40 @@ def _load_tables_db(executer_instance):
:return: tables dict with table name as key and dataframe as value :return: tables dict with table name as key and dataframe as value
""" """
tables = {} tables = {}
db = MyDB(db=executer_instance.cube) db = MyDB(db_config_file_path=executer_instance.DATA_FOLDER,db=executer_instance.cube)
cursor = db.connection.cursor() inspector = inspect(db.engine)
cursor.execute("""SELECT table_name FROM information_schema.tables
WHERE table_schema = 'public'""") memory_usage("1 - before executing query //// _load_tables_db")
for table_name in inspector.get_table_names():
for table_name in cursor.fetchall(): # value = psql.read_sql_query(
value = psql.read_sql_query( # 'SELECT * FROM "{0}"'.format(table_name), db.engine)
'SELECT * FROM "{0}" '.format(table_name[0]), db.connection)
tables[table_name[0]] = value[[ # results = db.engine.execute('SELECT * FROM "{0}"'.format(table_name))
results = db.engine.execution_options(stream_results=True).execute('SELECT * FROM "{0}"'.format(table_name))
# Fetch all the results of the query
value = pd.DataFrame(iter(results),columns=results.keys()) # Pass results as an iterator
# with string_folding_wrapper we lose response time
# value = pd.DataFrame(string_folding_wrapper(results),columns=results.keys())
tables[table_name] = value[[
col for col in value.columns if col.lower()[-3:] != '_id' col for col in value.columns if col.lower()[-3:] != '_id'
]] ]]
# tables = {}
# db = MyDB(db_config_file_path=executer_instance.DATA_FOLDER, db=executer_instance.cube)
# # inspector = inspect(db.engine)
# cursor = db.engine.cursor()
# cursor.execute("""SELECT table_name FROM information_schema.tables
# WHERE table_schema = 'public'""")
#
# for table_name in cursor.fetchall():
# value = psql.read_sql_query(
# 'SELECT * FROM "{0}" '.format(table_name[0]), db.engine)
#
# tables[table_name[0]] = value[[
# col for col in value.columns if col.lower()[-3:] != '_id'
# ]]
memory_usage("2 - after query, before fetchall /////// _load_tables_db")
return tables return tables
...@@ -34,21 +116,22 @@ def _construct_star_schema_db(executer_instance): ...@@ -34,21 +116,22 @@ def _construct_star_schema_db(executer_instance):
:return: star schema DataFrame :return: star schema DataFrame
""" """
db = MyDB(db=executer_instance.cube) db = MyDB(db=executer_instance.cube)
memory_usage("1 - before executing query //// _construct_star_schema_db")
# load facts table # load facts table
fusion = psql.read_sql_query( with db.engine as connection:
'SELECT * FROM "{0}" '.format(executer_instance.facts), db.connection) fusion = psql.read_sql_query(
'SELECT * FROM "{0}" '.format(executer_instance.facts), connection)
cursor = db.connection.cursor() inspector = inspect(connection)
cursor.execute("""SELECT table_name FROM information_schema.tables
WHERE table_schema = 'public'""") for db_table_name in inspector.get_table_names():
for db_table_name in cursor.fetchall(): try:
try: fusion = fusion.merge(
fusion = fusion.merge( psql.read_sql_query("SELECT * FROM {0}".format(
psql.read_sql_query("SELECT * FROM {0}".format( db_table_name[0]), connection))
db_table_name[0]), db.connection)) except:
except: print('No common column')
print('No common column') pass
pass
memory_usage("2 - after query, before fetchall /////// _construct_star_schema_db")
return fusion return fusion
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# CAVEAT UTILITOR
#
# This file was automatically generated by Grako.
#
# https://pypi.python.org/pypi/grako/
#
# Any changes you make to it will be overwritten the next time
# the file is generated.
from __future__ import (absolute_import, division, print_function,
unicode_literals)
from grako.buffering import Buffer
from grako.parsing import Parser, graken
from grako.util import RE_FLAGS, generic_main, re # noqa
__all__ = ['MdxParserGen', 'UnknownSemantics', 'main']
KEYWORDS = {}
class UnknownBuffer(Buffer):
    """
    Text buffer used by the Grako-generated MDX parser.

    It adds nothing of its own: every option is forwarded as-is to the
    ``Buffer`` base class.
    """

    def __init__(self,
                 text,
                 whitespace=None,
                 nameguard=None,
                 comments_re=None,
                 eol_comments_re=None,
                 ignorecase=None,
                 namechars='',
                 **kwargs):
        # Collect the named options, then append whatever extra keyword
        # arguments the caller supplied (they can never clash with the
        # named ones, which are bound by the signature).
        buffer_options = dict(
            whitespace=whitespace,
            nameguard=nameguard,
            comments_re=comments_re,
            eol_comments_re=eol_comments_re,
            ignorecase=ignorecase,
            namechars=namechars,
        )
        buffer_options.update(kwargs)
        super(UnknownBuffer, self).__init__(text, **buffer_options)
class MdxParserGen(Parser):
    """
    Grako-generated parser for MDX SELECT statements.

    Each ``_<rule>_`` method implements the grammar rule of the same name.
    The file header states this module is regenerated by Grako, so manual
    changes to the rule bodies are overwritten on the next generation.
    """

    def __init__(self,
                 whitespace=None,
                 nameguard=None,
                 comments_re=None,
                 eol_comments_re=None,
                 ignorecase=None,
                 left_recursion=False,
                 parseinfo=True,
                 keywords=None,
                 namechars='',
                 buffer_class=UnknownBuffer,
                 **kwargs):
        """Forward all options to ``Parser``; ``keywords`` defaults to the module-level KEYWORDS."""
        if keywords is None:
            keywords = KEYWORDS
        super(MdxParserGen, self).__init__(
            whitespace=whitespace,
            nameguard=nameguard,
            comments_re=comments_re,
            eol_comments_re=eol_comments_re,
            ignorecase=ignorecase,
            left_recursion=left_recursion,
            parseinfo=parseinfo,
            keywords=keywords,
            namechars=namechars,
            buffer_class=buffer_class,
            **kwargs)

    # mdx_statement = select_statement;
    @graken()
    def _MDX_statement_(self):
        self._select_statement_()

    # select_statement::SelectStatement =
    #     name:'SELECT' [axis_specification_columns:axis_specification]
    #     [',' axis_specification_rows:axis_specification]
    #     'FROM' cube_specification:cube_specification
    #     ['WHERE' condition_specification:condition_specification] $ ;
    @graken('SelectStatement')
    def _select_statement_(self):
        self._token('SELECT')
        self.name_last_node('name')
        # optional first axis (columns)
        with self._optional():
            self._axis_specification_()
            self.name_last_node('axis_specification_columns')
        # optional second axis (rows), introduced by a comma
        with self._optional():
            self._token(',')
            self._axis_specification_()
            self.name_last_node('axis_specification_rows')
        self._token('FROM')
        self._cube_specification_()
        self.name_last_node('cube_specification')
        # optional WHERE clause
        with self._optional():
            self._token('WHERE')
            self._condition_specification_()
            self.name_last_node('condition_specification')
        # the statement must consume the whole input
        self._check_eof()
        self.ast._define([
            'axis_specification_columns', 'axis_specification_rows',
            'condition_specification', 'cube_specification', 'name'
        ], [])

    # axis_specification =
    #     [left_accolade] [fetch_form] @:dim_props_place [right_accolade]
    #     'ON' axis_name;
    @graken()
    def _axis_specification_(self):
        with self._optional():
            self._left_accolade_()
        with self._optional():
            self._fetch_form_()
        self._dim_props_place_()
        self.name_last_node('@')
        with self._optional():
            self._right_accolade_()
        self._token('ON')
        self._axis_name_()

    # dim_props_place = [left_parentheses] @:dim_props_type [right_parentheses] ;
    @graken()
    def _dim_props_place_(self):
        with self._optional():
            self._left_parentheses_()
        self._dim_props_type_()
        self.name_last_node('@')
        with self._optional():
            self._right_parentheses_()

    # dim_props_type =
    #     [left_accolade] [fetch_type] @:dim_props_op_l1 [right_accolade]
    #     [{@:operator dim_props_type}*];
    @graken()
    def _dim_props_type_(self):
        with self._optional():
            self._left_accolade_()
        with self._optional():
            self._fetch_type_()
        self._dim_props_op_l1_()
        self.name_last_node('@')
        with self._optional():
            self._right_accolade_()
        with self._optional():

            # repeated tail: an operator followed by another dim_props_type
            def block1():
                self._operator_()
                self.name_last_node('@')
                self._dim_props_type_()

            self._closure(block1)

    # dim_props_op_l1 = [left_parentheses] @:dim_props_op [right_parentheses];
    @graken()
    def _dim_props_op_l1_(self):
        with self._optional():
            self._left_parentheses_()
        self._dim_props_op_()
        self.name_last_node('@')
        with self._optional():
            self._right_parentheses_()

    # dim_props_op =
    #     [left_accolade] @:dim_props_ligne [right_accolade]
    #     [{@:comma dim_props_op}*];
    @graken()
    def _dim_props_op_(self):
        with self._optional():
            self._left_accolade_()
        self._dim_props_ligne_()
        self.name_last_node('@')
        with self._optional():
            self._right_accolade_()
        with self._optional():

            # repeated tail: a comma followed by another dim_props_op
            def block1():
                self._comma_()
                self.name_last_node('@')
                self._dim_props_op_()

            self._closure(block1)

    # dim_props_ligne =
    #     [left_parentheses] @:dimension_place [right_parentheses]
    #     [{ (@:comma | @:dpoint ) @:dim_props_ligne}*] ;
    @graken()
    def _dim_props_ligne_(self):
        with self._optional():
            self._left_parentheses_()
        self._dimension_place_()
        self.name_last_node('@')
        with self._optional():
            self._right_parentheses_()
        with self._optional():

            # repeated tail: a ',' or ':' separator, then another dim_props_ligne
            def block1():
                with self._group():
                    with self._choice():
                        with self._option():
                            self._comma_()
                            self.name_last_node('@')
                        with self._option():
                            self._dpoint_()
                            self.name_last_node('@')
                        self._error('no available options')
                self._dim_props_ligne_()
                self.name_last_node('@')

            self._closure(block1)

    # dimension_place =
    #     [left_accolade] @:dim_props [ point @:laste_node]
    #     [ @:comma @:dim_props [ point @:laste_node ]]
    #     [@:dpoint @:dim_props [ point @:laste_node ]] [right_accolade]
    #     | @:dimension_shortcut;
    @graken()
    def _dimension_place_(self):
        with self._choice():
            # first alternative: explicit dimension path(s)
            with self._option():
                with self._optional():
                    self._left_accolade_()
                self._dim_props_()
                self.name_last_node('@')
                with self._optional():
                    self._point_()
                    self._laste_node_()
                    self.name_last_node('@')
                # optional second path after a comma
                with self._optional():
                    self._comma_()
                    self.name_last_node('@')
                    self._dim_props_()
                    self.name_last_node('@')
                    with self._optional():
                        self._point_()
                        self._laste_node_()
                        self.name_last_node('@')
                # optional second path after a ':'
                with self._optional():
                    self._dpoint_()
                    self.name_last_node('@')
                    self._dim_props_()
                    self.name_last_node('@')
                    with self._optional():
                        self._point_()
                        self._laste_node_()
                        self.name_last_node('@')
                with self._optional():
                    self._right_accolade_()
            # second alternative: a dimension shortcut keyword
            with self._option():
                self._dimension_shortcut_()
                self.name_last_node('@')
            self._error('no available options')

    # dim_props = {[point] [left_bracket] @:dimension [right_bracket]}* ;
    @graken()
    def _dim_props_(self):

        def block0():
            with self._optional():
                self._point_()
            with self._optional():
                self._left_bracket_()
            self._dimension_()
            self.name_last_node('@')
            with self._optional():
                self._right_bracket_()

        self._closure(block0)

    # laste_node = "members" | "children" | "Members" | 'ALLMEMBERS' ;
    @graken()
    def _laste_node_(self):
        with self._choice():
            with self._option():
                self._token('members')
            with self._option():
                self._token('children')
            with self._option():
                self._token('Members')
            with self._option():
                self._token('ALLMEMBERS')
            self._error('expecting one of: ALLMEMBERS Members children members')

    # dimension = @:/[a-zA-Z0-9'_'' '',']*/ ;
    # NOTE(review): the pattern ends in '*', so it can also match an empty
    # string - confirm this is intended in the grammar.
    @graken()
    def _dimension_(self):
        self._pattern(r"[a-zA-Z0-9'_'' '',']*")
        self.name_last_node('@')

    # Axis identifier: one of the tokens 0, 1, COLUMNS, ROWS, _ROWS.
    @graken()
    def _axis_name_(self):
        with self._choice():
            with self._option():
                self._token('0')
            with self._option():
                self._token('1')
            with self._option():
                self._token('COLUMNS')
            with self._option():
                self._token('ROWS')
            with self._option():
                self._token('_ROWS')
            self._error('expecting one of: 0 1 COLUMNS ROWS _ROWS')

    # Cube name: a dimension pattern with optional surrounding brackets.
    @graken()
    def _cube_specification_(self):
        with self._optional():
            self._left_bracket_()
        self._dimension_()
        with self._optional():
            self._right_bracket_()

    # WHERE condition: optionally parenthesised, repeated
    # [point] [left_bracket] dimension [right_bracket] segments.
    @graken()
    def _condition_specification_(self):
        with self._optional():
            self._left_parentheses_()

        def block0():
            with self._optional():
                self._point_()
            with self._optional():
                self._left_bracket_()
            self._dimension_()
            self.name_last_node('@')
            with self._optional():
                self._right_bracket_()

        self._closure(block0)
        with self._optional():
            self._right_parentheses_()

    # A single decimal digit token.
    @graken()
    def _digit_(self):
        with self._choice():
            with self._option():
                self._token('0')
            with self._option():
                self._token('1')
            with self._option():
                self._token('2')
            with self._option():
                self._token('3')
            with self._option():
                self._token('4')
            with self._option():
                self._token('5')
            with self._option():
                self._token('6')
            with self._option():
                self._token('7')
            with self._option():
                self._token('8')
            with self._option():
                self._token('9')
            self._error('expecting one of: 0 1 2 3 4 5 6 7 8 9')

    # Set-function keyword accepted in front of a dim_props_op_l1.
    @graken()
    def _fetch_type_(self):
        with self._choice():
            with self._option():
                self._token('CROSSJOIN')
            with self._option():
                self._token('NONEMPTY')
            with self._option():
                self._token('union')
            with self._option():
                self._token('except')
            with self._option():
                self._token('extract')
            self._error(
                'expecting one of: CROSSJOIN NONEMPTY except extract union')

    # Shortcut keywords accepted in place of an explicit dimension path.
    @graken()
    def _dimension_shortcut_(self):
        with self._choice():
            with self._option():
                self._token('all')
            with self._option():
                self._token('time')
            self._error('expecting one of: all time')

    # Non-empty modifier accepted at the start of an axis specification.
    @graken()
    def _fetch_form_(self):
        with self._choice():
            with self._option():
                self._token('NONEMPTY')
            with self._option():
                self._token('non_empty')
            with self._option():
                self._token('non empty')
            self._error('expecting one of: NONEMPTY non empty non_empty')

    # --- single-token punctuation rules ---

    @graken()
    def _left_bracket_(self):
        self._token('[')

    @graken()
    def _right_bracket_(self):
        self._token(']')

    @graken()
    def _left_parentheses_(self):
        self._token('(')

    @graken()
    def _right_parentheses_(self):
        self._token(')')

    @graken()
    def _left_accolade_(self):
        self._token('{')

    @graken()
    def _right_accolade_(self):
        self._token('}')

    @graken()
    def _point_(self):
        self._token('.')

    @graken()
    def _dpoint_(self):
        self._token(':')

    @graken()
    def _comma_(self):
        self._token(',')

    # Arithmetic operator token: one of + - / *.
    @graken()
    def _operator_(self):
        with self._choice():
            with self._option():
                self._token('+')
            with self._option():
                self._token('-')
            with self._option():
                self._token('/')
            with self._option():
                self._token('*')
            self._error('expecting one of: * + - /')
class UnknownSemantics(object):
    """
    Default semantic actions for the MDX grammar.

    There is one hook per grammar rule, and every hook hands the AST node
    it receives back unchanged.
    """

    def _return_unchanged(self, ast):
        """Shared implementation of every hook: give ``ast`` back as-is."""
        return ast

    # statement level
    mdx_statement = _return_unchanged
    select_statement = _return_unchanged
    axis_specification = _return_unchanged

    # dimension properties
    dim_props_place = _return_unchanged
    dim_props_type = _return_unchanged
    dim_props_op_l1 = _return_unchanged
    dim_props_op = _return_unchanged
    dim_props_ligne = _return_unchanged
    dimension_place = _return_unchanged
    dim_props = _return_unchanged
    laste_node = _return_unchanged
    dimension = _return_unchanged

    # axis, cube and condition
    axis_name = _return_unchanged
    cube_specification = _return_unchanged
    condition_specification = _return_unchanged

    # keywords and literals
    digit = _return_unchanged
    fetch_type = _return_unchanged
    dimension_shortcut = _return_unchanged
    fetch_form = _return_unchanged

    # punctuation
    left_bracket = _return_unchanged
    right_bracket = _return_unchanged
    left_parentheses = _return_unchanged
    right_parentheses = _return_unchanged
    left_accolade = _return_unchanged
    right_accolade = _return_unchanged
    point = _return_unchanged
    dpoint = _return_unchanged
    comma = _return_unchanged
    operator = _return_unchanged
def main(filename, startrule, **kwargs):
    """
    Read ``filename`` and parse its content with ``MdxParserGen``.

    :param filename: path of the file holding the text to parse
    :param startrule: name of the grammar rule parsing starts from
    :param kwargs: extra keyword arguments handed over to ``parse``
    :return: whatever ``MdxParserGen.parse`` returns
    """
    # The file is read completely before the parser is created.
    with open(filename) as source_file:
        mdx_text = source_file.read()
    return MdxParserGen(parseinfo=False).parse(
        mdx_text, startrule, filename=filename, **kwargs)
# Script entry point: hand control to grako's generic_main, then print the
# result twice - first as returned, then serialised as indented JSON.
if __name__ == '__main__':
    # json is only needed when the module is run as a script
    import json
    ast = generic_main(main, MdxParserGen, name='Unknown')
    print('AST:')
    print(ast)
    print()
    print('JSON:')
    print(json.dumps(ast, indent=2))
    print()
from __future__ import absolute_import, division, print_function
class SelectStatement:
    """Thin wrapper holding a parsed select statement."""

    def __init__(self, select_statement):
        """Store ``select_statement`` unchanged on the instance."""
        self.select_statement = select_statement

    def __str__(self):
        """Return the wrapped value rendered through ``str.format``."""
        wrapped = self.select_statement
        return '{}'.format(wrapped)
(* This EBNF file is a translation and improvement of Microsoft's MDX specification, from https://msdn.microsoft.com/fr-fr/library/windows/desktop/ms717923%28v=vs.85%29.aspx *)
(* The EBNF conversion was guided by http://stackoverflow.com/questions/14922242/how-to-convert-bnf-to-ebnf *)
(* To generate mdx_parser.py, run: /path/to/grako bnf_mdx.ebnf -o mdx_parser.py *)
(* Entry rule: an MDX statement is a single select_statement. *)
mdx_statement =
select_statement;
(* SELECT [columns axis] [, rows axis] FROM cube [WHERE condition], followed by end of input ($). *)
(* The named parts (name, axis_specification_columns, axis_specification_rows, *)
(* cube_specification, condition_specification) become fields of the SelectStatement node. *)
select_statement::SelectStatement = name:'SELECT' [axis_specification_columns:axis_specification]
[',' axis_specification_rows:axis_specification]
'FROM' cube_specification:cube_specification
['WHERE' condition_specification:condition_specification]$
;
(* One axis: optional braces and fetch_form around the tuples, then 'ON' and the axis name. *)
axis_specification = [left_accolade] [fetch_form] @:dim_props_place [right_accolade] 'ON' axis_name;
(* Nested wrappers: each level accepts optional parentheses or braces around the next one. *)
dim_props_place = [left_parentheses] @:dim_props_type [right_parentheses] ;
dim_props_type = [left_accolade] [fetch_type] @:dim_props_op_l1 [right_accolade] [{@:operator dim_props_type}*];
dim_props_op_l1 = [left_parentheses] @:dim_props_op [right_parentheses];
dim_props_op = [left_accolade] @:dim_props_ligne [right_accolade] [{@:comma dim_props_op}*];
dim_props_ligne = [left_parentheses] @:dimension_place [right_parentheses] [{ (@:comma | @:dpoint ) @:dim_props_ligne}*] ;
(* Either a bracketed member path (optionally followed by a comma- or colon-separated second path), or a shortcut keyword. *)
dimension_place = [left_accolade] @:dim_props [ point @:laste_node] [ @:comma @:dim_props [ point @:laste_node ]] [@:dpoint @:dim_props [ point @:laste_node ]] [right_accolade] | @:dimension_shortcut;
(* A member path: repeated [name] segments, optionally separated by points. *)
dim_props = {[point] [left_bracket] @:dimension [right_bracket]}* ;
(* Keyword allowed after the final point of a member path. *)
laste_node = "members" | "children" | "Members" | 'ALLMEMBERS' ;
(* A name: any run (possibly empty) of letters, digits, underscores, spaces, commas and single quotes. *)
dimension = @:/[a-zA-Z0-9'_'' '',']*/ ;
axis_name = '0' | '1' |'COLUMNS' | 'ROWS' | '_ROWS';
cube_specification = [left_bracket] dimension [right_bracket];
condition_specification = [left_parentheses] {[point] [left_bracket] @:dimension [right_bracket]}* [right_parentheses] ;
(* NOTE(review): digit is not referenced by any other rule in this grammar. *)
digit =
"0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ;
fetch_type = 'CROSSJOIN' | 'NONEMPTY' | 'union' | 'except' | 'extract' ;
dimension_shortcut = 'all' | 'time' ;
fetch_form = 'NONEMPTY' | 'non_empty' | 'non empty' ;
(* Punctuation and operator terminals. *)
left_bracket = '[';
right_bracket = ']';
left_parentheses = '(';
right_parentheses = ')';
left_accolade = '{';
right_accolade = '}';
point = '.' ;
dpoint = ':' ;
comma = ',' ;
operator = '+' | '-' | '/' | '*' ;
from __future__ import absolute_import, division, print_function
from grako.model import ModelBuilderSemantics
from .gen_parser.mdxparser import MdxParserGen
from .gen_parser.models import SelectStatement
class MdxParser:
    """Parse the mdx query and split it into well-defined parts."""

    # Name of the grammar rule parsing starts from.
    # NOTE(review): the grammar file (bnf_mdx.ebnf) spells its entry rule
    # 'mdx_statement' in lowercase -- confirm the generated parser resolves
    # this name before relying on it.
    START = 'MDX_statement'

    @staticmethod
    def _strip_empty_token(node):
        """Drop the first empty-string token from a list node, in place.

        Anything that is not a list (``None``, a plain string such as a cube
        name written without brackets, ...) is returned untouched: strings
        have no ``remove`` method and ``u'' in some_string`` is always true,
        so they must never reach the removal.

        :param node: value taken from the parsed statement
        :return: the same ``node`` object
        """
        if isinstance(node, list) and u'' in node:
            node.remove(u'')
        return node

    @staticmethod
    def _describe(statement):
        """Render every part of the parsed statement as labelled lines.

        :param statement: object exposing ``name``,
            ``axis_specification_columns``, ``axis_specification_rows``,
            ``cube_specification`` and ``condition_specification``
        :return: five-line text summary, one ``label = value`` per line
        """
        # Exactly one argument per placeholder, in label order.
        return ('Operation = {} \n'
                'Columns = {} \n'
                'Rows = {} \n'
                'From = {} \n'
                'Where = {} \n').format(
                    statement.name,
                    statement.axis_specification_columns,
                    statement.axis_specification_rows,
                    statement.cube_specification,
                    statement.condition_specification,
                )

    @staticmethod
    def parsing_mdx_query(axis, query):
        """Split the query into axis.

        **Example**::

            SELECT
            { [Geography].[Geo].[Country].[France],
              [Geography].[Geo].[Country].[Spain] } ON COLUMNS,
            { [Product].[Prod].[Company].[Crazy Development] } ON ROWS
            FROM [Sales]
            WHERE [Time].[Calendar].[Year].[2010]

            +------------+------------------------------------------------+
            |            | [Geography].[Geo].[Country].[France]           |
            | column     |                                                |
            |            | [Geography].[Geo].[Country].[Spain]            |
            +------------+------------------------------------------------+
            | row        | [Product].[Prod].[Company].[Crazy Development] |
            +------------+------------------------------------------------+
            | cube       | [Sales]                                        |
            +------------+------------------------------------------------+
            | condition  | [Time].[Calendar].[Year].[2010]                |
            +------------+------------------------------------------------+

        :param query: MDX Query
        :param axis: column | row | cube | condition | all
        :return: Tuples in the axis, from the MDX query; a text summary of
            every part for ``all``; ``None`` for any other ``axis`` value
        """
        model = MdxParserGen(semantics=ModelBuilderSemantics(
            types=[SelectStatement]))
        ast = model.parse(query, rule_name=MdxParser.START, ignorecase=True)
        statement = ast.select_statement

        if axis == "column":
            return MdxParser._strip_empty_token(
                statement.axis_specification_columns)
        if axis == "row":
            return MdxParser._strip_empty_token(
                statement.axis_specification_rows)
        if axis == "cube":
            cube = MdxParser._strip_empty_token(statement.cube_specification)
            # For a list node the element at index 1 is returned; any other
            # value is handed back as-is.
            return cube[1] if isinstance(cube, list) else cube
        if axis == "condition":
            return MdxParser._strip_empty_token(
                statement.condition_specification)
        if axis == "all":
            return MdxParser._describe(statement)
        return None
Memory summary:1 - before executing query //// _load_tables_db
VM: 700.28Mb
Memory summary:2 - after query, before fetchall /////// _load_tables_db
VM: 2781.79Mb
Memory summary:1 - before executing query //// 1111 _construct_web_star_schema_config_file
VM: 2782.04Mb
Memory summary:2 - after query, before fetchall /////// 222222222222 _construct_star_schema_config_file
VM: 2782.29Mb
Memory summary:1 - before executing query //// 3333333333 _construct_web_star_schema_config_file
VM: 2782.29Mb
Memory summary:2 - after query, before fetchall /////// 44444444 _construct_star_schema_config_file
VM: 2784.29Mb
Memory summary:1 - before executing query //// 55555555 _construct_web_star_schema_config_file
VM: 2784.29Mb
Memory summary:2 - after query, before fetchall /////// 6666666666 _construct_star_schema_config_file
VM: 2784.29Mb
Memory summary:1 - before executing query //// 1111 _construct_web_star_schema_config_file
VM: 2784.04Mb
Memory summary:2 - after query, before fetchall /////// 222222222222 _construct_star_schema_config_file
VM: 2783.79Mb
Memory summary:1 - before executing query //// 3333333333 _construct_web_star_schema_config_file
VM: 2783.79Mb
Memory summary:2 - after query, before fetchall /////// 44444444 _construct_star_schema_config_file
VM: 2784.29Mb
Memory summary:1 - before executing query //// 55555555 _construct_web_star_schema_config_file
VM: 2784.29Mb
Memory summary:2 - after query, before fetchall /////// 6666666666 _construct_star_schema_config_file
VM: 2784.29Mb
Memory summary:1 - before executing query //// _load_tables_db
types | # objects | total size
======= | =========== | ============
dict | 7429 | 8.72 MB
str | 58990 | 8.30 MB
----------------------------
Memory summary:2 - after query, before fetchall /////// _load_tables_db
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.frame.DataFrame | 180 | 2.01 GB
dict | 7802 | 8.88 MB
----------------------------
Memory summary:1 - before executing query //// 1111 _construct_web_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.series.Series | 592 | 2.01 GB
<class 'pandas.core.frame.DataFrame | 179 | 2.01 GB
----------------------------
Memory summary:2 - after query, before fetchall /////// 222222222222 _construct_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.series.Series | 618 | 2.01 GB
<class 'pandas.core.frame.DataFrame | 180 | 2.01 GB
----------------------------
Memory summary:1 - before executing query //// 3333333333 _construct_web_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.series.Series | 618 | 2.01 GB
<class 'pandas.core.frame.DataFrame | 180 | 2.01 GB
----------------------------
Memory summary:2 - after query, before fetchall /////// 44444444 _construct_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.frame.DataFrame | 185 | 2.01 GB
<class 'pandas.core.series.Series | 618 | 2.01 GB
----------------------------
Memory summary:1 - before executing query //// 55555555 _construct_web_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.series.Series | 630 | 2.01 GB
<class 'pandas.core.frame.DataFrame | 185 | 2.01 GB
----------------------------
Memory summary:2 - after query, before fetchall /////// 6666666666 _construct_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.frame.DataFrame | 185 | 2.01 GB
<class 'pandas.core.series.Series | 604 | 2.01 GB
----------------------------
Memory summary:1 - before executing query //// 1111 _construct_web_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.frame.DataFrame | 180 | 2.01 GB
<class 'pandas.core.series.Series | 592 | 2.01 GB
----------------------------
Memory summary:2 - after query, before fetchall /////// 222222222222 _construct_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.series.Series | 629 | 2.01 GB
<class 'pandas.core.frame.DataFrame | 181 | 2.01 GB
----------------------------
Memory summary:1 - before executing query //// 3333333333 _construct_web_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.series.Series | 629 | 2.01 GB
<class 'pandas.core.frame.DataFrame | 181 | 2.01 GB
----------------------------
Memory summary:2 - after query, before fetchall /////// 44444444 _construct_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.frame.DataFrame | 186 | 2.01 GB
<class 'pandas.core.series.Series | 629 | 2.01 GB
----------------------------
Memory summary:1 - before executing query //// 55555555 _construct_web_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.series.Series | 641 | 2.01 GB
<class 'pandas.core.frame.DataFrame | 186 | 2.01 GB
----------------------------
Memory summary:2 - after query, before fetchall /////// 6666666666 _construct_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.frame.DataFrame | 186 | 2.01 GB
<class 'pandas.core.series.Series | 615 | 2.01 GB
Memory summary:1 - before executing query //// _load_tables_db
VM: 700.27Mb
Memory summary:2 - after query, before fetchall /////// _load_tables_db
VM: 2719.36Mb
Memory summary:1 - before executing query //// 1111 _construct_web_star_schema_config_file
VM: 2719.36Mb
Memory summary:2 - after query, before fetchall /////// 222222222222 _construct_star_schema_config_file
VM: 2719.86Mb
Memory summary:1 - before executing query //// 3333333333 _construct_web_star_schema_config_file
VM: 2719.86Mb
Memory summary:2 - after query, before fetchall /////// 44444444 _construct_star_schema_config_file
VM: 2721.61Mb
Memory summary:1 - before executing query //// 55555555 _construct_web_star_schema_config_file
VM: 2721.61Mb
Memory summary:2 - after query, before fetchall /////// 6666666666 _construct_star_schema_config_file
VM: 2721.61Mb
Memory summary:1 - before executing query //// 1111 _construct_web_star_schema_config_file
VM: 2721.61Mb
Memory summary:2 - after query, before fetchall /////// 222222222222 _construct_star_schema_config_file
VM: 2721.36Mb
Memory summary:1 - before executing query //// 3333333333 _construct_web_star_schema_config_file
VM: 2721.36Mb
Memory summary:2 - after query, before fetchall /////// 44444444 _construct_star_schema_config_file
VM: 2721.86Mb
Memory summary:1 - before executing query //// 55555555 _construct_web_star_schema_config_file
VM: 2721.86Mb
Memory summary:2 - after query, before fetchall /////// 6666666666 _construct_star_schema_config_file
VM: 2721.86Mb
Memory summary:1 - before executing query //// 1111 _construct_web_star_schema_config_file
VM: 2721.86Mb
Memory summary:2 - after query, before fetchall /////// 222222222222 _construct_star_schema_config_file
VM: 2721.36Mb
Memory summary:1 - before executing query //// 3333333333 _construct_web_star_schema_config_file
VM: 2721.36Mb
Memory summary:2 - after query, before fetchall /////// 44444444 _construct_star_schema_config_file
VM: 2721.86Mb
Memory summary:1 - before executing query //// 55555555 _construct_web_star_schema_config_file
VM: 2721.86Mb
Memory summary:2 - after query, before fetchall /////// 6666666666 _construct_star_schema_config_file
VM: 2721.86Mb
****************************************
types | # objects | total size
======= | =========== | ============
dict | 7430 | 8.72 MB
str | 58996 | 8.30 MB
----------------------------
Memory summary:2 - after query, before fetchall /////// _load_tables_db
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.frame.DataFrame | 180 | 1.93 GB
dict | 7803 | 8.86 MB
----------------------------
Memory summary:1 - before executing query //// 1111 _construct_web_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.series.Series | 592 | 1.93 GB
<class 'pandas.core.frame.DataFrame | 179 | 1.93 GB
----------------------------
Memory summary:2 - after query, before fetchall /////// 222222222222 _construct_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.series.Series | 618 | 1.93 GB
<class 'pandas.core.frame.DataFrame | 180 | 1.93 GB
----------------------------
Memory summary:1 - before executing query //// 3333333333 _construct_web_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.series.Series | 618 | 1.93 GB
<class 'pandas.core.frame.DataFrame | 180 | 1.93 GB
----------------------------
Memory summary:2 - after query, before fetchall /////// 44444444 _construct_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.frame.DataFrame | 185 | 1.93 GB
<class 'pandas.core.series.Series | 618 | 1.93 GB
----------------------------
Memory summary:1 - before executing query //// 55555555 _construct_web_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.series.Series | 630 | 1.93 GB
<class 'pandas.core.frame.DataFrame | 185 | 1.93 GB
----------------------------
Memory summary:2 - after query, before fetchall /////// 6666666666 _construct_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.frame.DataFrame | 185 | 1.93 GB
<class 'pandas.core.series.Series | 604 | 1.93 GB
----------------------------
Memory summary:1 - before executing query //// 1111 _construct_web_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.frame.DataFrame | 180 | 1.93 GB
<class 'pandas.core.series.Series | 592 | 1.93 GB
----------------------------
Memory summary:2 - after query, before fetchall /////// 222222222222 _construct_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.series.Series | 629 | 1.93 GB
<class 'pandas.core.frame.DataFrame | 181 | 1.93 GB
----------------------------
Memory summary:1 - before executing query //// 3333333333 _construct_web_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.series.Series | 629 | 1.93 GB
<class 'pandas.core.frame.DataFrame | 181 | 1.93 GB
----------------------------
Memory summary:2 - after query, before fetchall /////// 44444444 _construct_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.frame.DataFrame | 186 | 1.93 GB
<class 'pandas.core.series.Series | 629 | 1.93 GB
----------------------------
Memory summary:1 - before executing query //// 55555555 _construct_web_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.series.Series | 641 | 1.93 GB
<class 'pandas.core.frame.DataFrame | 186 | 1.93 GB
----------------------------
Memory summary:2 - after query, before fetchall /////// 6666666666 _construct_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.frame.DataFrame | 186 | 1.93 GB
<class 'pandas.core.series.Series | 615 | 1.93 GB
----------------------------
Memory summary:1 - before executing query //// 1111 _construct_web_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.series.Series | 603 | 1.93 GB
<class 'pandas.core.frame.DataFrame | 180 | 1.93 GB
----------------------------
Memory summary:2 - after query, before fetchall /////// 222222222222 _construct_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.series.Series | 629 | 1.93 GB
<class 'pandas.core.frame.DataFrame | 181 | 1.93 GB
----------------------------
Memory summary:1 - before executing query //// 3333333333 _construct_web_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.series.Series | 629 | 1.93 GB
<class 'pandas.core.frame.DataFrame | 181 | 1.93 GB
----------------------------
Memory summary:2 - after query, before fetchall /////// 44444444 _construct_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.frame.DataFrame | 186 | 1.93 GB
<class 'pandas.core.series.Series | 629 | 1.93 GB
----------------------------
Memory summary:1 - before executing query //// 55555555 _construct_web_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.series.Series | 641 | 1.93 GB
<class 'pandas.core.frame.DataFrame | 186 | 1.93 GB
----------------------------
Memory summary:2 - after query, before fetchall /////// 6666666666 _construct_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.frame.DataFrame | 186 | 1.93 GB
<class 'pandas.core.series.Series | 615 | 1.93 GB
----------------------------
Memory summary:1 - before executing query //// _load_tables_db
VM: 700.27Mb
Memory summary:2 - after query, before fetchall /////// _load_tables_db
VM: 3229.79Mb
Memory summary:1 - before executing query //// 1111 _construct_web_star_schema_config_file
VM: 3229.79Mb
Memory summary:2 - after query, before fetchall /////// 222222222222 _construct_star_schema_config_file
VM: 3230.29Mb
Memory summary:1 - before executing query //// 3333333333 _construct_web_star_schema_config_file
VM: 3230.29Mb
Memory summary:2 - after query, before fetchall /////// 44444444 _construct_star_schema_config_file
VM: 3232.29Mb
Memory summary:1 - before executing query //// 55555555 _construct_web_star_schema_config_file
VM: 3232.29Mb
Memory summary:2 - after query, before fetchall /////// 6666666666 _construct_star_schema_config_file
VM: 3232.29Mb
Memory summary:1 - before executing query //// 1111 _construct_web_star_schema_config_file
VM: 3232.29Mb
Memory summary:2 - after query, before fetchall /////// 222222222222 _construct_star_schema_config_file
VM: 3232.29Mb
Memory summary:1 - before executing query //// 3333333333 _construct_web_star_schema_config_file
VM: 3232.29Mb
Memory summary:2 - after query, before fetchall /////// 44444444 _construct_star_schema_config_file
VM: 3232.29Mb
Memory summary:1 - before executing query //// 55555555 _construct_web_star_schema_config_file
VM: 3232.29Mb
Memory summary:2 - after query, before fetchall /////// 6666666666 _construct_star_schema_config_file
VM: 3232.29Mb
Memory summary:1 - before executing query //// 1111 _construct_web_star_schema_config_file
VM: 3232.29Mb
Memory summary:2 - after query, before fetchall /////// 222222222222 _construct_star_schema_config_file
VM: 3232.29Mb
Memory summary:1 - before executing query //// 3333333333 _construct_web_star_schema_config_file
VM: 3232.29Mb
Memory summary:2 - after query, before fetchall /////// 44444444 _construct_star_schema_config_file
VM: 3232.54Mb
Memory summary:1 - before executing query //// 55555555 _construct_web_star_schema_config_file
VM: 3232.54Mb
Memory summary:2 - after query, before fetchall /////// 6666666666 _construct_star_schema_config_file
VM: 3232.54Mb
*****************************************************************
Memory summary:1 - before executing query //// _load_tables_db
types | # objects | total size
======= | =========== | ============
dict | 7429 | 8.72 MB
str | 58990 | 8.30 MB
----------------------------
Memory summary:2 - after query, before fetchall /////// _load_tables_db
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.frame.DataFrame | 180 | 2.01 GB
dict | 7844 | 8.91 MB
----------------------------
Memory summary:1 - before executing query //// 1111 _construct_web_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.series.Series | 592 | 2.01 GB
<class 'pandas.core.frame.DataFrame | 179 | 2.01 GB
----------------------------
Memory summary:2 - after query, before fetchall /////// 222222222222 _construct_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.series.Series | 618 | 2.01 GB
<class 'pandas.core.frame.DataFrame | 180 | 2.01 GB
----------------------------
Memory summary:1 - before executing query //// 3333333333 _construct_web_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.series.Series | 618 | 2.01 GB
<class 'pandas.core.frame.DataFrame | 180 | 2.01 GB
----------------------------
Memory summary:2 - after query, before fetchall /////// 44444444 _construct_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.frame.DataFrame | 185 | 2.01 GB
<class 'pandas.core.series.Series | 618 | 2.01 GB
----------------------------
Memory summary:1 - before executing query //// 55555555 _construct_web_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.series.Series | 630 | 2.01 GB
<class 'pandas.core.frame.DataFrame | 185 | 2.01 GB
----------------------------
Memory summary:2 - after query, before fetchall /////// 6666666666 _construct_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.frame.DataFrame | 185 | 2.01 GB
<class 'pandas.core.series.Series | 604 | 2.01 GB
----------------------------
Memory summary:1 - before executing query //// 1111 _construct_web_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.frame.DataFrame | 180 | 2.01 GB
<class 'pandas.core.series.Series | 592 | 2.01 GB
----------------------------
Memory summary:2 - after query, before fetchall /////// 222222222222 _construct_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.series.Series | 629 | 2.01 GB
<class 'pandas.core.frame.DataFrame | 181 | 2.01 GB
----------------------------
Memory summary:1 - before executing query //// 3333333333 _construct_web_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.series.Series | 629 | 2.01 GB
<class 'pandas.core.frame.DataFrame | 181 | 2.01 GB
----------------------------
Memory summary:2 - after query, before fetchall /////// 44444444 _construct_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.frame.DataFrame | 186 | 2.01 GB
<class 'pandas.core.series.Series | 629 | 2.01 GB
----------------------------
Memory summary:1 - before executing query //// 55555555 _construct_web_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.series.Series | 641 | 2.01 GB
<class 'pandas.core.frame.DataFrame | 186 | 2.01 GB
----------------------------
Memory summary:2 - after query, before fetchall /////// 6666666666 _construct_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.frame.DataFrame | 186 | 2.01 GB
<class 'pandas.core.series.Series | 615 | 2.01 GB
----------------------------
Memory summary:1 - before executing query //// 1111 _construct_web_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.series.Series | 603 | 2.01 GB
<class 'pandas.core.frame.DataFrame | 180 | 2.01 GB
----------------------------
Memory summary:2 - after query, before fetchall /////// 222222222222 _construct_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.series.Series | 629 | 2.01 GB
<class 'pandas.core.frame.DataFrame | 181 | 2.01 GB
----------------------------
Memory summary:1 - before executing query //// 3333333333 _construct_web_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.series.Series | 629 | 2.01 GB
<class 'pandas.core.frame.DataFrame | 181 | 2.01 GB
----------------------------
Memory summary:2 - after query, before fetchall /////// 44444444 _construct_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.frame.DataFrame | 186 | 2.01 GB
<class 'pandas.core.series.Series | 629 | 2.01 GB
----------------------------
Memory summary:1 - before executing query //// 55555555 _construct_web_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.series.Series | 641 | 2.01 GB
<class 'pandas.core.frame.DataFrame | 186 | 2.01 GB
----------------------------
Memory summary:2 - after query, before fetchall /////// 6666666666 _construct_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.frame.DataFrame | 186 | 2.01 GB
<class 'pandas.core.series.Series | 615 | 2.01 GB
----------------------------
Memory summary:1 - before executing query //// _load_tables_db
VM: 699.77Mb
Memory summary:2 - after query, before fetchall /////// _load_tables_db
VM: 1763.52Mb
Memory summary:1 - before executing query //// 1111 _construct_web_star_schema_config_file
VM: 1763.52Mb
Memory summary:2 - after query, before fetchall /////// 222222222222 _construct_star_schema_config_file
VM: 1763.77Mb
Memory summary:1 - before executing query //// 3333333333 _construct_web_star_schema_config_file
VM: 1763.77Mb
Memory summary:2 - after query, before fetchall /////// 44444444 _construct_star_schema_config_file
VM: 1764.27Mb
Memory summary:1 - before executing query //// 55555555 _construct_web_star_schema_config_file
VM: 1764.27Mb
Memory summary:2 - after query, before fetchall /////// 6666666666 _construct_star_schema_config_file
VM: 1764.27Mb
Memory summary:1 - before executing query //// 1111 _construct_web_star_schema_config_file
VM: 1764.27Mb
Memory summary:2 - after query, before fetchall /////// 222222222222 _construct_star_schema_config_file
VM: 1764.27Mb
Memory summary:1 - before executing query //// 3333333333 _construct_web_star_schema_config_file
VM: 1764.27Mb
Memory summary:2 - after query, before fetchall /////// 44444444 _construct_star_schema_config_file
VM: 1764.27Mb
Memory summary:1 - before executing query //// 55555555 _construct_web_star_schema_config_file
VM: 1764.27Mb
Memory summary:2 - after query, before fetchall /////// 6666666666 _construct_star_schema_config_file
VM: 1764.27Mb
Memory summary:1 - before executing query //// 1111 _construct_web_star_schema_config_file
VM: 1764.27Mb
Memory summary:2 - after query, before fetchall /////// 222222222222 _construct_star_schema_config_file
VM: 1764.27Mb
Memory summary:1 - before executing query //// 3333333333 _construct_web_star_schema_config_file
VM: 1764.27Mb
Memory summary:2 - after query, before fetchall /////// 44444444 _construct_star_schema_config_file
VM: 1764.52Mb
Memory summary:1 - before executing query //// 55555555 _construct_web_star_schema_config_file
VM: 1764.52Mb
Memory summary:2 - after query, before fetchall /////// 6666666666 _construct_star_schema_config_file
VM: 1764.52Mb
**********************************
types | # objects | total size
======= | =========== | ============
dict | 7235 | 8.59 MB
str | 58455 | 8.18 MB
----------------------------
Memory summary:2 - after query, before fetchall /////// _load_tables_db
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.frame.DataFrame | 180 | 539.66 MB
dict | 7649 | 8.78 MB
----------------------------
Memory summary:1 - before executing query //// 1111 _construct_web_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.series.Series | 592 | 538.26 MB
<class 'pandas.core.frame.DataFrame | 179 | 538.22 MB
----------------------------
Memory summary:2 - after query, before fetchall /////// 222222222222 _construct_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.series.Series | 618 | 539.70 MB
<class 'pandas.core.frame.DataFrame | 180 | 539.66 MB
----------------------------
Memory summary:1 - before executing query //// 3333333333 _construct_web_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.series.Series | 618 | 539.70 MB
<class 'pandas.core.frame.DataFrame | 180 | 539.66 MB
----------------------------
Memory summary:2 - after query, before fetchall /////// 44444444 _construct_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.frame.DataFrame | 185 | 540.42 MB
<class 'pandas.core.series.Series | 618 | 539.70 MB
----------------------------
Memory summary:1 - before executing query //// 55555555 _construct_web_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.series.Series | 630 | 540.47 MB
<class 'pandas.core.frame.DataFrame | 185 | 540.42 MB
----------------------------
Memory summary:2 - after query, before fetchall /////// 6666666666 _construct_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.frame.DataFrame | 185 | 540.83 MB
<class 'pandas.core.series.Series | 604 | 539.02 MB
----------------------------
Memory summary:1 - before executing query //// 1111 _construct_web_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.frame.DataFrame | 180 | 538.50 MB
<class 'pandas.core.series.Series | 592 | 538.26 MB
----------------------------
Memory summary:2 - after query, before fetchall /////// 222222222222 _construct_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.series.Series | 629 | 540.06 MB
<class 'pandas.core.frame.DataFrame | 181 | 539.94 MB
----------------------------
Memory summary:1 - before executing query //// 3333333333 _construct_web_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.series.Series | 629 | 540.06 MB
<class 'pandas.core.frame.DataFrame | 181 | 539.94 MB
----------------------------
Memory summary:2 - after query, before fetchall /////// 44444444 _construct_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.frame.DataFrame | 186 | 540.71 MB
<class 'pandas.core.series.Series | 629 | 540.06 MB
----------------------------
Memory summary:1 - before executing query //// 55555555 _construct_web_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.series.Series | 641 | 540.83 MB
<class 'pandas.core.frame.DataFrame | 186 | 540.71 MB
----------------------------
Memory summary:2 - after query, before fetchall /////// 6666666666 _construct_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.frame.DataFrame | 186 | 541.11 MB
<class 'pandas.core.series.Series | 615 | 539.39 MB
----------------------------
Memory summary:1 - before executing query //// 1111 _construct_web_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.series.Series | 603 | 538.62 MB
<class 'pandas.core.frame.DataFrame | 180 | 538.50 MB
----------------------------
Memory summary:2 - after query, before fetchall /////// 222222222222 _construct_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.series.Series | 629 | 540.06 MB
<class 'pandas.core.frame.DataFrame | 181 | 539.94 MB
----------------------------
Memory summary:1 - before executing query //// 3333333333 _construct_web_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.series.Series | 629 | 540.06 MB
<class 'pandas.core.frame.DataFrame | 181 | 539.94 MB
----------------------------
Memory summary:2 - after query, before fetchall /////// 44444444 _construct_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.frame.DataFrame | 186 | 540.71 MB
<class 'pandas.core.series.Series | 629 | 540.06 MB
----------------------------
Memory summary:1 - before executing query //// 55555555 _construct_web_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.series.Series | 641 | 540.83 MB
<class 'pandas.core.frame.DataFrame | 186 | 540.71 MB
----------------------------
Memory summary:2 - after query, before fetchall /////// 6666666666 _construct_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.frame.DataFrame | 186 | 541.11 MB
<class 'pandas.core.series.Series | 615 | 539.39 MB
...@@ -198,12 +198,18 @@ class ConfigParser: ...@@ -198,12 +198,18 @@ class ConfigParser:
:param cube_path: path to cube (csv folders) :param cube_path: path to cube (csv folders)
:param file_name: config file name (DEFAULT = cubes-config.xml) :param file_name: config file name (DEFAULT = cubes-config.xml)
""" """
if cube_path is None: # home_directory = home_directory
if 'OLAPY_PATH' in os.environ:
home_directory = os.environ['OLAPY_PATH']
else:
from os.path import expanduser from os.path import expanduser
home_directory = expanduser("~") home_directory = expanduser("~")
if cube_path is None:
self.cube_path = os.path.join(home_directory, 'olapy-data', 'cubes') self.cube_path = os.path.join(home_directory, 'olapy-data', 'cubes')
else: else:
self.cube_path = cube_path self.cube_path = cube_path
self.file_name = file_name self.file_name = file_name
self.web_config_file_name = web_config_file_name self.web_config_file_name = web_config_file_name
...@@ -284,15 +290,21 @@ class ConfigParser: ...@@ -284,15 +290,21 @@ class ConfigParser:
]) for xml_facts in tree.xpath('/cubes/cube/facts') ]) for xml_facts in tree.xpath('/cubes/cube/facts')
] ]
# keys = {
# key.text: key.attrib['ref']
# for key in xml_facts.findall('keys/column_name')
# },
dimensions = [ dimensions = [
Dimension( Dimension(
name=xml_dimension.find('name').text, name=xml_dimension.find('name').text,
# column_new_name = [key.attrib['column_new_name'] for key in xml_dimension.findall('name')],
displayName=xml_dimension.find('displayName').text, displayName=xml_dimension.find('displayName').text,
columns=[ columns={
column_name.text column_name.text : None if not column_name.attrib else column_name.attrib['column_new_name']
for column_name in xml_dimension.findall( for column_name in xml_dimension.findall(
'columns/name') 'columns/name')
]) })
for xml_dimension in tree.xpath( for xml_dimension in tree.xpath(
'/cubes/cube/dimensions/dimension') '/cubes/cube/dimensions/dimension')
] ]
......
import psycopg2 as pg # import psycopg2 as pg
from sqlalchemy import create_engine
# postgres connection # postgres connection
from olapy_config_file_parser import DbConfigParser from olapy_config_file_parser import DbConfigParser
...@@ -16,28 +17,32 @@ class MyDB(object): ...@@ -16,28 +17,32 @@ class MyDB(object):
# raise Exception('Missing database config file') # raise Exception('Missing database config file')
def __init__(self,db=None): def __init__(self,db_config_file_path=None,db=None):
# TODO temporary # TODO temporary
db_config = DbConfigParser() db_config = DbConfigParser(config_path=db_config_file_path)
db_credentials = db_config.get_db_credentials()[0] db_credentials = db_config.get_db_credentials()[0]
username = db_credentials['user_name'] username = db_credentials['user_name']
password = db_credentials['password'] password = db_credentials['password']
host = db_credentials['host'] host = db_credentials['host']
port = db_credentials['port']
if db is None: if db is None:
# first i want to show all databases to user (in excel) # first i want to show all databases to user (in excel)
self.connection = pg.connect("user={0} password={1} host='{2}'". # self.engine = pg.connect("user={0} password={1} host='{2}'".
format(username, password, host)) # format(username, password, host))
self.engine = create_engine('postgresql+psycopg2://{0}:{1}@{3}:{4}/{2}'.format(
username, password, 'postgres', host, port))
else: else:
# and then we connect to the user db # and then we connect to the user db
try: self.engine = create_engine('postgresql+psycopg2://{0}:{1}@{3}:{4}/{2}'.format(
self.connection = pg.connect( username, password, db, host, port))
"user={0} password={1} dbname='{2}' host='{3}'".format( # self.connection = pg.connect(
username, password, db, host)) # "user={0} password={1} dbname='{2}' host='{3}'".format(
except: # username, password, db, host))
print("can't connect")
def __del__(self): def __del__(self):
if hasattr(self, 'connection'): if hasattr(self, 'connection'):
self.connection.close() self.engine.dispose()
import os
from os.path import expanduser
from pympler import summary, muppy
import psutil
def get_virtual_memory_usage_kb():
    """
    Return the current process's virtual memory size in Kb, as a float.

    Uses ``Process.memory_info()`` rather than ``memory_info_ex()``: the
    latter has been deprecated since psutil 4.0 and is absent from recent
    releases, while ``memory_info()`` exposes the same ``vms`` field.
    """
    # psutil reports ``vms`` in bytes; divide by 1024 to express it in Kb.
    return float(psutil.Process().memory_info().vms) / 1024.0
def memory_usage(where):
    """
    Record a short memory report tagged with ``where``.

    The tag and the process's virtual memory size (in Mb) are appended to
    ``bech_mem.txt`` in the user's home directory; the tag and the first two
    rows of the pympler object summary are printed to stdout.

    :param where: label identifying the point in the program being measured
    """
    log_path = os.path.join(expanduser('~'), 'bech_mem.txt')
    with open(log_path, mode='a+') as log_file:
        object_summary = summary.summarize(muppy.get_objects())
        header = "Memory summary:" + where
        log_file.write(header + '\n\n')
        print(header)
        summary.print_(object_summary, limit=2)
        print('----------------------------')
        # get_virtual_memory_usage_kb() returns Kb; convert to Mb for the log.
        vm_mb = get_virtual_memory_usage_kb() / 1024.0
        log_file.write("VM: %.2fMb" % vm_mb + '\n\n')
...@@ -43,15 +43,13 @@ class DbConfigParser: ...@@ -43,15 +43,13 @@ class DbConfigParser:
parser = etree.XMLParser() parser = etree.XMLParser()
tree = etree.parse(config_file, parser) tree = etree.parse(config_file, parser)
try: return [
return [ {
{ # 'sgbd': db.find('sgbd').text,
# 'sgbd': db.find('sgbd').text, 'user_name': db.find('user_name').text,
'user_name': db.find('user_name').text, 'password': db.find('password').text,
'password': db.find('password').text, 'host': db.find('host').text,
'host': db.find('host').text, 'port': db.find('port').text,
} }
for db in tree.xpath('/olapy/database') for db in tree.xpath('/olapy/database')
] ]
except:
raise ('missed name or source tags')
...@@ -222,7 +222,7 @@ application = Application( ...@@ -222,7 +222,7 @@ application = Application(
wsgi_application = WsgiApplication(application) wsgi_application = WsgiApplication(application)
def start_server(write_on_file=False): def start_server(host='0.0.0.0',port=5000,write_on_file=False):
""" """
Start the xmla server. Start the xmla server.
...@@ -254,7 +254,7 @@ def start_server(write_on_file=False): ...@@ -254,7 +254,7 @@ def start_server(write_on_file=False):
logging.getLogger('spyne.protocol.xml').setLevel(logging.DEBUG) logging.getLogger('spyne.protocol.xml').setLevel(logging.DEBUG)
logging.info("listening to http://127.0.0.1:8000/xmla") logging.info("listening to http://127.0.0.1:8000/xmla")
logging.info("wsdl is at: http://localhost:8000/xmla?wsdl") logging.info("wsdl is at: http://localhost:8000/xmla?wsdl")
server = make_server('0.0.0.0', 8000, wsgi_application) server = make_server(host, port, wsgi_application)
server.serve_forever() server.serve_forever()
......
...@@ -1830,8 +1830,10 @@ class XmlaDiscoverTools(): ...@@ -1830,8 +1830,10 @@ class XmlaDiscoverTools():
# TODO in another idea, change this # TODO in another idea, change this
# TO CHANGE NAME DISPLAY THAT EXISTS IN CONFIG FILE # TO CHANGE NAME DISPLAY THAT EXISTS IN CONFIG FILE
if MdxEngine.dimension_display_name != [] and tables in MdxEngine.dimension_display_name:
continue # if MdxEngine.dimension_display_name != [] and tables in MdxEngine.dimension_display_name:
# continue
rows += """ rows += """
<row> <row>
<CATALOG_NAME>{0}</CATALOG_NAME> <CATALOG_NAME>{0}</CATALOG_NAME>
...@@ -1888,6 +1890,15 @@ class XmlaDiscoverTools(): ...@@ -1888,6 +1890,15 @@ class XmlaDiscoverTools():
for table_name, df in self.executer.tables_loaded.items(): for table_name, df in self.executer.tables_loaded.items():
if table_name == self.executer.facts: if table_name == self.executer.facts:
continue continue
# french caracteres
# TODO encode dataframe
if type(df.iloc[0][0]) == unicode:
column_attribut = df.iloc[0][0].encode('utf-8')
else:
column_attribut = df.iloc[0][0]
rows += """ rows += """
<row> <row>
<CATALOG_NAME>{0}</CATALOG_NAME> <CATALOG_NAME>{0}</CATALOG_NAME>
...@@ -1910,8 +1921,11 @@ class XmlaDiscoverTools(): ...@@ -1910,8 +1921,11 @@ class XmlaDiscoverTools():
<HIERARCHY_ORIGIN>1</HIERARCHY_ORIGIN> <HIERARCHY_ORIGIN>1</HIERARCHY_ORIGIN>
<INSTANCE_SELECTION>0</INSTANCE_SELECTION> <INSTANCE_SELECTION>0</INSTANCE_SELECTION>
</row> </row>
""".format(self.selected_catalogue, table_name, """.format(self.selected_catalogue,
df.columns[0], df.iloc[0][0]) table_name,
df.columns[0],
column_attribut)
rows += """ rows += """
<row> <row>
...@@ -1953,6 +1967,14 @@ class XmlaDiscoverTools(): ...@@ -1953,6 +1967,14 @@ class XmlaDiscoverTools():
for table_name, df in self.executer.tables_loaded.items(): for table_name, df in self.executer.tables_loaded.items():
if table_name == self.executer.facts: if table_name == self.executer.facts:
continue continue
# french caracteres
# TODO encode dataframe
if type(df.iloc[0][0]) == unicode:
column_attribut = df.iloc[0][0].encode('utf-8')
else:
column_attribut = df.iloc[0][0]
rows += """ rows += """
<row> <row>
<CATALOG_NAME>{0}</CATALOG_NAME> <CATALOG_NAME>{0}</CATALOG_NAME>
...@@ -1975,8 +1997,10 @@ class XmlaDiscoverTools(): ...@@ -1975,8 +1997,10 @@ class XmlaDiscoverTools():
<HIERARCHY_ORIGIN>1</HIERARCHY_ORIGIN> <HIERARCHY_ORIGIN>1</HIERARCHY_ORIGIN>
<INSTANCE_SELECTION>0</INSTANCE_SELECTION> <INSTANCE_SELECTION>0</INSTANCE_SELECTION>
</row> </row>
""".format(self.selected_catalogue, table_name, """.format(self.selected_catalogue,
df.columns[0], df.iloc[0][0]) table_name,
df.columns[0],
column_attribut)
rows += """ rows += """
<row> <row>
......
...@@ -3,6 +3,8 @@ from __future__ import absolute_import, division, print_function ...@@ -3,6 +3,8 @@ from __future__ import absolute_import, division, print_function
import itertools import itertools
from collections import OrderedDict from collections import OrderedDict
import numpy as np
class XmlaExecuteTools(): class XmlaExecuteTools():
"""XmlaExecuteTools for generating xmla execute responses.""" """XmlaExecuteTools for generating xmla execute responses."""
...@@ -149,6 +151,12 @@ class XmlaExecuteTools(): ...@@ -149,6 +151,12 @@ class XmlaExecuteTools():
for tupl in tupls: for tupl in tupls:
tuple_without_minus_1 = self.get_tuple_without_nan(tupl) tuple_without_minus_1 = self.get_tuple_without_nan(tupl)
# french caracteres
# TODO encode dataframe
if type(tuple_without_minus_1[-1]) == unicode:
tuple_without_minus_1 = [x.encode('utf-8') for x in tuple_without_minus_1]
axis0 += """ axis0 += """
<Member Hierarchy="[{0}].[{0}]"> <Member Hierarchy="[{0}].[{0}]">
<UName>[{0}].[{0}].[{1}].{2}</UName> <UName>[{0}].[{0}].[{1}].{2}</UName>
...@@ -324,7 +332,7 @@ class XmlaExecuteTools(): ...@@ -324,7 +332,7 @@ class XmlaExecuteTools():
cell_data = "" cell_data = ""
index = 0 index = 0
for value in columns_loop: for value in columns_loop:
if value == -1: if np.isnan(value) :
value = '' value = ''
cell_data += """ cell_data += """
<Cell CellOrdinal="{0}"> <Cell CellOrdinal="{0}">
...@@ -550,6 +558,14 @@ class XmlaExecuteTools(): ...@@ -550,6 +558,14 @@ class XmlaExecuteTools():
set(table_name set(table_name
for table_name in mdx_execution_result['columns_desc'] for table_name in mdx_execution_result['columns_desc']
['all'])): ['all'])):
# TODO encode dataframe
# french caracteres
if type(self.executer.tables_loaded[dim_diff].iloc[0][0]) == unicode:
column_attribut = self.executer.tables_loaded[dim_diff].iloc[0][0].encode('utf-8')
else:
column_attribut = self.executer.tables_loaded[dim_diff].iloc[0][0]
tuple += """ tuple += """
<Member Hierarchy="[{0}].[{0}]"> <Member Hierarchy="[{0}].[{0}]">
<UName>[{0}].[{0}].[{1}].[{2}]</UName> <UName>[{0}].[{0}].[{1}].[{2}]</UName>
...@@ -560,7 +576,7 @@ class XmlaExecuteTools(): ...@@ -560,7 +576,7 @@ class XmlaExecuteTools():
</Member> </Member>
""".format(dim_diff, """.format(dim_diff,
self.executer.tables_loaded[dim_diff].columns[0], self.executer.tables_loaded[dim_diff].columns[0],
self.executer.tables_loaded[dim_diff].iloc[0][0]) column_attribut)
# if we have zero on one only measures used # if we have zero on one only measures used
if len(self.executer.selected_measures) <= 1: if len(self.executer.selected_measures) <= 1:
......
# #grako
grako
pandas<1 pandas<1
lxml==3.6.0 #lxml 3.7 causes problems in windows lxml==3.6.0 #lxml 3.7 causes problems in windows
spyne<3 spyne<3
treelib<2 treelib<2
SQLAlchemy
psycopg2 psycopg2
# tools
pympler
psutil
# Test # Test
werkzeug werkzeug
......
...@@ -4,29 +4,14 @@ ...@@ -4,29 +4,14 @@
# #
# pip-compile --output-file requirements.txt requirements.in # pip-compile --output-file requirements.txt requirements.in
# #
click==6.7 # via flask grako==3.99.9
Flask-Login==0.3.2
Flask-Script==2.0.5
Flask-Session==0.3.0
Flask-SQLAlchemy==2.1
Flask-WTF==0.12
flask==0.12.1
grako==3.22.0
itsdangerous==0.24 # via flask
jinja2==2.9.6 # via flask
lxml==3.6.0 lxml==3.6.0
markupsafe==1.0 # via jinja2
numpy==1.12.1 # via pandas numpy==1.12.1 # via pandas
pandas==0.19.2 pandas==0.20.1
plotly==1.12.9
psycopg2==2.7.1 psycopg2==2.7.1
python-dateutil==2.6.0 # via pandas python-dateutil==2.6.0 # via pandas
pytz==2017.2 # via pandas, plotly, spyne pytz==2017.2 # via pandas, spyne
requests==2.13.0 # via plotly six==1.10.0 # via python-dateutil
six==1.10.0 # via plotly, python-dateutil
spyne==2.12.14 spyne==2.12.14
sqlalchemy==1.0.17
treelib==1.3.5 treelib==1.3.5
werkzeug==0.12.1 # via flask, flask-wtf werkzeug==0.12.1
wtforms==2.1
XlsxWriter==0.9.3
...@@ -29,7 +29,7 @@ setup( ...@@ -29,7 +29,7 @@ setup(
install_requires=install_requires, install_requires=install_requires,
include_package_data=False, include_package_data=False,
# cmdclass={ # cmdclass={
# # 'develop': PostDevelopCommand, # 'develop': PostDevelopCommand,
# 'install': PostInstallCommand, # 'install': PostInstallCommand,
# }, # },
classifiers=[ classifiers=[
...@@ -41,8 +41,9 @@ setup( ...@@ -41,8 +41,9 @@ setup(
# "Topic :: Business intelligence", # "Topic :: Business intelligence",
],) ],)
if 'OLAPY_PATH' in os.environ:
if RUNNING_TOX: home_directory = os.environ['OLAPY_PATH']
elif RUNNING_TOX:
home_directory = os.environ.get('HOME_DIR') home_directory = os.environ.get('HOME_DIR')
else: else:
home_directory = expanduser("~") home_directory = expanduser("~")
...@@ -55,5 +56,3 @@ if not os.path.isdir(os.path.join(home_directory, 'olapy-data', 'cubes')): ...@@ -55,5 +56,3 @@ if not os.path.isdir(os.path.join(home_directory, 'olapy-data', 'cubes')):
if not os.path.isfile(os.path.join(home_directory, 'olapy-data','olapy-config.xml')): if not os.path.isfile(os.path.join(home_directory, 'olapy-data','olapy-config.xml')):
copyfile('config/olapy-config.xml', os.path.join(home_directory, 'olapy-data','olapy-config.xml')) copyfile('config/olapy-config.xml', os.path.join(home_directory, 'olapy-data','olapy-config.xml'))
from __future__ import absolute_import, division, print_function
import pandas as pd
from pandas.util.testing import assert_frame_equal
from olapy.core.mdx.executor.execute import MdxEngine
from olapy.core.mdx.parser.parse import MdxParser
# Name of the cube the shared engine below is created with; every query in
# this module selects FROM [sales].
CUBE = 'sales'
# Measure-only query built by implicit string concatenation (there is no
# space between SELECT and the opening brace).
query1 = "SELECT" \
"{[Measures].[Amount]} ON COLUMNS " \
"FROM [sales]"
# A single dimension level on columns, no second axis.
query2 = """SELECT
{[Geography].[Economy].[Partnership]} ON COLUMNS
FROM [sales]"""
# Measure on axis 0 plus country members on a second axis that is also
# written "ON COLUMNS"; test_parsing_query3 expects the parser to report the
# second axis as the rows.
query3 = """SELECT
{[Measures].[Amount]} on 0,
non empty {[Geography].[Geo].[Country].members} ON COLUMNS
FROM [sales]"""
# Dimension level on columns, country members on axis 1.
query4 = """SELECT
{[Geography].[Economy].[Partnership]} ON COLUMNS,
non empty {[Geography].[Geo].[Country].members} on 1
from [sales]"""
# Same shape as query4, written with lowercase keywords and numeric axes.
query5 = """select
{[Geography].[Economy].[Country]} on 0,
non empty {[Geography].[Geo].[Country].members} on 1
from [sales]"""
# Two different dimensions, one per axis.
query6 = """select
{[Geography].[Economy].[Partnership]} on 0,
{[Product].[Prod].[Company]} on 1
from [sales]"""
# query7 to query10 select explicit members; they are not referenced by the
# tests visible in this section.
query7 = """select
{[Geography].[Economy].[Partnership].[EU]} on 0,
{[Product].[Prod].[Company].[Crazy Development]} on 1
from [sales]"""
query8 = """select
{[Geography].[Economy].[Partnership].[EU],
[Geography].[Economy].[Partnership].[None],
[Geography].[Economy].[Partnership].[NAFTA]} on 0,
{[Product].[Prod].[Company].[Crazy Development],
[Product].[Prod].[Company].[Company_test],
[Product].[Prod].[Company].[test_Development]} on 1
from [sales]"""
query9 = """select
{[Geography].[Economy].[Partnership].[EU],
[Geography].[Economy].[Partnership].[None]} on 0
from [sales]"""
query10 = """select
{[Geography].[Geo].[Country].[France],
[Geography].[Geo].[Country].[Spain]} on 0,
non empty {[Measures].[Amount]} on 1
from [sales]"""
# WHERE clauses appended to the queries by the parsing tests (where4 and
# where5 are not used by the tests visible in this section).
where1 = "Where [Time].[Calendar].[Day].[May 12,2010]"
where2 = "Where[Product].[olapy].[Personal]"
where3 = "Where[Time].[Calendar].[Year].[2010]"
where4 = "Where [Measures].[Count]"
where5 = "where [Count]"
# Nested DrilldownMember query over the Time dimension, run by the execution
# tests.
# NOTE(review): the braces are doubled ({{ }}) although no str.format call is
# applied to this literal here - confirm this is intentional.
query11 = """
SELECT NON EMPTY Hierarchize(AddCalculatedMembers(DrilldownMember({{DrilldownMember({{DrilldownMember({{
[Time].[Time].[Year].Members}}, {
[Time].[Time].[Year].[2010]})}}, {
[Time].[Time].[Quarter].[2010].[Q2 2010]})}}, {
[Time].[Time].[Month].[2010].[Q2 2010].[May 2010]}))) DIMENSION PROPERTIES PARENT_UNIQUE_NAME,HIERARCHY_UNIQUE_NAME
ON COLUMNS
FROM [sales] WHERE ([Measures].[Amount])
CELL PROPERTIES VALUE, FORMAT_STRING, LANGUAGE, BACK_COLOR, FORE_COLOR, FONT_FLAGS
"""
# Hierarchize over the Geography continents, run by test_execution_query3.
query12 = """SELECT NON EMPTY Hierarchize(AddCalculatedMembers({
[Geography].[Geography].[Continent].Members}))
DIMENSION PROPERTIES PARENT_UNIQUE_NAME,HIERARCHY_UNIQUE_NAME ON COLUMNS
FROM [sales]
WHERE ([Measures].[Amount])
CELL PROPERTIES VALUE, FORMAT_STRING, LANGUAGE, BACK_COLOR, FORE_COLOR, FONT_FLAGS"""
# Parser and engine instances created once at import time and shared by all
# tests in this module.
parser = MdxParser()
executer = MdxEngine(CUBE)
def test_parsing_query1():
    """Parse query1 (a measure on columns, no rows) with each WHERE clause."""
    columns = parser.parsing_mdx_query('column', query=query1)
    assert columns == ['Measures', 'Amount']
    cube = parser.parsing_mdx_query('cube', query=query1)
    assert cube == "sales"
    rows = parser.parsing_mdx_query('row', query=query1)
    assert rows is None

    # Each WHERE clause is appended on its own line and parsed as the condition.
    expected_conditions = (
        (where1, [u'Time', u'Calendar', u'Day', u'May 12,2010']),
        (where2, [u'Product', u'olapy', u'Personal']),
        (where3, [u'Time', u'Calendar', u'Year', u'2010']),
    )
    for where_clause, expected in expected_conditions:
        filtered_query = query1 + '\n' + where_clause
        condition = parser.parsing_mdx_query('condition', query=filtered_query)
        assert condition == expected
def test_parsing_query2():
    """Parse query2 (a dimension level on columns, no rows) with each WHERE clause."""
    columns = parser.parsing_mdx_query('column', query=query2)
    assert columns == [u'Geography', u'Economy', u'Partnership']
    cube = parser.parsing_mdx_query('cube', query=query2)
    assert cube == "sales"
    rows = parser.parsing_mdx_query('row', query=query2)
    assert rows is None

    # Each WHERE clause is appended on its own line and parsed as the condition.
    expected_conditions = (
        (where1, [u'Time', u'Calendar', u'Day', u'May 12,2010']),
        (where2, [u'Product', u'olapy', u'Personal']),
        (where3, [u'Time', u'Calendar', u'Year', u'2010']),
    )
    for where_clause, expected in expected_conditions:
        filtered_query = query2 + '\n' + where_clause
        condition = parser.parsing_mdx_query('condition', query=filtered_query)
        assert condition == expected
def test_parsing_query3():
    """Parse query3 (a measure plus country members) with each WHERE clause."""
    columns = parser.parsing_mdx_query('column', query=query3)
    assert columns == [u'Measures', u'Amount']
    cube = parser.parsing_mdx_query('cube', query=query3)
    assert cube == "sales"
    rows = parser.parsing_mdx_query('row', query=query3)
    assert rows == [u'Geography', u'Geo', u'Country', u'members']

    # Each WHERE clause is appended on its own line and parsed as the condition.
    expected_conditions = (
        (where1, [u'Time', u'Calendar', u'Day', u'May 12,2010']),
        (where2, [u'Product', u'olapy', u'Personal']),
        (where3, [u'Time', u'Calendar', u'Year', u'2010']),
    )
    for where_clause, expected in expected_conditions:
        filtered_query = query3 + '\n' + where_clause
        condition = parser.parsing_mdx_query('condition', query=filtered_query)
        assert condition == expected
def test_parsing_query4():
    """Parse query4 (dimension level on columns, countries on rows) with each WHERE clause."""
    mdx = query4
    columns = parser.parsing_mdx_query('column', query=mdx)
    assert columns == [u'Geography', u'Economy', u'Partnership']
    cube = parser.parsing_mdx_query('cube', query=mdx)
    assert cube == "sales"
    rows = parser.parsing_mdx_query('row', query=mdx)
    assert rows == [u'Geography', u'Geo', u'Country', u'members']

    # Each WHERE clause is appended on its own line and parsed as the condition.
    expected_conditions = (
        (where1, [u'Time', u'Calendar', u'Day', u'May 12,2010']),
        (where2, [u'Product', u'olapy', u'Personal']),
        (where3, [u'Time', u'Calendar', u'Year', u'2010']),
    )
    for where_clause, expected in expected_conditions:
        filtered_query = mdx + '\n' + where_clause
        condition = parser.parsing_mdx_query('condition', query=filtered_query)
        assert condition == expected
def test_parsing_query5():
    """Parse query5 (lowercase keywords, numeric axes) with each WHERE clause."""
    mdx = query5
    columns = parser.parsing_mdx_query('column', query=mdx)
    assert columns == [u'Geography', u'Economy', u'Country']
    cube = parser.parsing_mdx_query('cube', query=mdx)
    assert cube == "sales"
    rows = parser.parsing_mdx_query('row', query=mdx)
    assert rows == [u'Geography', u'Geo', u'Country', u'members']

    # Each WHERE clause is appended on its own line and parsed as the condition.
    expected_conditions = (
        (where1, [u'Time', u'Calendar', u'Day', u'May 12,2010']),
        (where2, [u'Product', u'olapy', u'Personal']),
        (where3, [u'Time', u'Calendar', u'Year', u'2010']),
    )
    for where_clause, expected in expected_conditions:
        filtered_query = mdx + '\n' + where_clause
        condition = parser.parsing_mdx_query('condition', query=filtered_query)
        assert condition == expected
def test_parsing_query6():
    """Parse query6 (two different dimensions, one per axis) with each WHERE clause."""
    mdx = query6
    columns = parser.parsing_mdx_query('column', query=mdx)
    assert columns == [u'Geography', u'Economy', u'Partnership']
    cube = parser.parsing_mdx_query('cube', query=mdx)
    assert cube == "sales"
    rows = parser.parsing_mdx_query('row', query=mdx)
    assert rows == [u'Product', u'Prod', u'Company']

    # Each WHERE clause is appended on its own line and parsed as the condition.
    expected_conditions = (
        (where1, [u'Time', u'Calendar', u'Day', u'May 12,2010']),
        (where2, [u'Product', u'olapy', u'Personal']),
        (where3, [u'Time', u'Calendar', u'Year', u'2010']),
    )
    for where_clause, expected in expected_conditions:
        filtered_query = mdx + '\n' + where_clause
        condition = parser.parsing_mdx_query('condition', query=filtered_query)
        assert condition == expected
def test_execution_query1():
    """Execute query1 and query11 and check selected 'Amount' values."""
    executer.mdx_query = query1
    amounts = executer.execute_mdx()['result']['Amount']
    assert amounts[0] == 1023

    executer.mdx_query = query11
    # The query is re-executed for every checked position.
    for position, expected in ((3, 1), (4, 2)):
        amounts = executer.execute_mdx()['result']['Amount']
        assert amounts[position] == expected
def test_execution_query3():
    """Compare query12's result with a hand-built frame, then check query11's amounts."""
    # Reference frame: 'Amount' summed per 'Continent'.
    expected_frame = pd.DataFrame({
        'Continent': ['America', 'Europe'],
        'Amount': [768, 255]
    }).groupby(['Continent']).sum()

    executer.mdx_query = query12
    # assert_frame_equal raises on mismatch and returns None otherwise.
    comparison = assert_frame_equal(expected_frame,
                                    executer.execute_mdx()['result'])
    assert comparison is None

    executer.mdx_query = query11
    amounts = list(executer.execute_mdx()['result']['Amount'])
    assert amounts == [
        1023, 1023, 1023, 1, 2, 4, 8, 16, 32, 64, 128, 256, 512
    ]
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment