Commit 65220674 authored by mouadh

mem_bech

parent be5fedf0
from __future__ import absolute_import, division, print_function
from ..tools.mem_bench import memory_usage
from ..tools.connection import MyDB
import pandas.io.sql as psql
import os
......@@ -18,14 +19,17 @@ def _load_table_config_file(executer_instance, cube_obj):
db = MyDB(db=executer_instance.cube)
memory_usage("1 - before executing query //// _load_table_config_file")
for table in cube_obj.dimensions:
value = psql.read_sql_query("SELECT * FROM {0}".format(table.name),
db.engine)
with db.engine as connection:
value = psql.read_sql_query("SELECT * FROM {0}".format(table.name),
connection)
tables[table.name] = value[[
col for col in value.columns if col.lower()[-3:] != '_id'
]]
memory_usage("2 - after query, before fetchall /////// _load_table_config_file")
# update table display name
for dimension in cube_obj.dimensions:
if dimension.displayName and dimension.name and dimension.displayName != dimension.name:
......@@ -47,17 +51,22 @@ def _construct_star_schema_config_file(executer_instance, cubes_obj):
executer_instance.facts = cubes_obj.facts[0].table_name
db = MyDB(db=executer_instance.cube)
# load facts table
fusion = psql.read_sql_query(
"SELECT * FROM {0}".format(executer_instance.facts), db.engine)
for fact_key, dimension_and_key in cubes_obj.facts[0].keys.items():
df = psql.read_sql_query(
"SELECT * FROM {0}".format(dimension_and_key.split('.')[0]),
db.connection)
memory_usage("1 - before executing query //// _construct_star_schema_config_file")
with db.engine as connection:
fusion = psql.read_sql_query(
"SELECT * FROM {0}".format(executer_instance.facts), connection)
fusion = fusion.merge(
df, left_on=fact_key, right_on=dimension_and_key.split('.')[1])
for fact_key, dimension_and_key in cubes_obj.facts[0].keys.items():
df = psql.read_sql_query(
"SELECT * FROM {0}".format(dimension_and_key.split('.')[0]),
connection)
fusion = fusion.merge(
df, left_on=fact_key, right_on=dimension_and_key.split('.')[1])
memory_usage("2 - after query, before fetchall /////// _construct_star_schema_config_file")
# TODO CHOSE BETWEEN THOSES DF
# if separated dimensions
# fusion = fusion.merge(df, left_on=fact_key,right_on=dimension_and_key.split('.')[1])
......@@ -90,10 +99,16 @@ def _construct_web_star_schema_config_file(executer_instance, cubes_obj):
if cubes_obj.facts[0].columns:
all_columns += cubes_obj.facts[0].columns
memory_usage("1 - before executing query //// 1111 _construct_web_star_schema_config_file ")
fusion = psql.read_sql_query(
"SELECT * FROM {0}".format(executer_instance.facts), db.engine)
memory_usage("2 - after query, before fetchall /////// 222222222222 _construct_star_schema_config_file")
tables = {}
memory_usage("1 - before executing query //// 3333333333 _construct_web_star_schema_config_file ")
for table in cubes_obj.tables:
tab = psql.read_sql_query("SELECT * FROM {0}".format(table.name),
......@@ -118,11 +133,14 @@ def _construct_web_star_schema_config_file(executer_instance, cubes_obj):
all_columns += list(tab.columns)
tables.update({table.name: tab})
memory_usage("2 - after query, before fetchall /////// 44444444 _construct_star_schema_config_file")
# measures in config-file only
if cubes_obj.facts[0].measures:
executer_instance.measures = cubes_obj.facts[0].measures
all_columns += cubes_obj.facts[0].measures
memory_usage("1 - before executing query //// 55555555 _construct_web_star_schema_config_file ")
for fact_key, dimension_and_key in cubes_obj.facts[0].keys.items():
dimension_name = dimension_and_key.split('.')[0]
if dimension_name in tables.keys():
......@@ -138,5 +156,7 @@ def _construct_web_star_schema_config_file(executer_instance, cubes_obj):
# remove suffixe from dimension and keep the same column name for facts
suffixes=('', '_y'))
memory_usage("2 - after query, before fetchall /////// 6666666666 _construct_star_schema_config_file")
return fusion[[column for column in all_columns if 'id' != column[-2:]]]
......@@ -2,6 +2,7 @@ from __future__ import absolute_import, division, print_function
from sqlalchemy import inspect
from ..tools.mem_bench import memory_usage
from ..tools.connection import MyDB
import pandas.io.sql as psql
......@@ -16,6 +17,7 @@ def _load_tables_db(executer_instance):
db = MyDB(db_config_file_path=executer_instance.DATA_FOLDER,db=executer_instance.cube)
inspector = inspect(db.engine)
memory_usage("1 - before executing query //// _load_tables_db")
for table_name in inspector.get_table_names():
value = psql.read_sql_query(
'SELECT * FROM "{0}"'.format(table_name), db.engine)
......@@ -23,6 +25,7 @@ def _load_tables_db(executer_instance):
tables[table_name] = value[[
col for col in value.columns if col.lower()[-3:] != '_id'
]]
memory_usage("2 - after query, before fetchall /////// _load_tables_db")
return tables
......@@ -34,20 +37,22 @@ def _construct_star_schema_db(executer_instance):
:return: star schema DataFrame
"""
db = MyDB(db=executer_instance.cube)
memory_usage("1 - before executing query //// _construct_star_schema_db")
# load facts table
fusion = psql.read_sql_query(
'SELECT * FROM "{0}" '.format(executer_instance.facts), db.engine)
inspector = inspect(db.engine)
for db_table_name in inspector.get_table_names():
try:
fusion = fusion.merge(
psql.read_sql_query("SELECT * FROM {0}".format(
db_table_name[0]), db.engine))
except:
print('No common column')
pass
with db.engine as connection:
fusion = psql.read_sql_query(
'SELECT * FROM "{0}" '.format(executer_instance.facts), connection)
inspector = inspect(connection)
for db_table_name in inspector.get_table_names():
try:
fusion = fusion.merge(
psql.read_sql_query("SELECT * FROM {0}".format(
db_table_name[0]), connection))
except:
print('No common column')
pass
memory_usage("2 - after query, before fetchall /////// _construct_star_schema_db")
return fusion
Memory summary:1 - before executing query //// _load_tables_db
VM: 3160.52Mb
Memory summary:2 - after query, before fetchall /////// _load_tables_db
VM: 3268.52Mb
Memory summary:1 - before executing query //// 1111 _construct_web_star_schema_config_file
VM: 3268.52Mb
Memory summary:2 - after query, before fetchall /////// 222222222222 _construct_star_schema_config_file
VM: 3269.02Mb
Memory summary:1 - before executing query //// 3333333333 _construct_web_star_schema_config_file
VM: 3269.02Mb
Memory summary:2 - after query, before fetchall /////// 44444444 _construct_star_schema_config_file
VM: 3271.02Mb
Memory summary:1 - before executing query //// 55555555 _construct_web_star_schema_config_file
VM: 3271.02Mb
Memory summary:2 - after query, before fetchall /////// 6666666666 _construct_star_schema_config_file
VM: 3271.02Mb
Memory summary:1 - before executing query //// 1111 _construct_web_star_schema_config_file
VM: 3271.02Mb
Memory summary:2 - after query, before fetchall /////// 222222222222 _construct_star_schema_config_file
VM: 3270.77Mb
Memory summary:1 - before executing query //// 3333333333 _construct_web_star_schema_config_file
VM: 3270.77Mb
Memory summary:2 - after query, before fetchall /////// 44444444 _construct_star_schema_config_file
VM: 3271.02Mb
Memory summary:1 - before executing query //// 55555555 _construct_web_star_schema_config_file
VM: 3271.02Mb
Memory summary:2 - after query, before fetchall /////// 6666666666 _construct_star_schema_config_file
VM: 3271.02Mb
Memory summary:1 - before executing query //// 1111 _construct_web_star_schema_config_file
VM: 3271.02Mb
Memory summary:2 - after query, before fetchall /////// 222222222222 _construct_star_schema_config_file
VM: 3271.02Mb
Memory summary:1 - before executing query //// 3333333333 _construct_web_star_schema_config_file
VM: 3271.02Mb
Memory summary:2 - after query, before fetchall /////// 44444444 _construct_star_schema_config_file
VM: 3271.02Mb
Memory summary:1 - before executing query //// 55555555 _construct_web_star_schema_config_file
VM: 3271.02Mb
Memory summary:2 - after query, before fetchall /////// 6666666666 _construct_star_schema_config_file
VM: 3271.02Mb
import os
from os.path import expanduser
from pympler import summary, muppy
import psutil
def get_virtual_memory_usage_kb():
    """
    Return the current process's virtual memory size in Kb, as a float.

    The value is the ``vms`` field reported by psutil for the running
    process (psutil reports it in bytes), divided by 1024.

    :return: virtual memory size in Kb
    """
    # memory_info_ex() was deprecated in psutil 4.0 and later removed;
    # memory_info() exposes the same ``vms`` field on every platform.
    return float(psutil.Process().memory_info().vms) / 1024.0
def memory_usage(where):
    """
    Append a short memory report to ``bech_mem.txt`` in the user's home folder.

    Two entries are written to the file: a ``Memory summary:`` header
    carrying the ``where`` label, and the process's virtual memory size in Mb.
    The pympler object summary (limited to 2 rows) is emitted through
    ``summary.print_`` and is not passed the file, so it presumably goes to
    standard output rather than into the report.

    :param where: label identifying the call site; concatenated to the
        header, so it must be a string
    """
    report_path = os.path.join(expanduser('~'), 'bech_mem.txt')
    with open(report_path, mode='a+') as report:
        # Summarise every object pympler can see at this point.
        rows = summary.summarize(muppy.get_objects())

        header = "Memory summary:" + where + '\n\n'
        report.write(header)

        summary.print_(rows, limit=2)

        # Helper returns Kb; convert to Mb for the report line.
        vm_mb = get_virtual_memory_usage_kb() / 1024.0
        report.write("VM: %.2fMb" % vm_mb + '\n\n')
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment