Commit 19490d30 authored by mouadh

optimization 3 (using string folding)

parent a9dd22c0
......@@ -7,6 +7,64 @@ from ..tools.connection import MyDB
import pandas.io.sql as psql
class StringFolder(object):
    """
    Class that will fold strings. See 'fold_string'.

    This object may be safely deleted or go out of scope when
    strings have been folded.
    """

    # Resolve the interpreter-specific names once, when the class is
    # created, so that fold_string (called once per value) does not have
    # to repeat the check. On Python 2 the builtins `basestring` and
    # `intern` exist; on Python 3 they do not, so fall back to `str` and
    # `sys.intern`.
    try:
        _string_types = basestring
        _intern = staticmethod(intern)
    except NameError:
        from sys import intern as _sys_intern
        _string_types = str
        _intern = staticmethod(_sys_intern)
        del _sys_intern

    def __init__(self):
        # Maps a unicode value that cannot be coerced to a plain string
        # onto the first object seen with that value.
        self.unicode_map = {}

    def fold_string(self, s):
        """
        Given a string (or unicode) parameter s, return a string object
        that has the same value as s (and may be s). For all objects
        with a given value, the same object will be returned. For unicode
        objects that can be coerced to a string with the same value, a
        string object will be returned.

        If s is not a string or unicode object, it is returned unchanged.

        :param s: a string or unicode object.
        :return: a string or unicode object.
        """
        # If s is not a string or unicode object, return it unchanged.
        if not isinstance(s, self._string_types):
            return s

        # If s is already a string, then str() has no effect.
        # If s is Unicode, try and encode as a string and use intern.
        # If s is Unicode and can't be encoded as a string, this try
        # will raise a UnicodeEncodeError.
        try:
            return self._intern(str(s))
        except UnicodeEncodeError:
            # Fall through and handle s as Unicode.
            pass

        # Look up the unicode value in the map and return the object
        # stored there; if there is no matching entry, setdefault stores
        # s itself and returns it.
        return self.unicode_map.setdefault(s, s)
def string_folding_wrapper(results):
    """
    Lazily re-emit each row of ``results`` as a plain tuple, passing
    every value through a single shared StringFolder so that equal
    strings end up as the same object.
    """
    # The key list fixes the order of the values inside every tuple.
    column_names = results.keys()
    fold = StringFolder().fold_string
    for record in results:
        yield tuple([fold(record[name]) for name in column_names])
def _load_tables_db(executer_instance):
"""
Load tables from database.
......@@ -28,11 +86,8 @@ def _load_tables_db(executer_instance):
.execution_options(stream_results=True)
.execute('SELECT * FROM "{0}"'.format(table_name)))
# Fetch all the results of the query
# fetchall = results.fetchall()
# # fetchall = results.fetchone()
# value = pd.DataFrame(fetchall,columns=results.keys())
value = pd.DataFrame(iter(results),columns=results.keys()) # Pass results as an iterator
# value = pd.DataFrame(iter(results),columns=results.keys()) # Pass results as an iterator
value = pd.DataFrame(string_folding_wrapper(results),columns=results.keys())
tables[table_name] = value[[
col for col in value.columns if col.lower()[-3:] != '_id'
]]
......
Memory summary:1 - before executing query //// _load_tables_db
VM: 700.27Mb
Memory summary:2 - after query, before fetchall /////// _load_tables_db
VM: 2719.36Mb
Memory summary:1 - before executing query //// 1111 _construct_web_star_schema_config_file
VM: 2719.36Mb
Memory summary:2 - after query, before fetchall /////// 222222222222 _construct_star_schema_config_file
VM: 2719.86Mb
Memory summary:1 - before executing query //// 3333333333 _construct_web_star_schema_config_file
VM: 2719.86Mb
Memory summary:2 - after query, before fetchall /////// 44444444 _construct_star_schema_config_file
VM: 2721.61Mb
Memory summary:1 - before executing query //// 55555555 _construct_web_star_schema_config_file
VM: 2721.61Mb
Memory summary:2 - after query, before fetchall /////// 6666666666 _construct_star_schema_config_file
VM: 2721.61Mb
Memory summary:1 - before executing query //// 1111 _construct_web_star_schema_config_file
VM: 2721.61Mb
Memory summary:2 - after query, before fetchall /////// 222222222222 _construct_star_schema_config_file
VM: 2721.36Mb
Memory summary:1 - before executing query //// 3333333333 _construct_web_star_schema_config_file
VM: 2721.36Mb
Memory summary:2 - after query, before fetchall /////// 44444444 _construct_star_schema_config_file
VM: 2721.86Mb
Memory summary:1 - before executing query //// 55555555 _construct_web_star_schema_config_file
VM: 2721.86Mb
Memory summary:2 - after query, before fetchall /////// 6666666666 _construct_star_schema_config_file
VM: 2721.86Mb
Memory summary:1 - before executing query //// 1111 _construct_web_star_schema_config_file
VM: 2721.86Mb
Memory summary:2 - after query, before fetchall /////// 222222222222 _construct_star_schema_config_file
VM: 2721.36Mb
Memory summary:1 - before executing query //// 3333333333 _construct_web_star_schema_config_file
VM: 2721.36Mb
Memory summary:2 - after query, before fetchall /////// 44444444 _construct_star_schema_config_file
VM: 2721.86Mb
Memory summary:1 - before executing query //// 55555555 _construct_web_star_schema_config_file
VM: 2721.86Mb
Memory summary:2 - after query, before fetchall /////// 6666666666 _construct_star_schema_config_file
VM: 2721.86Mb
****************************************
types | # objects | total size
======= | =========== | ============
dict | 7430 | 8.72 MB
str | 58996 | 8.30 MB
----------------------------
Memory summary:2 - after query, before fetchall /////// _load_tables_db
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.frame.DataFrame | 180 | 1.93 GB
dict | 7803 | 8.86 MB
----------------------------
Memory summary:1 - before executing query //// 1111 _construct_web_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.series.Series | 592 | 1.93 GB
<class 'pandas.core.frame.DataFrame | 179 | 1.93 GB
----------------------------
Memory summary:2 - after query, before fetchall /////// 222222222222 _construct_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.series.Series | 618 | 1.93 GB
<class 'pandas.core.frame.DataFrame | 180 | 1.93 GB
----------------------------
Memory summary:1 - before executing query //// 3333333333 _construct_web_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.series.Series | 618 | 1.93 GB
<class 'pandas.core.frame.DataFrame | 180 | 1.93 GB
----------------------------
Memory summary:2 - after query, before fetchall /////// 44444444 _construct_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.frame.DataFrame | 185 | 1.93 GB
<class 'pandas.core.series.Series | 618 | 1.93 GB
----------------------------
Memory summary:1 - before executing query //// 55555555 _construct_web_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.series.Series | 630 | 1.93 GB
<class 'pandas.core.frame.DataFrame | 185 | 1.93 GB
----------------------------
Memory summary:2 - after query, before fetchall /////// 6666666666 _construct_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.frame.DataFrame | 185 | 1.93 GB
<class 'pandas.core.series.Series | 604 | 1.93 GB
----------------------------
Memory summary:1 - before executing query //// 1111 _construct_web_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.frame.DataFrame | 180 | 1.93 GB
<class 'pandas.core.series.Series | 592 | 1.93 GB
----------------------------
Memory summary:2 - after query, before fetchall /////// 222222222222 _construct_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.series.Series | 629 | 1.93 GB
<class 'pandas.core.frame.DataFrame | 181 | 1.93 GB
----------------------------
Memory summary:1 - before executing query //// 3333333333 _construct_web_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.series.Series | 629 | 1.93 GB
<class 'pandas.core.frame.DataFrame | 181 | 1.93 GB
----------------------------
Memory summary:2 - after query, before fetchall /////// 44444444 _construct_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.frame.DataFrame | 186 | 1.93 GB
<class 'pandas.core.series.Series | 629 | 1.93 GB
----------------------------
Memory summary:1 - before executing query //// 55555555 _construct_web_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.series.Series | 641 | 1.93 GB
<class 'pandas.core.frame.DataFrame | 186 | 1.93 GB
----------------------------
Memory summary:2 - after query, before fetchall /////// 6666666666 _construct_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.frame.DataFrame | 186 | 1.93 GB
<class 'pandas.core.series.Series | 615 | 1.93 GB
----------------------------
Memory summary:1 - before executing query //// 1111 _construct_web_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.series.Series | 603 | 1.93 GB
<class 'pandas.core.frame.DataFrame | 180 | 1.93 GB
----------------------------
Memory summary:2 - after query, before fetchall /////// 222222222222 _construct_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.series.Series | 629 | 1.93 GB
<class 'pandas.core.frame.DataFrame | 181 | 1.93 GB
----------------------------
Memory summary:1 - before executing query //// 3333333333 _construct_web_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.series.Series | 629 | 1.93 GB
<class 'pandas.core.frame.DataFrame | 181 | 1.93 GB
----------------------------
Memory summary:2 - after query, before fetchall /////// 44444444 _construct_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.frame.DataFrame | 186 | 1.93 GB
<class 'pandas.core.series.Series | 629 | 1.93 GB
----------------------------
Memory summary:1 - before executing query //// 55555555 _construct_web_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.series.Series | 641 | 1.93 GB
<class 'pandas.core.frame.DataFrame | 186 | 1.93 GB
----------------------------
Memory summary:2 - after query, before fetchall /////// 6666666666 _construct_star_schema_config_file
types | # objects | total size
===================================== | =========== | ============
<class 'pandas.core.frame.DataFrame | 186 | 1.93 GB
<class 'pandas.core.series.Series | 615 | 1.93 GB
----------------------------
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment