Commit de142672 authored by Levin Zimmermann

restricted: Allow monkey-patched pandas.read_*

parent 91ad7df3
......@@ -36,6 +36,7 @@ from Products.ERP5Type.tests.utils import createZODBPythonScript
from Products.ERP5Type.tests.utils import removeZODBPythonScript
from Products.ERP5Type.patches.Restricted import allow_class_attribute
from Products.ERP5Type.patches.Restricted import (pandas_black_list, dataframe_black_list, series_black_list)
from Products.ERP5Type.patches.Pandas import pandas_read_function_to_restrict_tuple
from AccessControl import Unauthorized
from AccessControl.ZopeGuards import Unauthorized as ZopeGuardsUnauthorized
......@@ -635,6 +636,110 @@ class TestRestrictedPythonSecurity(ERP5TypeTestCase):
write_method('testPandasSeriesIOWrite.data')
'''.format(write_method=write_method))
def testPandasRestrictedReadFunctionAllowedInput(self):
  """
  Check that the patched pandas read_* functions parse plain string input.

  Each case pairs the string handed to the read function with the source
  text of the arguments used to build the expected ``pd.DataFrame``. The
  generated script returns whether the parsed frame equals the expected one
  and is run with ``expected=True``.
  """
  script_template = '''
import pandas as pd
expected_data_frame = pd.DataFrame({expected_data_frame_init})
return pd.{read_function}('{read_argument}').equals(expected_data_frame)
'''
  # Normal JSON input should be correctly handled.
  json_case_tuple = (
    ("[1, 2, 3]", "[1, 2, 3]"),
    (
      '{"column_name": [1, 2, 3], "another_column": [3, 9.2, 100]}',
      '{"column_name": [1, 2, 3], "another_column": [3, 9.2, 100]}',
    ),
  )
  # Normal CSV input first; the URL-like and file-path-like inputs that
  # follow are expected to be read as CSV content, not fetched.
  csv_case_tuple = (
    (
      r"11,2,300\n50.5,99,hello",
      r"[[50.5, 99, 'hello']], columns='11 2 300'.split(' ')",
    ),
    (
      r"https://people.sc.fsu.edu/~jburkardt/data/csv/addresses.csv",
      r"[], columns=['https://people.sc.fsu.edu/~jburkardt/data/csv/addresses.csv']",
    ),
    (
      r"file://path/to/csv/file.csv",
      r"[], columns=['file://path/to/csv/file.csv']",
    ),
  )
  # Same idea for fixed-width input: a path-like string is plain content.
  fwf_case_tuple = (
    (
      r"100\n200",
      r"[[200]], columns=['100']",
    ),
    (
      r"file://path/to/fwf/file.fwf",
      r"[], columns=['file://path/to/fwf/file.fwf']",
    ),
  )
  case_dict = {
    "read_json": json_case_tuple,
    "read_csv": csv_case_tuple,
    "read_fwf": fwf_case_tuple,
  }
  script_list = [
    script_template.format(
      expected_data_frame_init=frame_init,
      read_function=function_name,
      read_argument=content,
    )
    for function_name, case_tuple in case_dict.items()
    for content, frame_init in case_tuple
  ]
  for script in script_list:
    self.createAndRunScript(script, expected=True)
def testPandasRestrictedReadFunctionProhibitedInput(self):
  """
  Check that the patched pandas read_* functions refuse non-string input.

  For every name in ``pandas_read_function_to_restrict_tuple`` a script is
  run once with an integer and once with a ``StringIO`` object as argument;
  each run is expected to raise ``ZopeGuardsUnauthorized``.
  """
  script_template = '''
import pandas as pd
{preparation}
pd.{pandas_read_function}({prohibited_input})
'''
  # (statement placed before the call, source text of the rejected argument)
  prohibited_case_tuple = (
    ('', 100),
    ('from StringIO import StringIO', 'StringIO("[1, 2, 3]")'),
  )
  for function_name in pandas_read_function_to_restrict_tuple:
    for setup_line, rejected_argument in prohibited_case_tuple:
      script = script_template.format(
        preparation=setup_line,
        pandas_read_function=function_name,
        prohibited_input=rejected_argument,
      )
      self.assertRaises(ZopeGuardsUnauthorized, self.createAndRunScript, script)
def testPandasReadJson(self):
# Test if file path, urls and other bad strings
# raise value errors
for malicous_input in (
# working json url
"https://github.com/LearnWebCode/json-example/raw/master/animals-1.json",
"/path/to/json/file.json",
"file://path/to/json/file.json",
):
self.assertRaises(
ValueError,
self.createAndRunScript,
'''
import pandas as pd
pd.read_json({})
'''.format(malicous_input)
)
def test_suite():
suite = unittest.TestSuite()
......
......@@ -96,6 +96,7 @@ from Products.ERP5Type.patches import users
from Products.ERP5Type.patches import Publish
from Products.ERP5Type.patches import WSGITask
from Products.ERP5Type.patches import urllib_opener
from Products.ERP5Type.patches import Pandas
# These symbols are required for backward compatibility
from Products.ERP5Type.patches.PropertyManager import ERP5PropertyManager
......
##############################################################################
#
# Copyright (c) 2010 Nexedi SA and Contributors. All Rights Reserved.
#
# WARNING: This program as such is intended to be used by professional
# programmers who take the whole responsibility of assessing all potential
# consequences resulting from its eventual inadequacies and bugs
# End users who are looking for a ready-to-use solution with commercial
# guarantees and support are strongly adviced to contract a Free Software
# Service Company
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#
##############################################################################
try:
  import pandas as pd
  # pandas is optional, see
  # commit 020b1ea39b06f09e6bf493f9083566b43f43b074
  # (https://lab.nexedi.com/nexedi/erp5/commit/020b1ea39b06f09e6bf493f9083566b43f43b074)
except ImportError:
  pass
else:
  import six
  if six.PY2:
    from StringIO import StringIO
  else:
    from io import StringIO
  from AccessControl.ZopeGuards import Unauthorized as ZopeGuardsUnauthorized

  def restrictPandasReadFunction(function_name):
    """
    Replace ``pd.<function_name>`` by a wrapper that only parses ``str`` data.

    The wrapper puts its first argument into a ``StringIO`` and passes that
    object, plus all remaining arguments, to the original pandas function.
    The first argument is therefore always handed over as in-memory text.

    function_name -- name of the attribute of the ``pandas`` module to patch.

    The installed wrapper raises ``ZopeGuardsUnauthorized`` when its first
    argument is not exactly of type ``str``.
    """
    original_function = getattr(pd, function_name)

    def Pandas_read(data_string, *args, **kwargs):
      # Strict: don't use 'isinstance', only allow builtin str
      # objects (subclasses of str are rejected as well).
      if type(data_string) is not str:
        raise ZopeGuardsUnauthorized(
          "Parsing object '%s' of type '%s' is prohibited!" % (data_string, type(data_string))
        )
      string_io = StringIO(data_string)
      return original_function(string_io, *args, **kwargs)

    disclaimer = """\n
Disclaimer:
This function has been patched by ERP5 for zope sandbox usage.
Only objects of type 'str' are valid inputs, file paths, files,
urls, etc. are prohibited or ignored.
"""
    # __doc__ is None when docstrings are stripped (e.g. ``python -OO``);
    # fall back to an empty string so that patching does not fail with a
    # TypeError at import time.
    Pandas_read.__doc__ = (original_function.__doc__ or "") + disclaimer
    setattr(pd, function_name, Pandas_read)

  # Names of the pandas read functions replaced by the restricted wrapper.
  pandas_read_function_to_restrict_tuple = (
    "read_json",
    # "read_html", # needs installation of additional dependency: html5lib
    "read_csv",
    "read_fwf",
    # "read_xml", # only available for pandas version >= 1.3.0
  )

  for pandas_read_function_to_restrict in pandas_read_function_to_restrict_tuple:
    restrictPandasReadFunction(pandas_read_function_to_restrict)
......@@ -472,8 +472,8 @@ else:
ContainerAssertions[pd.Series] = _check_access_wrapper(
pd.Series, dict.fromkeys(series_black_list, restrictedMethod))
pandas_black_list = ('read_csv', 'read_json', 'read_pickle', 'read_hdf',
'read_fwf', 'read_excel', 'read_html', 'read_msgpack',
pandas_black_list = ('read_pickle', 'read_hdf',
'read_excel', 'read_html', 'read_msgpack',
'read_gbq', 'read_sas', 'read_stata')
ModuleSecurityInfo('pandas').declarePrivate(*pandas_black_list)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment