Commit 3a80543e authored by Kevin Modzelewski

Surprisingly, you actually are allowed to modify a std::string

(as long as you get a pointer to it in the right way)
parent d14249f1
......@@ -13,7 +13,11 @@
// limitations under the License.
#include <cmath>
#include <fcntl.h>
#include <sys/stat.h>
#include <unistd.h>
#include "codegen/compvars.h"
#include "core/types.h"
#include "gc/collector.h"
#include "runtime/inline/boxing.h"
......@@ -24,9 +28,37 @@ namespace pyston {
BoxedModule* posix_module;
namespace posix {
// Implements posix.urandom(n): returns a new str holding n bytes read from /dev/urandom.
//
// _n must be a boxed int with 0 <= n < INT_MAX; any violation, or any I/O failure,
// trips a RELEASE_ASSERT.
Box* urandom(Box* _n) {
    RELEASE_ASSERT(_n->cls == int_cls, "");

    int64_t n = static_cast<BoxedInt*>(_n)->n;
    // A negative count would otherwise be converted to a huge unsigned length below.
    RELEASE_ASSERT(n >= 0 && n < INT_MAX, "");

    // The result must be exactly n bytes long (not sizeof(n)), since up to n bytes
    // are written into it below.
    BoxedString* r = static_cast<BoxedString*>(PyString_FromStringAndSize(NULL, n));
    RELEASE_ASSERT(r, "");

    // Nothing to read; also avoids asking for a writeable pointer into an empty string.
    if (n == 0)
        return r;

    int fd = ::open("/dev/urandom", O_RDONLY);
    // open() signals failure with -1; 0 is a valid descriptor.
    RELEASE_ASSERT(fd >= 0, "");

    char* buf = PyString_AsString(r);

    int64_t total_read = 0;
    while (total_read < n) {
        // read() may return fewer bytes than requested, so continue filling from
        // where the previous call stopped instead of overwriting the start.
        ssize_t this_read = ::read(fd, buf + total_read, n - total_read);
        // Must be checked in release builds too: an error (-1) or EOF (0) would
        // otherwise corrupt the count or spin forever.
        RELEASE_ASSERT(this_read > 0, "");
        total_read += this_read;
    }

    // Release the descriptor so repeated calls do not leak one each time.
    ::close(fd);
    return r;
}
}
void setupPosix() {
posix_module = createModule("posix", "__builtin__");
posix_module->giveAttr("urandom", new BoxedFunction(boxRTFunction((void*)posix::urandom, STR, 1)));
posix_module->giveAttr("error", OSError);
}
}
......@@ -77,32 +77,6 @@ extern "C" PyObject* PyDict_New() {
return new BoxedDict();
}
// C API: boxes the C string `s` by handing it to boxStrConstant().
extern "C" PyObject* PyString_FromString(const char* s) {
    BoxedString* boxed = boxStrConstant(s);
    return boxed;
}
// C API: builds a str of length n.
//  - s != NULL: the string is built from s and n via boxStrConstantSize().
//  - s == NULL: the string consists of n NUL bytes.
extern "C" PyObject* PyString_FromStringAndSize(const char* s, ssize_t n) {
    if (s != NULL)
        return boxStrConstantSize(s, n);

    // No source buffer was supplied: hand back a NUL-filled string of the requested size.
    std::string zeroed(n, '\x00');
    return boxString(zeroed);
}
// C API: returns a char* to the character data of the str object `o`.
//
// TODO: this is brittle --
//  - callers are not supposed to modify the data behind this pointer (constness is
//    cast away here), and
//  - the pointer does not have strong lifetime guarantees.
// Properly satisfying this API may require changing the string representation.
// A warning is printed on every call to make that risk visible.
extern "C" char* PyString_AsString(PyObject* o) {
    assert(o->cls == str_cls);
    BoxedString* boxed = static_cast<BoxedString*>(o);

    printf("Warning: PyString_AsString() currently has risky behavior\n");

    const char* contents = boxed->s.data();
    return const_cast<char*>(contents);
}
// C API: returns the length of the str object `s`.
// Trips a RELEASE_ASSERT if `s` is not exactly of class str_cls.
extern "C" Py_ssize_t PyString_Size(PyObject* s) {
    RELEASE_ASSERT(s->cls == str_cls, "");
    BoxedString* boxed = static_cast<BoxedString*>(s);
    return boxed->s.size();
}
extern "C" int PyDict_SetItem(PyObject* mp, PyObject* _key, PyObject* _item) {
Box* b = static_cast<Box*>(mp);
Box* key = static_cast<Box*>(_key);
......
......@@ -222,6 +222,7 @@ extern "C" Box* strMul(BoxedString* lhs, Box* rhs) {
else
return NotImplemented;
// TODO: use createUninitializedString and getWriteableStringContents
int sz = lhs->s.size();
char* buf = new char[sz * n + 1];
for (int i = 0; i < n; i++) {
......@@ -897,6 +898,46 @@ Box* strCount2(BoxedString* self, Box* elt) {
return boxInt(strCount2Unboxed(self, elt));
}
// C API: creates a str object from the C string `s`; the boxing itself is done by
// boxStrConstant().
extern "C" PyObject* PyString_FromString(const char* s) {
    BoxedString* result = boxStrConstant(s);
    return result;
}
// Creates a new BoxedString of length n whose contents the caller is expected to
// overwrite (see getWriteableStringContents()).  The string is currently filled with
// NUL bytes rather than being truly uninitialized.
//
// n must be non-negative.
//
// NOTE(review): the intent is to avoid copying the temporary std::string by moving it
// into the BoxedString.  The BoxedString constructor taking `const std::string&&`
// cannot move out of a const reference, so if that overload is the one selected here
// this still performs one copy -- confirm.
BoxedString* createUninitializedString(ssize_t n) {
    // A negative ssize_t would be converted to an enormous size_t by std::string's
    // (count, char) constructor; reject it up front instead.
    RELEASE_ASSERT(n >= 0, "");
    return new BoxedString(std::string(n, '\x00'));
}
// Returns a pointer through which the characters of `s` may be written.
//
// Why this is believed to be legal:
//   http://stackoverflow.com/questions/14290795/why-is-modifying-a-string-through-a-retrieved-pointer-to-its-data-not-allowed
//  - C++11 requires std::string to store its characters contiguously.
//  - The non-const operator[] appears to be required to hand out a writeable reference.
//    In GCC's libstdc++, calling it on a non-const string returns a writeable reference
//    and "unshares" the string first.
// So, perhaps surprisingly, taking the address of element 0 looks ok.
char* getWriteableStringContents(BoxedString* s) {
    ASSERT(s->s.size() > 0, "not sure whether this is valid for strings with zero size");

    std::string& contents = s->s;
    char& first = contents[0];
    return &first;
}
// C API: builds a str of length n.
//  - s != NULL: the string is built from s and n via boxStrConstantSize().
//  - s == NULL: a fresh string from createUninitializedString(n) is returned, for the
//    caller to fill in.
extern "C" PyObject* PyString_FromStringAndSize(const char* s, ssize_t n) {
    if (s != NULL)
        return boxStrConstantSize(s, n);

    // No source buffer: the caller wants storage it can write into afterwards.
    return createUninitializedString(n);
}
// C API: returns a writeable pointer to the character data of the str object `o`,
// obtained through getWriteableStringContents().
// Trips a RELEASE_ASSERT if `o` is not exactly of class str_cls.
extern "C" char* PyString_AsString(PyObject* o) {
    RELEASE_ASSERT(o->cls == str_cls, "");
    return getWriteableStringContents(static_cast<BoxedString*>(o));
}
// C API: reports how many characters the str object `s` holds.
// Trips a RELEASE_ASSERT if `s` is not exactly of class str_cls.
extern "C" Py_ssize_t PyString_Size(PyObject* s) {
    RELEASE_ASSERT(s->cls == str_cls, "");
    BoxedString* str = static_cast<BoxedString*>(s);
    return str->s.size();
}
static Py_ssize_t string_buffer_getreadbuf(PyObject* self, Py_ssize_t index, const void** ptr) {
RELEASE_ASSERT(index == 0, "");
// I think maybe this can just be a non-release assert? shouldn't be able to call this with
......
......@@ -92,11 +92,20 @@ extern "C" i64 unboxInt(Box*);
extern "C" Box* boxFloat(double d);
extern "C" Box* boxInstanceMethod(Box* obj, Box* func);
extern "C" Box* boxUnboundInstanceMethod(Box* func);
extern "C" Box* boxStringPtr(const std::string* s);
Box* boxString(const std::string& s);
Box* boxString(std::string&& s);
extern "C" BoxedString* boxStrConstant(const char* chars);
extern "C" BoxedString* boxStrConstantSize(const char* chars, size_t n);
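// Creates a string of length n whose contents the caller is expected to overwrite (it is currently
// filled with NUL bytes rather than being truly uninitialized); useful for directly constructing
// into the string and avoiding copies: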
BoxedString* createUninitializedString(ssize_t n);
// Gets a writeable pointer to the contents of a string.
// Is only meant to be used with something just created from createUninitializedString(), though
// in theory it might work in more cases.
char* getWriteableStringContents(BoxedString* s);
extern "C" void listAppendInternal(Box* self, Box* v);
extern "C" void listAppendArrayInternal(Box* self, Box** v, int nelts);
extern "C" Box* boxCLFunction(CLFunction* f, BoxedClosure* closure, bool isGenerator,
......@@ -216,7 +225,7 @@ public:
class BoxedString : public Box {
public:
// const std::basic_string<char, std::char_traits<char>, StlCompatAllocator<char> > s;
const std::string s;
std::string s;
BoxedString(const char* s, size_t n) __attribute__((visibility("default"))) : Box(str_cls), s(s, n) {}
BoxedString(const std::string&& s) __attribute__((visibility("default"))) : Box(str_cls), s(std::move(s)) {}
......
# expected: fail
# - warnings about PyString_AsString(), since that is allowed to be modified
# skip-if: True
# - WIP, crashing somewhere
import hashlib
#for m in [hashlib.md5(), hashlib.sha1(), hashlib.sha256(), hashlib.sha512()]:
......
# expected: fail
# - warnings about PyString_AsString(), since that is allowed to be modified
try:
import _md5 as md5
except ImportError:
......
# allow-warning: converting unicode literal to str
# currently broken:
# import os.path
import os
# Draw two separate 8-byte samples from os.urandom().
r1 = os.urandom(8)
r2 = os.urandom(8)
# Only print properties that do not depend on the random bytes themselves: the two
# lengths, the two types, and whether the samples compare equal.
print len(r1), len(r2), type(r1), type(r2), r1 == r2
# expected: fail
# - warnings about PyString_AsString(), since that is allowed to be modified
try:
import _sha as sha
except ImportError:
......
......@@ -106,7 +106,7 @@ def run_test(fn, check_stats, run_memcheck):
statchecks = []
jit_args = ["-csrq"] + EXTRA_JIT_ARGS
expected = "success"
allow_warning = False
allow_warnings = []
for l in open(fn):
l = l.strip()
if not l:
......@@ -126,8 +126,8 @@ def run_test(fn, check_stats, run_memcheck):
skip = eval(skip_if)
if skip:
return r + " (skipped due to 'skip-if: %s')" % skip_if[:30]
elif l.startswith("# allow-warning"):
allow_warning = True
elif l.startswith("# allow-warning:"):
allow_warnings.append("Warning: " + l.split(':', 1)[1].strip())
assert expected in ("success", "fail", "statfail"), expected
......@@ -146,8 +146,14 @@ def run_test(fn, check_stats, run_memcheck):
elapsed = time.time() - start
stats = {}
if allow_warning:
out_lines = [l for l in out.split('\n') if not l.startswith("Warning: ")]
if allow_warnings:
    # Drop every output line that matches one of the explicitly allowed warnings.
    # re.match() takes (pattern, string): the allowed-warning text is the pattern and
    # the output line is the string being tested.
    out_lines = [l for l in out.split('\n')
                 if not any(re.match(regex, l) for regex in allow_warnings)]
    out = "\n".join(out_lines)
if code == 0 and not TEST_PYPY:
assert out.count("Stats:") == 1
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment