Commit 3a80543e authored by Kevin Modzelewski

Surprisingly, you actually are allowed to modify a std::string

(as long as you get a pointer to it in the right way)
parent d14249f1
...@@ -13,7 +13,11 @@ ...@@ -13,7 +13,11 @@
// limitations under the License. // limitations under the License.
#include <cmath> #include <cmath>
#include <fcntl.h>
#include <sys/stat.h>
#include <unistd.h>
#include "codegen/compvars.h"
#include "core/types.h" #include "core/types.h"
#include "gc/collector.h" #include "gc/collector.h"
#include "runtime/inline/boxing.h" #include "runtime/inline/boxing.h"
...@@ -24,9 +28,37 @@ namespace pyston { ...@@ -24,9 +28,37 @@ namespace pyston {
BoxedModule* posix_module; BoxedModule* posix_module;
namespace posix {
// Implements posix.urandom(n): returns a str holding n bytes read from /dev/urandom.
//
// _n must be a boxed int with 0 <= n < INT_MAX.  Argument errors and I/O failures
// currently abort via RELEASE_ASSERT instead of raising a Python exception.
Box* urandom(Box* _n) {
    RELEASE_ASSERT(_n->cls == int_cls, "");

    int64_t n = static_cast<BoxedInt*>(_n)->n;
    RELEASE_ASSERT(n >= 0, "");
    RELEASE_ASSERT(n < INT_MAX, "");

    // The result must be exactly n bytes long: the read loop below writes n bytes into it.
    BoxedString* r = static_cast<BoxedString*>(PyString_FromStringAndSize(NULL, n));
    RELEASE_ASSERT(r, "");

    // Nothing to fill in for an empty request; also avoids asking for a writeable
    // pointer into a zero-length string.
    if (n == 0)
        return r;

    int fd = ::open("/dev/urandom", O_RDONLY);
    // 0 is a valid descriptor (e.g. if stdin was closed); only negative values signal failure.
    RELEASE_ASSERT(fd >= 0, "");

    char* buf = PyString_AsString(r);
    int64_t total_read = 0;
    while (total_read < n) {
        // read() may return fewer bytes than requested, so continue after what we already have.
        ssize_t this_read = read(fd, buf + total_read, n - total_read);
        // This has to be checked in release builds too: a -1 or 0 return would otherwise
        // corrupt total_read or spin forever.
        RELEASE_ASSERT(this_read > 0, "");
        total_read += this_read;
    }

    // Don't leak the descriptor on every call.
    ::close(fd);
    return r;
}
}
void setupPosix() { void setupPosix() {
posix_module = createModule("posix", "__builtin__"); posix_module = createModule("posix", "__builtin__");
posix_module->giveAttr("urandom", new BoxedFunction(boxRTFunction((void*)posix::urandom, STR, 1)));
posix_module->giveAttr("error", OSError); posix_module->giveAttr("error", OSError);
} }
} }
...@@ -77,32 +77,6 @@ extern "C" PyObject* PyDict_New() { ...@@ -77,32 +77,6 @@ extern "C" PyObject* PyDict_New() {
return new BoxedDict(); return new BoxedDict();
} }
// C API entry point: wraps the C string `s` in a new string object by delegating
// to boxStrConstant().
extern "C" PyObject* PyString_FromString(const char* s) {
    BoxedString* boxed = boxStrConstant(s);
    return boxed;
}
// C API entry point: creates a string object of length n.
// With a non-NULL `s`, s and n are handed to boxStrConstantSize(); with a NULL `s`,
// the result is a string of n zero bytes.
extern "C" PyObject* PyString_FromStringAndSize(const char* s, ssize_t n) {
    if (s != NULL)
        return boxStrConstantSize(s, n);

    // No source buffer given: hand back a zero-filled string of the requested size.
    return boxString(std::string(n, '\x00'));
}
// C API entry point: exposes the character data of a string object as a char*.
// Prints a warning on every call, because the returned pointer is obtained by
// casting away the constness of the underlying std::string's data().
extern "C" char* PyString_AsString(PyObject* o) {
    assert(o->cls == str_cls);
    BoxedString* str = static_cast<BoxedString*>(o);

    // TODO this approach is fragile:
    // - callers are not supposed to write through this pointer, and
    // - nothing guarantees how long the pointer stays valid.
    // Properly supporting this API may require a different string representation.
    printf("Warning: PyString_AsString() currently has risky behavior\n");

    const char* contents = str->s.data();
    return const_cast<char*>(contents);
}
// C API entry point: returns the length of a string object.
// Aborts (RELEASE_ASSERT) if `s` is not exactly of class str_cls.
extern "C" Py_ssize_t PyString_Size(PyObject* s) {
    RELEASE_ASSERT(s->cls == str_cls, "");
    BoxedString* str = static_cast<BoxedString*>(s);
    return str->s.size();
}
extern "C" int PyDict_SetItem(PyObject* mp, PyObject* _key, PyObject* _item) { extern "C" int PyDict_SetItem(PyObject* mp, PyObject* _key, PyObject* _item) {
Box* b = static_cast<Box*>(mp); Box* b = static_cast<Box*>(mp);
Box* key = static_cast<Box*>(_key); Box* key = static_cast<Box*>(_key);
......
...@@ -222,6 +222,7 @@ extern "C" Box* strMul(BoxedString* lhs, Box* rhs) { ...@@ -222,6 +222,7 @@ extern "C" Box* strMul(BoxedString* lhs, Box* rhs) {
else else
return NotImplemented; return NotImplemented;
// TODO: use createUninitializedString and getWriteableStringContents
int sz = lhs->s.size(); int sz = lhs->s.size();
char* buf = new char[sz * n + 1]; char* buf = new char[sz * n + 1];
for (int i = 0; i < n; i++) { for (int i = 0; i < n; i++) {
...@@ -897,6 +898,46 @@ Box* strCount2(BoxedString* self, Box* elt) { ...@@ -897,6 +898,46 @@ Box* strCount2(BoxedString* self, Box* elt) {
return boxInt(strCount2Unboxed(self, elt)); return boxInt(strCount2Unboxed(self, elt));
} }
// C API entry point: builds a string object from the C string `s` via boxStrConstant().
extern "C" PyObject* PyString_FromString(const char* s) {
    BoxedString* result = boxStrConstant(s);
    return result;
}
// Allocates a new BoxedString of length n whose contents the caller is expected to
// overwrite (see getWriteableStringContents()).  Despite the name, the contents are
// currently zero-filled rather than left indeterminate.
//
// n must be non-negative.
BoxedString* createUninitializedString(ssize_t n) {
    // std::string's (count, char) constructor takes a size_t, so a negative n would be
    // silently converted into an enormous length; reject it up front instead.
    RELEASE_ASSERT(n >= 0, "");

    // The intent is to avoid copying the buffer by moving the temporary into the BoxedString.
    // NOTE(review): the BoxedString constructor taking `const std::string&&` cannot actually
    // move from a const rvalue, so this probably still performs one copy -- confirm.
    return new BoxedString(std::string(n, '\x00'));
}
// Returns a pointer through which the characters of `s` may be modified in place.
// Asserts that the string is non-empty.
char* getWriteableStringContents(BoxedString* s) {
    ASSERT(s->s.size() > 0, "not sure whether this is valid for strings with zero size");

    // Why writing through this pointer is believed to be legal:
    //   http://stackoverflow.com/questions/14290795/why-is-modifying-a-string-through-a-retrieved-pointer-to-its-data-not-allowed
    // - C++11 requires std::string to keep its characters in contiguous storage.
    // - operator[] on a non-const string yields a writeable reference; in GCC's libstdc++
    //   it also "unshares" the string first.
    // So, unlike const_cast'ing the result of data(), taking the address of element 0
    // looks like a sanctioned way to get a mutable buffer.
    std::string& contents = s->s;
    return &contents[0];
}
// C API entry point: creates a string object of length n.
// A NULL `s` requests a fresh buffer from createUninitializedString() for the caller
// to fill in; otherwise s and n are passed to boxStrConstantSize().
extern "C" PyObject* PyString_FromStringAndSize(const char* s, ssize_t n) {
    return s ? boxStrConstantSize(s, n) : createUninitializedString(n);
}
// C API entry point: returns a writeable pointer to the character data of a string object.
// Aborts (RELEASE_ASSERT) if `o` is not exactly of class str_cls.
extern "C" char* PyString_AsString(PyObject* o) {
    RELEASE_ASSERT(o->cls == str_cls, "");
    return getWriteableStringContents(static_cast<BoxedString*>(o));
}
// C API entry point: returns the number of characters stored in a string object.
// Aborts (RELEASE_ASSERT) if `s` is not exactly of class str_cls.
extern "C" Py_ssize_t PyString_Size(PyObject* s) {
    RELEASE_ASSERT(s->cls == str_cls, "");
    BoxedString* str = static_cast<BoxedString*>(s);
    return str->s.size();
}
static Py_ssize_t string_buffer_getreadbuf(PyObject* self, Py_ssize_t index, const void** ptr) { static Py_ssize_t string_buffer_getreadbuf(PyObject* self, Py_ssize_t index, const void** ptr) {
RELEASE_ASSERT(index == 0, ""); RELEASE_ASSERT(index == 0, "");
// I think maybe this can just be a non-release assert? shouldn't be able to call this with // I think maybe this can just be a non-release assert? shouldn't be able to call this with
......
...@@ -92,11 +92,20 @@ extern "C" i64 unboxInt(Box*); ...@@ -92,11 +92,20 @@ extern "C" i64 unboxInt(Box*);
extern "C" Box* boxFloat(double d); extern "C" Box* boxFloat(double d);
extern "C" Box* boxInstanceMethod(Box* obj, Box* func); extern "C" Box* boxInstanceMethod(Box* obj, Box* func);
extern "C" Box* boxUnboundInstanceMethod(Box* func); extern "C" Box* boxUnboundInstanceMethod(Box* func);
extern "C" Box* boxStringPtr(const std::string* s); extern "C" Box* boxStringPtr(const std::string* s);
Box* boxString(const std::string& s); Box* boxString(const std::string& s);
Box* boxString(std::string&& s); Box* boxString(std::string&& s);
extern "C" BoxedString* boxStrConstant(const char* chars); extern "C" BoxedString* boxStrConstant(const char* chars);
extern "C" BoxedString* boxStrConstantSize(const char* chars, size_t n); extern "C" BoxedString* boxStrConstantSize(const char* chars, size_t n);
// Creates a new string of length n for the caller to construct into directly, which
// avoids building the contents elsewhere and copying them in.
// Despite the name, the current implementation zero-fills the contents.
BoxedString* createUninitializedString(ssize_t n);
// Gets a writeable pointer to the contents of a string.
// Is only meant to be used with something just created from createUninitializedString(), though
// in theory it might work in more cases.
// The implementation asserts that the string is non-empty.
char* getWriteableStringContents(BoxedString* s);
extern "C" void listAppendInternal(Box* self, Box* v); extern "C" void listAppendInternal(Box* self, Box* v);
extern "C" void listAppendArrayInternal(Box* self, Box** v, int nelts); extern "C" void listAppendArrayInternal(Box* self, Box** v, int nelts);
extern "C" Box* boxCLFunction(CLFunction* f, BoxedClosure* closure, bool isGenerator, extern "C" Box* boxCLFunction(CLFunction* f, BoxedClosure* closure, bool isGenerator,
...@@ -216,7 +225,7 @@ public: ...@@ -216,7 +225,7 @@ public:
class BoxedString : public Box { class BoxedString : public Box {
public: public:
// const std::basic_string<char, std::char_traits<char>, StlCompatAllocator<char> > s; // const std::basic_string<char, std::char_traits<char>, StlCompatAllocator<char> > s;
const std::string s; std::string s;
BoxedString(const char* s, size_t n) __attribute__((visibility("default"))) : Box(str_cls), s(s, n) {} BoxedString(const char* s, size_t n) __attribute__((visibility("default"))) : Box(str_cls), s(s, n) {}
BoxedString(const std::string&& s) __attribute__((visibility("default"))) : Box(str_cls), s(std::move(s)) {} BoxedString(const std::string&& s) __attribute__((visibility("default"))) : Box(str_cls), s(std::move(s)) {}
......
# expected: fail # skip-if: True
# - warnings about PyString_AsString(), since that is allowed to be modified # - WIP, crashing somewhere
import hashlib import hashlib
#for m in [hashlib.md5(), hashlib.sha1(), hashlib.sha256(), hashlib.sha512()]: #for m in [hashlib.md5(), hashlib.sha1(), hashlib.sha256(), hashlib.sha512()]:
......
# expected: fail
# - warnings about PyString_AsString(), since that is allowed to be modified
try: try:
import _md5 as md5 import _md5 as md5
except ImportError: except ImportError:
......
# allow-warning: converting unicode literal to str
# currently broken:
# import os.path
import os
# Draw two independent 8-byte samples and print their lengths, their types, and whether
# they compare equal (two random 8-byte strings should essentially never be equal).
# NOTE(review): a length of 8 happens to equal sizeof(int64_t), which could mask a
# wrong-length bug in the native implementation -- consider also testing another size.
r1 = os.urandom(8)
r2 = os.urandom(8)
print len(r1), len(r2), type(r1), type(r2), r1 == r2
# expected: fail
# - warnings about PyString_AsString(), since that is allowed to be modified
try: try:
import _sha as sha import _sha as sha
except ImportError: except ImportError:
......
...@@ -106,7 +106,7 @@ def run_test(fn, check_stats, run_memcheck): ...@@ -106,7 +106,7 @@ def run_test(fn, check_stats, run_memcheck):
statchecks = [] statchecks = []
jit_args = ["-csrq"] + EXTRA_JIT_ARGS jit_args = ["-csrq"] + EXTRA_JIT_ARGS
expected = "success" expected = "success"
allow_warning = False allow_warnings = []
for l in open(fn): for l in open(fn):
l = l.strip() l = l.strip()
if not l: if not l:
...@@ -126,8 +126,8 @@ def run_test(fn, check_stats, run_memcheck): ...@@ -126,8 +126,8 @@ def run_test(fn, check_stats, run_memcheck):
skip = eval(skip_if) skip = eval(skip_if)
if skip: if skip:
return r + " (skipped due to 'skip-if: %s')" % skip_if[:30] return r + " (skipped due to 'skip-if: %s')" % skip_if[:30]
elif l.startswith("# allow-warning"): elif l.startswith("# allow-warning:"):
allow_warning = True allow_warnings.append("Warning: " + l.split(':', 1)[1].strip())
assert expected in ("success", "fail", "statfail"), expected assert expected in ("success", "fail", "statfail"), expected
...@@ -146,8 +146,14 @@ def run_test(fn, check_stats, run_memcheck): ...@@ -146,8 +146,14 @@ def run_test(fn, check_stats, run_memcheck):
elapsed = time.time() - start elapsed = time.time() - start
stats = {} stats = {}
if allow_warning: if allow_warnings:
out_lines = [l for l in out.split('\n') if not l.startswith("Warning: ")] out_lines = []
# Drop every output line that matches one of the allowed-warning patterns;
# all other lines are kept, in order.
for l in out.split('\n'):
    for regex in allow_warnings:
        # re.match takes (pattern, string): the allowed warning is the pattern and
        # the output line is the text being tested, not the other way around.
        if re.match(regex, l):
            break
    else:
        # No allowed-warning pattern matched this line, so it is real output.
        out_lines.append(l)
out = "\n".join(out_lines) out = "\n".join(out_lines)
if code == 0 and not TEST_PYPY: if code == 0 and not TEST_PYPY:
assert out.count("Stats:") == 1 assert out.count("Stats:") == 1
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment