Commit 729a81c5 authored by Marius Wachtler's avatar Marius Wachtler

Compress cache files and remove old cache files.

Adds the LZ4 compression library and uses it to compress cached objects.
This saves a lot of space (in my test it reduces the required space to about one-tenth)
and adds a checksum to each file in order to detect truncated cache files,
without reducing the speed.
parent b6ebc815
......@@ -10,3 +10,6 @@
[submodule "test/integration/virtualenv"]
path = test/integration/virtualenv
url = https://github.com/dropbox/virtualenv
[submodule "lz4"]
path = lz4
url = git://github.com/Cyan4973/lz4.git
......@@ -116,6 +116,10 @@ ExternalProject_Add(libunwind
add_subdirectory(libpypa)
add_dependencies(pypa gitsubmodules)
# lz4
add_subdirectory(lz4/cmake_unofficial)
add_dependencies(lz4 gitsubmodules)
# valgrind
if(ENABLE_VALGRIND)
find_package(Valgrind REQUIRED)
......@@ -182,7 +186,7 @@ add_subdirectory(tools)
add_executable(pyston $<TARGET_OBJECTS:PYSTON_MAIN_OBJECT> $<TARGET_OBJECTS:PYSTON_OBJECTS> $<TARGET_OBJECTS:FROM_CPYTHON>)
# Wrap the stdlib in --whole-archive to force all the symbols to be included and eventually exported
target_link_libraries(pyston -Wl,--whole-archive stdlib -Wl,--no-whole-archive pthread m readline sqlite3 gmp ssl crypto unwind pypa double-conversion ${LLVM_LIBS} ${LIBLZMA_LIBRARIES} ${OPTIONAL_LIBRARIES})
target_link_libraries(pyston -Wl,--whole-archive stdlib -Wl,--no-whole-archive pthread m readline sqlite3 gmp ssl crypto unwind pypa liblz4 double-conversion ${LLVM_LIBS} ${LIBLZMA_LIBRARIES} ${OPTIONAL_LIBRARIES})
# copy src/codegen/parse_ast.py to the build directory
add_custom_command(TARGET pyston POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_SOURCE_DIR}/src/codegen/parse_ast.py ${CMAKE_BINARY_DIR}/src/codegen/parse_ast.py)
......
......@@ -88,3 +88,32 @@ products or services of Licensee, or any third party.
8. By copying, installing or otherwise using Python, Licensee
agrees to be bound by the terms and conditions of this License
Agreement.
------
LZ4 Library
Copyright (c) 2011-2014, Yann Collet
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice, this
list of conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
------
......@@ -155,6 +155,7 @@ COMMON_CXXFLAGS += -fexceptions -fno-rtti
COMMON_CXXFLAGS += -Wno-invalid-offsetof # allow the use of "offsetof", and we'll just have to make sure to only use it legally.
COMMON_CXXFLAGS += -DENABLE_INTEL_JIT_EVENTS=$(ENABLE_INTEL_JIT_EVENTS)
COMMON_CXXFLAGS += -I$(DEPS_DIR)/pypa-install/include
COMMON_CXXFLAGS += -I$(DEPS_DIR)/lz4-install/include
ifeq ($(ENABLE_VALGRIND),0)
COMMON_CXXFLAGS += -DNVALGRIND
......@@ -170,6 +171,7 @@ COMMON_CXXFLAGS += -DDEFAULT_PYTHON_MAJOR_VERSION=$(PYTHON_MAJOR_VERSION) -DDEFA
# Use our "custom linker" that calls gold if available
COMMON_LDFLAGS := -B$(TOOLS_DIR)/build_system -L/usr/local/lib -lpthread -lm -lunwind -llzma -L$(DEPS_DIR)/gcc-4.8.2-install/lib64 -lreadline -lgmp -lssl -lcrypto -lsqlite3
COMMON_LDFLAGS += $(DEPS_DIR)/pypa-install/lib/libpypa.a
COMMON_LDFLAGS += $(DEPS_DIR)/lz4-install/lib/liblz4.a
# Conditionally add libtinfo if available - otherwise nothing will be added
COMMON_LDFLAGS += `pkg-config tinfo 2>/dev/null && pkg-config tinfo --libs || echo ""`
......
......@@ -140,6 +140,15 @@ cd gtest-1.7.0
make -j4
```
### LZ4
```
cd ~/pyston_deps
git clone git://github.com/Cyan4973/lz4.git
mkdir lz4-install
cd lz4/lib
DESTDIR="$HOME/pyston_deps/lz4-install" PREFIX="/" make install
```
---
At this point you should be able to run `make check` (in the `~/pyston` directory) and pass the tests. See the main README for more information about available targets and options.
......
Subproject commit 160661c7a4cbf805f4af74d2e3932a17a66e6ce7
......@@ -8,6 +8,7 @@ set_source_files_properties(jit.cpp PROPERTIES COMPILE_DEFINITIONS "GITREV=${GIT
include_directories(${CMAKE_BINARY_DIR})
include_directories(${CMAKE_BINARY_DIR}/libunwind/include)
include_directories(${CMAKE_SOURCE_DIR}/libpypa/src)
include_directories(${CMAKE_SOURCE_DIR}/lz4/lib)
if(ENABLE_GPERFTOOLS)
set(OPTIONAL_SRCS ${OPTIONAL_SRCS} codegen/profiling/pprof.cpp)
......@@ -109,7 +110,7 @@ add_library(PYSTON_OBJECTS OBJECT ${OPTIONAL_SRCS}
add_dependencies(PYSTON_OBJECTS libunwind pypa ${LLVM_LIBS})
add_library(PYSTON_MAIN_OBJECT OBJECT jit.cpp)
add_dependencies(PYSTON_MAIN_OBJECT libunwind pypa ${LLVM_LIBS})
add_dependencies(PYSTON_MAIN_OBJECT libunwind pypa liblz4 ${LLVM_LIBS})
# build stdlib
add_subdirectory(runtime/inline)
......
......@@ -16,6 +16,7 @@
#include <cstdio>
#include <iostream>
#include <lz4frame.h>
#include <openssl/evp.h>
#include <unordered_map>
......@@ -120,6 +121,71 @@ static llvm::Module* loadStdlib() {
return m;
}
// Reads and writes files whose content is stored as a single LZ4 frame.
//
// Frames are written with the content checksum enabled and with the uncompressed
// size recorded in the frame header; the reader uses that size to verify that the
// whole content was recovered.
class CompressedFile {
public:
    // Compresses `data` into one LZ4 frame and writes it to `file_name`.
    //
    // Returns true only if the frame was compressed and written without error.
    // On any failure after the file was opened, the (empty or partial) file is
    // removed again so that no unusable entry is left behind.
    static bool writeFile(llvm::StringRef file_name, llvm::StringRef data) {
        std::error_code error_code;
        llvm::raw_fd_ostream file(file_name, error_code, llvm::sys::fs::F_RW);
        if (error_code)
            return false;

        LZ4F_preferences_t preferences;
        memset(&preferences, 0, sizeof(preferences));
        preferences.frameInfo.contentChecksumFlag = contentChecksumEnabled;
        preferences.frameInfo.contentSize = data.size();

        // LZ4F_compressFrameBound() gives the worst-case frame size for this input.
        size_t max_size = LZ4F_compressFrameBound(data.size(), &preferences);
        std::vector<char> compressed(max_size);
        size_t compressed_size
            = LZ4F_compressFrame(compressed.data(), max_size, data.data(), data.size(), &preferences);

        bool ok = !LZ4F_isError(compressed_size);
        if (ok)
            file.write(compressed.data(), compressed_size);

        // Close explicitly so that buffered data is flushed and write errors become visible here.
        file.close();
        if (file.has_error()) {
            // Acknowledge the error ourselves so the stream does not report it again when destroyed.
            file.clear_error();
            ok = false;
        }

        if (!ok)
            llvm::sys::fs::remove(file_name);
        return ok;
    }

    // Reads `file_name`, decompresses the LZ4 frame it contains and returns the
    // content in a freshly allocated memory buffer.
    //
    // Returns an empty pointer if the file can not be read, if the frame header
    // or any block fails to decode, or if the number of decompressed bytes does
    // not match the content size recorded in the frame header.
    static std::unique_ptr<llvm::MemoryBuffer> getFile(llvm::StringRef file_name) {
        auto compressed_content = llvm::MemoryBuffer::getFile(file_name, -1, false);
        if (!compressed_content)
            return std::unique_ptr<llvm::MemoryBuffer>();

        LZ4F_decompressionContext_t context;
        if (LZ4F_isError(LZ4F_createDecompressionContext(&context, LZ4F_VERSION)))
            return std::unique_ptr<llvm::MemoryBuffer>();

        const char* start = (*compressed_content)->getBufferStart();
        size_t compressed_size = (*compressed_content)->getBufferSize();

        LZ4F_frameInfo_t frame_info;
        memset(&frame_info, 0, sizeof(frame_info));

        // In: number of bytes available. Out: number of bytes the header occupied.
        size_t pos = 0;
        size_t remaining = compressed_size;
        // A header which can not be decoded must be treated as a failure; otherwise an
        // invalid file could be mistaken for a valid frame with empty content.
        bool ok = !LZ4F_isError(LZ4F_getFrameInfo(context, &frame_info, start, &remaining));

        std::vector<char> uncompressed;
        if (ok) {
            pos += remaining;
            // The recorded size comes from the file and may be corrupt, so it is only used
            // as a bounded allocation hint; the vector still grows on demand.
            const unsigned long long max_reserve = 64ULL * 1024 * 1024;
            uncompressed.reserve(static_cast<size_t>(
                frame_info.contentSize < max_reserve ? frame_info.contentSize : max_reserve));
        }

        while (ok && pos < compressed_size) {
            unsigned char buff[4096];
            size_t buff_size = sizeof(buff);   // in: capacity of buff, out: bytes produced
            remaining = compressed_size - pos; // in: bytes available, out: bytes consumed
            size_t result = LZ4F_decompress(context, buff, &buff_size, start + pos, &remaining, NULL);

            // Besides real decode errors, also bail out if the decoder made no progress at all,
            // because the loop would otherwise never terminate.
            if (LZ4F_isError(result) || (remaining == 0 && buff_size == 0)) {
                ok = false;
                break;
            }

            pos += remaining;
            if (buff_size != 0)
                uncompressed.insert(uncompressed.end(), buff, buff + buff_size);
        }

        LZ4F_freeDecompressionContext(context);

        // Detects truncated files: everything announced in the header must have been recovered.
        if (!ok || uncompressed.size() != frame_info.contentSize)
            return std::unique_ptr<llvm::MemoryBuffer>();

        return llvm::MemoryBuffer::getMemBufferCopy(llvm::StringRef(uncompressed.data(), uncompressed.size()));
    }
};
class PystonObjectCache : public llvm::ObjectCache {
private:
// Stream which calculates the SHA256 hash of the data written to it.
......@@ -154,15 +220,18 @@ private:
}
};
llvm::SmallString<128> cache_dir;
std::string module_identifier;
std::string hash_before_codegen;
public:
PystonObjectCache() {
llvm::sys::fs::current_path(cache_dir);
llvm::sys::path::append(cache_dir, "pyston_object_cache");
llvm::sys::path::home_directory(cache_dir);
llvm::sys::path::append(cache_dir, ".cache");
llvm::sys::path::append(cache_dir, "pyston");
llvm::sys::path::append(cache_dir, "object_cache");
cleanupCacheDirectory();
}
......@@ -177,14 +246,10 @@ public:
llvm::SmallString<128> cache_file = cache_dir;
llvm::sys::path::append(cache_file, hash_before_codegen);
if (!llvm::sys::fs::exists(cache_dir.str()) && llvm::sys::fs::create_directory(cache_dir.str())) {
fprintf(stderr, "Unable to create cache directory\n");
if (!llvm::sys::fs::exists(cache_dir.str()) && llvm::sys::fs::create_directories(cache_dir.str()))
return;
}
std::error_code error_code;
llvm::raw_fd_ostream IRObjectFile(cache_file.c_str(), error_code, llvm::sys::fs::F_RW);
RELEASE_ASSERT(!error_code, "");
IRObjectFile << Obj.getBuffer();
CompressedFile::writeFile(cache_file, Obj.getBuffer());
}
#if LLVMREV < 215566
......@@ -211,15 +276,15 @@ public:
// - clear the cache directory
// - run pyston
// - run pyston a second time
// - Now look for "*_second" files in the cache directory and compare them to the "*_first" IR dump
// - Now look for "*_second.ll" files in the cache directory and compare them to the "*_first.ll" IR dump
std::string llvm_ir;
llvm::raw_string_ostream sstr(llvm_ir);
M->print(sstr, 0);
sstr.flush();
std::string filename = cache_dir.str().str() + "/" + module_identifier + "_first";
std::string filename = cache_dir.str().str() + "/" + module_identifier + "_first.ll";
if (llvm::sys::fs::exists(filename))
filename = cache_dir.str().str() + "/" + module_identifier + "_second";
filename = cache_dir.str().str() + "/" + module_identifier + "_second.ll";
FILE* f = fopen(filename.c_str(), "wt");
fwrite(llvm_ir.c_str(), 1, llvm_ir.size(), f);
fclose(f);
......@@ -230,19 +295,40 @@ public:
return NULL;
}
auto rtn = llvm::MemoryBuffer::getFile(cache_file.str(), -1, false);
if (!rtn) {
std::unique_ptr<llvm::MemoryBuffer> mem_buff = CompressedFile::getFile(cache_file);
if (!mem_buff) {
jit_objectcache_misses.log();
return NULL;
}
jit_objectcache_hits.log();
return mem_buff;
}
// Limits the number of entries kept in cache_dir. Invoked from the PystonObjectCache constructor.
void cleanupCacheDirectory() {
// Find all files inside the cache directory, if the number of files is larger than
// MAX_OBJECT_CACHE_ENTRIES,
// sort them by last modification time and remove the oldest excessive ones.
typedef std::pair<std::string, llvm::sys::TimeValue> CacheFileEntry;
std::vector<CacheFileEntry> cache_files;
std::error_code ec;
// The iteration ends silently as soon as the iterator reports an error through `ec`.
for (llvm::sys::fs::directory_iterator file(cache_dir.str(), ec), end; !ec && file != end; file.increment(ec)) {
llvm::sys::fs::file_status status;
if (file->status(status))
continue; // ignore files where we can't retrieve the file status.
cache_files.emplace_back(std::make_pair(file->path(), status.getLastModificationTime()));
}
// Number of entries above the limit; zero or a negative value means nothing has to be removed.
int num_expired = cache_files.size() - MAX_OBJECT_CACHE_ENTRIES;
if (num_expired <= 0)
return;
// Ascending by modification time, so the oldest entries end up at the front.
std::stable_sort(cache_files.begin(), cache_files.end(),
[](const CacheFileEntry& lhs, const CacheFileEntry& rhs) { return lhs.second < rhs.second; });
// MCJIT will want to write into this buffer, and we don't want that
// because the file has probably just been mmapped. Instead we make
// a copy. The filed-based buffer will be released when it goes
// out of scope.
return llvm::MemoryBuffer::getMemBufferCopy((*rtn)->getBuffer());
// Remove the first num_expired entries; the result of remove() is not checked.
for (int i = 0; i < num_expired; ++i)
llvm::sys::fs::remove(cache_files[i].first);
}
};
......
......@@ -50,6 +50,8 @@ int OSR_THRESHOLD_T2 = 10000;
int REOPT_THRESHOLD_T2 = 10000;
int SPECULATION_THRESHOLD = 100;
int MAX_OBJECT_CACHE_ENTRIES = 500;
static bool _GLOBAL_ENABLE = 1;
bool ENABLE_ICS = 1 && _GLOBAL_ENABLE;
bool ENABLE_ICGENERICS = 1 && ENABLE_ICS;
......
......@@ -35,6 +35,7 @@ extern int OSR_THRESHOLD_INTERPRETER, REOPT_THRESHOLD_INTERPRETER;
extern int OSR_THRESHOLD_BASELINE, REOPT_THRESHOLD_BASELINE;
extern int OSR_THRESHOLD_T2, REOPT_THRESHOLD_T2;
extern int SPECULATION_THRESHOLD;
extern int MAX_OBJECT_CACHE_ENTRIES;
extern bool SHOW_DISASM, FORCE_INTERPRETER, FORCE_OPTIMIZE, PROFILE, DUMPJIT, TRAP, USE_STRIPPED_STDLIB,
CONTINUE_AFTER_FATAL, ENABLE_INTERPRETER, ENABLE_PYPA_PARSER, USE_REGALLOC_BASIC, PAUSE_AT_ABORT;
......
......@@ -9,7 +9,7 @@ add_custom_target(unittests)
macro(add_unittest unittest)
add_executable(${unittest}_unittest EXCLUDE_FROM_ALL ${unittest}.cpp $<TARGET_OBJECTS:PYSTON_OBJECTS> $<TARGET_OBJECTS:FROM_CPYTHON>)
target_link_libraries(${unittest}_unittest stdlib sqlite3 gmp ssl crypto readline pypa double-conversion unwind gtest gtest_main ${LLVM_LIBS} ${LIBLZMA_LIBRARIES})
target_link_libraries(${unittest}_unittest stdlib sqlite3 gmp ssl crypto readline pypa liblz4 double-conversion unwind gtest gtest_main ${LLVM_LIBS} ${LIBLZMA_LIBRARIES})
add_dependencies(unittests ${unittest}_unittest)
endmacro()
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment