Commit 7bb5e112 authored by Julien Muchembled

Hash files while uploading them, and accept StringIO to avoid use of temporary files

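When uploading a big file, this avoids trashing the kernel cache once more, because the file is now read only once (hashed while it is sent) instead of once for hashing and once for uploading.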
parent 555902b4
...@@ -17,6 +17,7 @@ import base64 ...@@ -17,6 +17,7 @@ import base64
import hashlib import hashlib
import httplib import httplib
import json import json
import os
import socket import socket
import subprocess import subprocess
import tempfile import tempfile
...@@ -40,6 +41,35 @@ def urljoin(a, b): ...@@ -40,6 +41,35 @@ def urljoin(a, b):
return a + b return a + b
class hashing_file(object):
    """Read-only file wrapper that computes a SHA-512 digest of what is read.

    Every chunk returned by :meth:`read` is fed to a ``hashlib.sha512``
    object, so the digest of the whole content is available once the
    wrapped file has been read to its end, without a separate hashing pass.

    The wrapped object must provide ``read``; to support ``len()`` it must
    additionally provide either a usable ``fileno`` or ``tell``/``seek``.
    """

    def __init__(self, file):
        # The wrapper does not rewind the file: hashing starts from
        # whatever position `file` is at when reading begins.
        self._f = file
        self._h = hashlib.sha512()

    def hexdigest(self):
        """Return the hexadecimal SHA-512 digest of the data read so far.

        Raises AssertionError if the wrapped file still has unread data,
        because the digest would then not cover the whole content.
        """
        # Explicit raise rather than an `assert` statement, so that the
        # check is still performed when Python runs with -O.
        if self._f.read(1):
            raise AssertionError('hexdigest() called before end of file')
        return self._h.hexdigest()

    def read(self, *args, **kw):
        """Read from the wrapped file, hash the returned data, return it."""
        d = self._f.read(*args, **kw)
        self._h.update(d)
        return d

    def __len__(self):
        """Return the total size, in bytes, of the wrapped file.

        The size comes from ``os.fstat`` when the file has a real file
        descriptor; otherwise it is found by seeking to the end, and the
        current position is restored afterwards.
        """
        # NOTE(review): presumably here so that an HTTP client can derive
        # Content-Length from len(body) -- confirm against the caller.
        from io import UnsupportedOperation
        f = self._f
        try:
            fd = f.fileno()
        except (AttributeError, UnsupportedOperation):
            # In-memory files: cStringIO objects have no fileno() at all,
            # whereas io.BytesIO/io.StringIO define it but raise
            # UnsupportedOperation.
            pos = f.tell()
            try:
                f.seek(0, 2)
                return f.tell()
            finally:
                f.seek(pos)
        return os.fstat(fd).st_size
class NetworkcacheClient(object): class NetworkcacheClient(object):
''' '''
NetworkcacheClient is a wrapper for httplib. NetworkcacheClient is a wrapper for httplib.
...@@ -140,17 +170,9 @@ class NetworkcacheClient(object): ...@@ -140,17 +170,9 @@ class NetworkcacheClient(object):
If urlmd5 is None it must only upload to SHACACHE. If urlmd5 is None it must only upload to SHACACHE.
Otherwise, it must create a new entry on SHADIR. Otherwise, it must create a new entry on SHADIR.
''' '''
sha512sum = hashlib.sha512()
# do not trust, go to beginning of opened file # do not trust, go to beginning of opened file
file_descriptor.seek(0) file_descriptor.seek(0)
while True: file_descriptor = hashing_file(file_descriptor)
d = file_descriptor.read(sha512sum.block_size)
if not d:
break
sha512sum.update(d)
sha512sum = sha512sum.hexdigest()
file_descriptor.seek(0)
if self.shacache_scheme == 'https': if self.shacache_scheme == 'https':
shacache_connection = httplib.HTTPSConnection(self.shacache_host, shacache_connection = httplib.HTTPSConnection(self.shacache_host,
self.shacache_port, key_file=self.shacache_key_file, self.shacache_port, key_file=self.shacache_key_file,
...@@ -164,15 +186,15 @@ class NetworkcacheClient(object): ...@@ -164,15 +186,15 @@ class NetworkcacheClient(object):
print 'uploaded' print 'uploaded'
result = shacache_connection.getresponse() result = shacache_connection.getresponse()
print 'answered' print 'answered'
data = result.read() sha512sum = result.read()
print 'read' print 'read'
finally: finally:
shacache_connection.close() shacache_connection.close()
if result.status != 201 or data != sha512sum: if result.status != 201 or sha512sum != file_descriptor.hexdigest():
raise UploadError('Failed to upload the file to SHACACHE Server.' \ raise UploadError('Failed to upload the file to SHACACHE Server.'
'URL: %s. Response code: %s. Response data: %s' % \ 'URL: %s. Response code: %s. Response data: %s'
(self.shacache_host, result.status, data)) % (self.shacache_host, result.status, sha512sum))
if key is not None: if key is not None:
if file_name is None or urlmd5 is None: if file_name is None or urlmd5 is None:
...@@ -222,15 +244,8 @@ class NetworkcacheClient(object): ...@@ -222,15 +244,8 @@ class NetworkcacheClient(object):
If key is None, it must only upload to SHACACHE. If key is None, it must only upload to SHACACHE.
Otherwise, it must create a new entry on SHADIR. Otherwise, it must create a new entry on SHADIR.
''' '''
sha512sum = hashlib.sha512()
file_descriptor.seek(0)
while True:
d = file_descriptor.read(sha512sum.block_size)
if not d:
break
sha512sum.update(d)
sha512sum = sha512sum.hexdigest()
file_descriptor.seek(0) file_descriptor.seek(0)
file_descriptor = hashing_file(file_descriptor)
if self.shacache_scheme == 'https': if self.shacache_scheme == 'https':
shacache_connection = httplib.HTTPSConnection(self.shacache_host, shacache_connection = httplib.HTTPSConnection(self.shacache_host,
self.shacache_port, key_file = self.shacache_key_file, self.shacache_port, key_file = self.shacache_key_file,
...@@ -242,16 +257,17 @@ class NetworkcacheClient(object): ...@@ -242,16 +257,17 @@ class NetworkcacheClient(object):
shacache_connection.request('POST', self.shacache_path, file_descriptor, shacache_connection.request('POST', self.shacache_path, file_descriptor,
self.shacache_header_dict) self.shacache_header_dict)
result = shacache_connection.getresponse() result = shacache_connection.getresponse()
data = result.read() sha512sum = result.read()
finally: finally:
shacache_connection.close() shacache_connection.close()
if result.status != 201 or data != sha512sum:
raise UploadError('Failed to upload the file to SHACACHE Server.' \ if result.status != 201 or sha512sum != file_descriptor.hexdigest():
'URL: %s. Response code: %s. Response data: %s' % \ raise UploadError('Failed to upload the file to SHACACHE Server.'
(self.shacache_host, result.status, data)) 'URL: %s. Response code: %s. Response data: %s'
% (self.shacache_host, result.status, sha512sum))
if key is not None: if key is not None:
kw['sha512'] = data # always update sha512sum kw['sha512'] = sha512sum # always update sha512sum
sha_entry = json.dumps(kw) sha_entry = json.dumps(kw)
try: try:
signature = self._getSignatureString(sha_entry) signature = self._getSignatureString(sha_entry)
...@@ -283,6 +299,9 @@ class NetworkcacheClient(object): ...@@ -283,6 +299,9 @@ class NetworkcacheClient(object):
def download(self, sha512sum): def download(self, sha512sum):
''' Download the file. ''' Download the file.
It uses http GET request method. It uses http GET request method.
# do not trust, go to beginning of opened file
file_descriptor.seek(0)
file_descriptor = hashing_file(file_descriptor)
''' '''
sha_cache_url = urljoin(self.shacache_url, sha512sum) sha_cache_url = urljoin(self.shacache_url, sha512sum)
request = urllib2.Request(url=sha_cache_url, data=None, request = urllib2.Request(url=sha_cache_url, data=None,
......
...@@ -18,6 +18,7 @@ import unittest ...@@ -18,6 +18,7 @@ import unittest
import slapos.libnetworkcache import slapos.libnetworkcache
import slapos.signature import slapos.signature
import sys import sys
from cStringIO import StringIO
class NCHandler(BaseHTTPServer.BaseHTTPRequestHandler): class NCHandler(BaseHTTPServer.BaseHTTPRequestHandler):
...@@ -150,7 +151,7 @@ class OfflineTest(unittest.TestCase): ...@@ -150,7 +151,7 @@ class OfflineTest(unittest.TestCase):
def test_upload_offline(self): def test_upload_offline(self):
nc = slapos.libnetworkcache.NetworkcacheClient(self.shacache_url, nc = slapos.libnetworkcache.NetworkcacheClient(self.shacache_url,
self.shadir_url) self.shadir_url)
self.assertRaises(IOError, nc.upload, tempfile.TemporaryFile()) self.assertRaises(IOError, nc.upload, StringIO())
def test_init_method_normal_http_url(self): def test_init_method_normal_http_url(self):
""" """
...@@ -192,9 +193,8 @@ class OnlineMixin: ...@@ -192,9 +193,8 @@ class OnlineMixin:
if not 'TEST_SHA_CACHE' in os.environ and not 'TEST_SHA_DIR' in os.environ: if not 'TEST_SHA_CACHE' in os.environ and not 'TEST_SHA_DIR' in os.environ:
self.tree = tempfile.mkdtemp() self.tree = tempfile.mkdtemp()
self.thread = Server.run(self.tree, (self.host, self.port), self.handler) self.thread = Server.run(self.tree, (self.host, self.port), self.handler)
self.test_data = tempfile.TemporaryFile()
self.test_string = str(random.random()) self.test_string = str(random.random())
self.test_data.write(self.test_string) self.test_data = StringIO(self.test_string)
self.test_shasum = hashlib.sha512(self.test_string).hexdigest() self.test_shasum = hashlib.sha512(self.test_string).hexdigest()
def tearDown(self): def tearDown(self):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment