Commit 7bb5e112 authored by Julien Muchembled

Hash files while uploading them, and accept StringIO to avoid use of temporary files

When uploading a big file, this avoids trashing the kernel cache a second time, because the file is no longer read twice (once to hash it, then once more to send it).
parent 555902b4
......@@ -17,6 +17,7 @@ import base64
import hashlib
import httplib
import json
import os
import socket
import subprocess
import tempfile
......@@ -40,6 +41,35 @@ def urljoin(a, b):
return a + b
class hashing_file(object):
    """Read-only wrapper that hashes a file-like object while it is read.

    Every chunk returned by :meth:`read` is fed to a SHA-512 hash, so the
    digest of the content is available once the wrapped file has been
    consumed, without reading it a second time.

    ``len()`` is supported so that the wrapper can be handed to code that
    needs the size of the body up front (presumably httplib, to compute
    the Content-Length header -- confirm against the callers).
    """

    def __init__(self, file):
        """Wrap *file*, any object with ``read`` and either ``fileno`` or
        ``seek``/``tell``."""
        self._f = file
        self._h = hashlib.sha512()

    def hexdigest(self):
        """Return the hexadecimal SHA-512 digest of everything read so far.

        Raises AssertionError if the wrapped file still has unread data,
        because the digest would then only cover part of the content.
        """
        # Explicit raise rather than an ``assert`` statement: the check must
        # survive ``python -O``, otherwise a partial digest would be
        # returned silently.
        if self._f.read(1):
            raise AssertionError(
                'hexdigest() called before the file was fully read')
        return self._h.hexdigest()

    def read(self, *args, **kw):
        """Read from the wrapped file, updating the hash with the result."""
        d = self._f.read(*args, **kw)
        self._h.update(d)
        return d

    def __len__(self):
        """Return the total size of the wrapped file, in bytes.

        The size is taken from ``os.fstat`` when the file is backed by a
        file descriptor. Otherwise it is measured by seeking to the end,
        and the current position is restored afterwards.
        """
        f = self._f
        try:
            fd = f.fileno()
        except (AttributeError, IOError, ValueError):
            # AttributeError: no fileno() at all (e.g. cStringIO).
            # IOError/ValueError: io.BytesIO & co. define fileno() but raise
            # io.UnsupportedOperation, which derives from both.
            pos = f.tell()
            try:
                f.seek(0, 2)
                return f.tell()
            finally:
                f.seek(pos)
        return os.fstat(fd).st_size
class NetworkcacheClient(object):
'''
NetworkcacheClient is a wrapper for httplib.
......@@ -140,17 +170,9 @@ class NetworkcacheClient(object):
If urlmd5 is None it must only upload to SHACACHE.
Otherwise, it must create a new entry on SHADIR.
'''
sha512sum = hashlib.sha512()
# do not trust, go to beginning of opened file
file_descriptor.seek(0)
while True:
d = file_descriptor.read(sha512sum.block_size)
if not d:
break
sha512sum.update(d)
sha512sum = sha512sum.hexdigest()
file_descriptor.seek(0)
file_descriptor = hashing_file(file_descriptor)
if self.shacache_scheme == 'https':
shacache_connection = httplib.HTTPSConnection(self.shacache_host,
self.shacache_port, key_file=self.shacache_key_file,
......@@ -164,15 +186,15 @@ class NetworkcacheClient(object):
print 'uploaded'
result = shacache_connection.getresponse()
print 'answered'
data = result.read()
sha512sum = result.read()
print 'read'
finally:
shacache_connection.close()
if result.status != 201 or data != sha512sum:
raise UploadError('Failed to upload the file to SHACACHE Server.' \
'URL: %s. Response code: %s. Response data: %s' % \
(self.shacache_host, result.status, data))
if result.status != 201 or sha512sum != file_descriptor.hexdigest():
raise UploadError('Failed to upload the file to SHACACHE Server.'
'URL: %s. Response code: %s. Response data: %s'
% (self.shacache_host, result.status, sha512sum))
if key is not None:
if file_name is None or urlmd5 is None:
......@@ -222,15 +244,8 @@ class NetworkcacheClient(object):
If key is None, it must only upload to SHACACHE.
Otherwise, it must create a new entry on SHADIR.
'''
sha512sum = hashlib.sha512()
file_descriptor.seek(0)
while True:
d = file_descriptor.read(sha512sum.block_size)
if not d:
break
sha512sum.update(d)
sha512sum = sha512sum.hexdigest()
file_descriptor.seek(0)
file_descriptor = hashing_file(file_descriptor)
if self.shacache_scheme == 'https':
shacache_connection = httplib.HTTPSConnection(self.shacache_host,
self.shacache_port, key_file = self.shacache_key_file,
......@@ -242,16 +257,17 @@ class NetworkcacheClient(object):
shacache_connection.request('POST', self.shacache_path, file_descriptor,
self.shacache_header_dict)
result = shacache_connection.getresponse()
data = result.read()
sha512sum = result.read()
finally:
shacache_connection.close()
if result.status != 201 or data != sha512sum:
raise UploadError('Failed to upload the file to SHACACHE Server.' \
'URL: %s. Response code: %s. Response data: %s' % \
(self.shacache_host, result.status, data))
if result.status != 201 or sha512sum != file_descriptor.hexdigest():
raise UploadError('Failed to upload the file to SHACACHE Server.'
'URL: %s. Response code: %s. Response data: %s'
% (self.shacache_host, result.status, sha512sum))
if key is not None:
kw['sha512'] = data # always update sha512sum
kw['sha512'] = sha512sum # always update sha512sum
sha_entry = json.dumps(kw)
try:
signature = self._getSignatureString(sha_entry)
......@@ -283,6 +299,9 @@ class NetworkcacheClient(object):
def download(self, sha512sum):
''' Download the file.
It uses http GET request method.
# do not trust, go to beginning of opened file
file_descriptor.seek(0)
file_descriptor = hashing_file(file_descriptor)
'''
sha_cache_url = urljoin(self.shacache_url, sha512sum)
request = urllib2.Request(url=sha_cache_url, data=None,
......
......@@ -18,6 +18,7 @@ import unittest
import slapos.libnetworkcache
import slapos.signature
import sys
from cStringIO import StringIO
class NCHandler(BaseHTTPServer.BaseHTTPRequestHandler):
......@@ -150,7 +151,7 @@ class OfflineTest(unittest.TestCase):
def test_upload_offline(self):
nc = slapos.libnetworkcache.NetworkcacheClient(self.shacache_url,
self.shadir_url)
self.assertRaises(IOError, nc.upload, tempfile.TemporaryFile())
self.assertRaises(IOError, nc.upload, StringIO())
def test_init_method_normal_http_url(self):
"""
......@@ -192,9 +193,8 @@ class OnlineMixin:
if not 'TEST_SHA_CACHE' in os.environ and not 'TEST_SHA_DIR' in os.environ:
self.tree = tempfile.mkdtemp()
self.thread = Server.run(self.tree, (self.host, self.port), self.handler)
self.test_data = tempfile.TemporaryFile()
self.test_string = str(random.random())
self.test_data.write(self.test_string)
self.test_data = StringIO(self.test_string)
self.test_shasum = hashlib.sha512(self.test_string).hexdigest()
def tearDown(self):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment