Skip to content

Commit 006e7bc

Browse files
committed
address review
1 parent 00a02fb commit 006e7bc

File tree

1 file changed

+34
-18
lines changed

1 file changed

+34
-18
lines changed

flask_pymongo/__init__.py

Lines changed: 34 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@
2727
__all__ = ("PyMongo", "ASCENDING", "DESCENDING", "BSONObjectIdConverter", "BSONProvider")
2828

2929
import hashlib
30-
from collections import OrderedDict
3130
from mimetypes import guess_type
3231
from typing import Any
3332

@@ -67,8 +66,6 @@ def __init__(
6766
) -> None:
6867
self.cx: MongoClient | None = None
6968
self.db: Database | None = None
70-
self._hash_cache = OrderedDict()
71-
self._hash_limit = 128
7269

7370
if app is not None:
7471
self.init_app(app, uri, *args, **kwargs)
@@ -187,19 +184,20 @@ def get_upload(filename):
187184
response.content_length = fileobj.length
188185
response.last_modified = fileobj.upload_date
189186

190-
# GridFS does not manage its own checksum, so we manage our own using its
191-
# metadata storage, to be used for the etag.
192-
sha1_sum = self._hash_cache.get(str(fileobj._id))
193-
if sha1_sum is None:
194-
# Compute the checksum of the file for the etag.
195-
pos = fileobj.tell()
196-
raw_data = fileobj.read()
197-
fileobj.seek(pos)
198-
sha1_sum = hashlib.sha1(raw_data).hexdigest()
199-
while len(self._hash_cache) >= self._hash_limit:
200-
self._hash_cache.popitem()
201-
self._hash_cache[str(fileobj._id)] = sha1_sum
202-
response.set_etag(sha1_sum)
187+
# GridFS does not manage its own checksum.
188+
# Try to use a sha1 sum that we have added during a save_file.
189+
# Fall back to a legacy md5 sum if it exists.
190+
# Otherwise, compute the sha1 sum directly.
191+
try:
192+
etag = fileobj.sha1
193+
except AttributeError:
194+
etag = fileobj.md5
195+
if etag is None:
196+
pos = fileobj.tell()
197+
raw_data = fileobj.read()
198+
fileobj.seek(pos)
199+
etag = hashlib.sha1(raw_data).hexdigest()
200+
response.set_etag(etag)
203201

204202
response.cache_control.max_age = cache_for
205203
response.cache_control.public = True
@@ -249,5 +247,23 @@ def save_upload(filename):
249247
db_obj = self.db
250248
assert db_obj is not None, "Please initialize the app before calling save_file!"
251249
storage = GridFS(db_obj, base)
252-
id = storage.put(fileobj, filename=filename, content_type=content_type, **kwargs)
253-
return id
250+
251+
# GridFS does not manage its own checksum, so we attach a sha1 to the file
252+
# for use as an etag.
253+
hashingfile = _Wrapper(fileobj)
254+
with storage.new_file(filename=filename, content_type=content_type, **kwargs) as grid_file:
255+
grid_file.write(hashingfile)
256+
grid_file.sha1 = hashingfile.hash.hexdigest()
257+
return grid_file._id
258+
259+
260+
class _Wrapper:
    """File-like adapter that hashes everything read through it.

    Every non-empty chunk returned by the wrapped object's ``read`` is fed
    into a running SHA-1 before being handed back to the caller, so the
    digest of the streamed content can be taken from ``self.hash`` once the
    consumer has finished reading.
    """

    def __init__(self, file):
        """Wrap *file* and start a fresh SHA-1 accumulator.

        :param file: object exposing ``read``; the chunks it returns are
            passed to ``hashlib`` unchanged, so they must be bytes-like.
        """
        self.file = file
        self.hash = hashlib.sha1()

    def read(self, n=-1):
        """Read from the wrapped file and fold the data into ``self.hash``.

        :param n: size argument forwarded verbatim to the wrapped ``read``.
            Defaults to ``-1`` so the wrapper can be called like a regular
            file object (``read()``); how ``-1`` is interpreted is up to the
            wrapped object.
        :return: exactly what the wrapped ``read`` returned.
        """
        data = self.file.read(n)
        # An empty read (end of stream) contributes nothing to the digest.
        if data:
            self.hash.update(data)
        return data

0 commit comments

Comments
 (0)