|
27 | 27 | __all__ = ("PyMongo", "ASCENDING", "DESCENDING", "BSONObjectIdConverter", "BSONProvider")
|
28 | 28 |
|
29 | 29 | import hashlib
|
30 |
| -from collections import OrderedDict |
31 | 30 | from mimetypes import guess_type
|
32 | 31 | from typing import Any
|
33 | 32 |
|
@@ -67,8 +66,6 @@ def __init__(
|
67 | 66 | ) -> None:
|
68 | 67 | self.cx: MongoClient | None = None
|
69 | 68 | self.db: Database | None = None
|
70 |
| - self._hash_cache = OrderedDict() |
71 |
| - self._hash_limit = 128 |
72 | 69 |
|
73 | 70 | if app is not None:
|
74 | 71 | self.init_app(app, uri, *args, **kwargs)
|
@@ -187,19 +184,20 @@ def get_upload(filename):
|
187 | 184 | response.content_length = fileobj.length
|
188 | 185 | response.last_modified = fileobj.upload_date
|
189 | 186 |
|
190 |
| - # GridFS does not manage its own checksum, so we manage our own using its |
191 |
| - # metadata storage, to be used for the etag. |
192 |
| - sha1_sum = self._hash_cache.get(str(fileobj._id)) |
193 |
| - if sha1_sum is None: |
194 |
| - # Compute the checksum of the file for the etag. |
195 |
| - pos = fileobj.tell() |
196 |
| - raw_data = fileobj.read() |
197 |
| - fileobj.seek(pos) |
198 |
| - sha1_sum = hashlib.sha1(raw_data).hexdigest() |
199 |
| - while len(self._hash_cache) >= self._hash_limit: |
200 |
| - self._hash_cache.popitem() |
201 |
| - self._hash_cache[str(fileobj._id)] = sha1_sum |
202 |
| - response.set_etag(sha1_sum) |
| 187 | + # GridFS does not manage its own checksum. |
| 188 | + # Try to use a sha1 sum that we have added during a save_file. |
| 189 | + # Fall back to a legacy md5 sum if it exists. |
| 190 | + # Otherwise, compute the sha1 sum directly. |
| 191 | + try: |
| 192 | + etag = fileobj.sha1 |
| 193 | + except AttributeError: |
| 194 | + etag = fileobj.md5 |
| 195 | + if etag is None: |
| 196 | + pos = fileobj.tell() |
| 197 | + raw_data = fileobj.read() |
| 198 | + fileobj.seek(pos) |
| 199 | + etag = hashlib.sha1(raw_data).hexdigest() |
| 200 | + response.set_etag(etag) |
203 | 201 |
|
204 | 202 | response.cache_control.max_age = cache_for
|
205 | 203 | response.cache_control.public = True
|
@@ -249,5 +247,23 @@ def save_upload(filename):
|
249 | 247 | db_obj = self.db
|
250 | 248 | assert db_obj is not None, "Please initialize the app before calling save_file!"
|
251 | 249 | storage = GridFS(db_obj, base)
|
252 |
| - id = storage.put(fileobj, filename=filename, content_type=content_type, **kwargs) |
253 |
| - return id |
| 250 | + |
| 251 | + # GridFS does not manage its own checksum, so we attach a sha1 to the file |
| 252 | + # for use as an etag. |
| 253 | + hashingfile = _Wrapper(fileobj) |
| 254 | + with storage.new_file(filename=filename, content_type=content_type, **kwargs) as grid_file: |
| 255 | + grid_file.write(hashingfile) |
| 256 | + grid_file.sha1 = hashingfile.hash.hexdigest() |
| 257 | + return grid_file._id |
| 258 | + |
| 259 | + |
class _Wrapper:
    """Read-through adapter that hashes every chunk read from a file object.

    Wraps an object exposing ``read`` and feeds all data returned by it into
    a SHA-1 hash, so a single streaming pass over the file both yields the
    content and produces its checksum. After the wrapped file has been fully
    consumed, ``self.hash.hexdigest()`` is the SHA-1 of the content that was
    read through this wrapper.
    """

    def __init__(self, file):
        """Wrap *file*, an object whose ``read(n)`` returns bytes-like data."""
        # Underlying readable object; only its ``read`` method is used.
        self.file = file
        # Running SHA-1 over all data handed out by ``read`` so far.
        self.hash = hashlib.sha1()

    def read(self, n=-1):
        """Read up to *n* bytes from the wrapped file and update the hash.

        *n* is forwarded unchanged to the wrapped file's ``read``; it defaults
        to ``-1`` (the conventional "read everything" size for file objects)
        so that a bare ``read()`` call works like it does on ordinary files.

        Returns whatever the wrapped ``read`` returned, including the empty
        value that signals end of file.
        """
        data = self.file.read(n)
        # Skip the update on EOF / empty reads; hashing nothing is a no-op.
        if data:
            self.hash.update(data)
        return data
0 commit comments