Skip to content

Commit 9828159

Browse files
committed
Addition of GridFileBuilder class for on-the-fly writing of data to GridFS.
- Added GridFileBuilder - Added unit test for GridFileBuilder - Updated code style - Updated with coding style changes and addition of _insertChunk private method in GridFS
1 parent 4f5bc65 commit 9828159

File tree

3 files changed

+166
-1
lines changed

3 files changed

+166
-1
lines changed

src/mongo/client/gridfs.cpp

Lines changed: 88 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
#include "mongo/client/gridfs.h"
1919

20+
#include <algorithm>
2021
#include <boost/smart_ptr.hpp>
2122
#include <fcntl.h>
2223
#include <fstream>
@@ -35,6 +36,7 @@
3536

3637
namespace mongo {
3738

39+
using std::min;
3840
using std::auto_ptr;
3941
using std::cout;
4042
using std::ios;
@@ -209,6 +211,10 @@ namespace mongo {
209211
return _client.query( _filesNS.c_str() , o );
210212
}
211213

214+
// Inserts the BSON document carried by `chunk` into the namespace named by
// _chunksNS, using this GridFS instance's client connection.
void GridFS::_insertChunk( const GridFSChunk& chunk ) {
    const char* const chunksNamespace = _chunksNS.c_str();
    _client.insert( chunksNamespace , chunk._data );
}
217+
212218
BSONObj GridFile::getMetadata() const {
213219
BSONElement meta_element = _obj["metadata"];
214220
if( meta_element.eoo() ) {
@@ -259,5 +265,86 @@ namespace mongo {
259265
void GridFile::_exists() const {
260266
uassert( 10015 , "doesn't exists" , exists() );
261267
}
262-
268+
269+
// Binds the builder to `grid`, captures the grid's chunk size, allocates a
// staging buffer of exactly that many bytes and generates the id of the first
// file to be built.
GridFileBuilder::GridFileBuilder( GridFS* const grid )
    : _grid( grid )
    , _chunkSize( grid->getChunkSize() )
    , _currentChunk( 0 )
    // _chunkSize is declared before _pendingData in the class, so it is
    // already initialized when the buffer is sized here
    , _pendingData( new char[_chunkSize] )
    , _pendingDataSize( 0 )
    , _fileLength( 0 )
{
    _fileId.init();
    // cached {_id: <fileId>} object handed to every chunk of the file
    _fileIdObj = BSON( "_id" << _fileId );
}
279+
280+
const char* GridFileBuilder::_appendChunk( const char* data,
281+
size_t length,
282+
bool forcePendingInsert ) {
283+
const char* const end = data + length;
284+
while (data < end) {
285+
size_t chunkLen = min( _chunkSize, static_cast<size_t>(end-data) );
286+
// the last chunk needs to be stored as pendingData, break while if
287+
// necessary
288+
if ((chunkLen < _chunkSize) && (!forcePendingInsert))
289+
break;
290+
GridFSChunk chunk( _fileIdObj, _currentChunk, data, chunkLen );
291+
_grid->_insertChunk( chunk );
292+
++_currentChunk;
293+
data += chunkLen;
294+
_fileLength += chunkLen;
295+
}
296+
return data;
297+
}
298+
299+
void GridFileBuilder::_appendPendingData() {
300+
if (_pendingDataSize > 0) {
301+
_appendChunk( _pendingData.get(), _pendingDataSize, true );
302+
_pendingDataSize = 0;
303+
}
304+
}
305+
306+
void GridFileBuilder::appendChunk( const char* data, size_t length ) {
307+
if (length == 0)
308+
return;
309+
310+
// check if there is pending data
311+
if (_pendingDataSize > 0) {
312+
size_t totalSize = _pendingDataSize + length;
313+
size_t size = min( _chunkSize, totalSize ) - _pendingDataSize;
314+
memcpy( _pendingData.get() + _pendingDataSize, data, size );
315+
_pendingDataSize += size;
316+
invariant( _pendingDataSize <= _chunkSize );
317+
if (_pendingDataSize == _chunkSize) {
318+
_appendPendingData();
319+
_appendChunk( data + size, length - size, false );
320+
}
321+
}
322+
else {
323+
const char* const end = data + length;
324+
// split data in _chunkSize blocks, and store as pending the last block if
325+
// necessary
326+
data = _appendChunk( data, length, false );
327+
// process pending data if necessary
328+
if (data != end) {
329+
size_t size = static_cast<size_t>(end-data);
330+
memcpy( _pendingData.get() + _pendingDataSize, data, size );
331+
_pendingDataSize += size;
332+
}
333+
}
334+
}
335+
336+
// Finishes the current file: flushes buffered bytes as a final chunk, inserts
// the file object through GridFS::insertFile and returns it. Afterwards the
// builder is reset with a freshly generated id so that subsequent appendChunk
// calls start a new file.
//
// @param remoteName   name passed to insertFile for the stored file
// @param contentType  content type passed to insertFile
// @return the object returned by insertFile
BSONObj GridFileBuilder::buildFile( const string& remoteName,
                                    const string& contentType ) {
    // the tail must be written before _fileLength is handed to insertFile
    _appendPendingData();

    BSONObj fileObject = _grid->insertFile( remoteName,
                                            _fileId,
                                            _fileLength,
                                            contentType );

    // start over: counters back to zero ...
    _currentChunk = 0;
    _pendingDataSize = 0;
    _fileLength = 0;

    // ... and a new id for the next file
    _fileId.init();
    _fileIdObj = BSON( "_id" << _fileId );

    return fileObject;
}
349+
263350
}

src/mongo/client/gridfs.h

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ namespace mongo {
2828

2929
class GridFS;
3030
class GridFile;
31+
class GridFileBuilder;
3132

3233
class MONGO_CLIENT_API GridFSChunk {
3334
public:
@@ -131,7 +132,12 @@ namespace mongo {
131132
// insert fileobject. All chunks must be in DB.
132133
BSONObj insertFile(const std::string& name, const OID& id, gridfs_offset length, const std::string& contentType);
133134

135+
// Insert a chunk into DB, this method is intended to be used by
136+
// GridFileBuilder to incrementally insert chunks
137+
void _insertChunk(const GridFSChunk& chunk);
138+
134139
friend class GridFile;
140+
friend class GridFileBuilder;
135141
};
136142

137143
/**
@@ -203,4 +209,54 @@ namespace mongo {
203209

204210
friend class GridFS;
205211
};
212+
213+
/**
214+
* class which allow to build GridFiles in a stream fashion way
215+
*/
216+
class GridFileBuilder {
217+
public:
218+
/**
219+
* @param grid - gridfs instance
220+
*/
221+
GridFileBuilder( GridFS* const grid );
222+
223+
/**
224+
* Appends a chunk of data. Data will be split as many times as
225+
* necessary in chunkSize blocks. Sizes not multiple of chunkSize will
226+
* copy the reamining bytes to a pendingData pointer. In this way,
227+
* it is possible to add data in a stream fashion way.
228+
* @param data - C string with data
229+
* @param length - size of the string
230+
*/
231+
void appendChunk( const char* data, size_t length );
232+
233+
/**
234+
* Inserts the description of the file in GridFS collection. Note that
235+
* the stream will be reinitialized after the build call, so it will be
236+
* possible to continue appending data to build another file.
237+
* @param remoteName filename to use for file stored in GridFS
238+
* @param contentType optional MIME type for this object.
239+
* (default is to omit)
240+
* @return the file object
241+
*/
242+
mongo::BSONObj buildFile( const std::string& remoteName,
243+
const std::string& contentType="" );
244+
245+
private:
246+
GridFS* const _grid;
247+
const size_t _chunkSize; // taken from GridFS in the constructor
248+
unsigned int _currentChunk;
249+
OID _fileId;
250+
BSONObj _fileIdObj;
251+
boost::scoped_array<char> _pendingData; // pointer with _chunkSize space
252+
size_t _pendingDataSize;
253+
gridfs_offset _fileLength;
254+
255+
const char* _appendChunk( const char* data, size_t length,
256+
bool forcePendingInsert );
257+
258+
void _appendPendingData();
259+
};
260+
261+
206262
}

src/mongo/unittest/gridfs_test.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
* limitations under the License.
1414
*/
1515

16+
#include <algorithm>
1617
#include <fstream>
1718
#include <sstream>
1819
#include <string>
@@ -24,6 +25,7 @@ using boost::scoped_ptr;
2425
using std::auto_ptr;
2526
using std::ios;
2627
using std::ifstream;
28+
using std::min;
2729
using std::string;
2830
using std::stringstream;
2931

@@ -223,4 +225,24 @@ namespace {
223225
);
224226
ASSERT_EQUALS(files_bad->itcount(), 0);
225227
}
228+
229+
// Streams DATA into a GridFileBuilder two bytes at a time (the final piece
// may be shorter), builds the file and expects it to be stored as exactly
// one chunk.
// NOTE(review): presumably DATA_LEN is below the fixture's chunk size - confirm.
TEST_F(GridFSTest, GridFileBuilder) {
    GridFileBuilder builder(_gfs.get());

    int offset = 0;
    while (offset < DATA_LEN) {
        builder.appendChunk(DATA + offset, min(2, DATA_LEN - offset));
        offset += 2;
    }
    builder.buildFile(DATA_NAME);

    GridFile stored = _gfs->findFile(DATA_NAME);
    ASSERT_EQUALS(stored.getNumChunks(), 1);
}
237+
238+
// Same streaming pattern as above but with the chunk size forced to one byte
// before the builder is created, so the stored file is expected to have one
// chunk per byte of DATA.
TEST_F(GridFSTest, GridFileBuilderMultipleChunks) {
    _gfs->setChunkSize(1);
    GridFileBuilder builder(_gfs.get());

    int offset = 0;
    while (offset < DATA_LEN) {
        builder.appendChunk(DATA + offset, min(2, DATA_LEN - offset));
        offset += 2;
    }
    builder.buildFile(DATA_NAME);

    GridFile stored = _gfs->findFile(DATA_NAME);
    ASSERT_EQUALS(stored.getNumChunks(), DATA_LEN);
}
247+
226248
} // namespace

0 commit comments

Comments
 (0)