- 
                Notifications
    You must be signed in to change notification settings 
- Fork 0
Caching Multiple Files
        Hüseyin Tuğrul BÜYÜKIŞIK edited this page Feb 28, 2021 
        ·
        10 revisions
      
    Test system: FX-8150 at 3.6 GHz (no turbo); HDD with a maximum read speed of 160 MB/s (3 passes per file are equivalent to an average of 55 MB/s for a single pass = 6 GB of total data = 1.6 minutes just for reading the bytes of the files); single-channel 1333 MHz DDR3 RAM, 4 GB.
The program below encodes and caches 4 files (in 4 minutes and 13 seconds) and prints their numbers of sequences and nucleobase counts while consuming 2550 MB of VRAM and 461 MB of RAM:
#include "FastaGeneIndexer.h"
#include "lib/CpuBenchmarker.h"
#include <exception>
/**
 * Example: build four FastaGeneIndexer caches in parallel (one per input
 * file), then print n() and the 'A'/'C'/'G'/'T' symbol counts of each cache.
 *
 * Any std::exception raised while building or querying the caches is reported
 * on standard output; the program then returns 0, as it always did.
 *
 * @param argC unused
 * @param argV unused
 * @return always 0
 */
int main(int argC, char** argV)
{
    // The command-line arguments are not used by this example.
    (void)argC;
    (void)argV;

    try
    {
        // Input files, indexed by loop iteration.
        const char* const paths[4] = {
            "./data/influenza.fna",                   // 1.4GB
            "./data/influenza.faa",                   // 0.5 GB
            "./data/vertebrate_mammalian_genomic.fna", // 1.0 GB
            "./data/homo_sapiens_chromosome.fa"       // 3.2GB
        };

        FastaGeneIndexer cache[4];

        // An exception must not leave an OpenMP parallel region: it has to be
        // caught inside the region by the thread that threw it. Each iteration
        // therefore catches locally and the first failure is remembered so it
        // can be rethrown once the region has finished.
        std::exception_ptr firstError = nullptr;

        #pragma omp parallel for
        for(int i=0;i<4;i++)
        {
            try
            {
                const bool debug = true;
                cache[i] = FastaGeneIndexer(paths[i], debug);
            }
            catch(...)
            {
                // Serialize access to the shared exception slot.
                #pragma omp critical
                {
                    if(!firstError)
                    {
                        firstError = std::current_exception();
                    }
                }
            }
        }

        // Back on a single thread: surface the failure to the handler below.
        if(firstError)
        {
            std::rethrow_exception(firstError);
        }

        for(int i=0;i<4;i++)
        {
            std::cout << cache[i].n() << '\n';
            std::cout << cache[i].getSymbolCount('A') << '\n';
            std::cout << cache[i].getSymbolCount('C') << '\n';
            std::cout << cache[i].getSymbolCount('G') << '\n';
            std::cout << cache[i].getSymbolCount('T') << '\n';
        }
    }
    catch(const std::exception & e)
    {
        std::cout<< e.what() <<std::endl;
    }
    return 0;
}
The program below caches only the 3.2GB file (in 2 minutes 42 seconds) using 1300 MB of VRAM and 110 MB of RAM:
#include "FastaGeneIndexer.h"
#include "lib/CpuBenchmarker.h"
/**
 * Example: build a single FastaGeneIndexer for the 3.2GB file (debug flag
 * set to true), then print n() followed by the symbol counts for
 * 'A', 'C', 'G' and 'T', one value per line.
 *
 * A std::exception raised along the way is reported on standard output.
 * The program returns 0 in every case.
 */
int main(int argC, char** argV)
{
    try
    {
        // 3.2GB
        FastaGeneIndexer cache("./data/homo_sapiens_chromosome.fa", true);

        std::cout << cache.n() << std::endl;

        // Symbols are queried in the same order as before: A, C, G, T.
        const char symbols[] = {'A', 'C', 'G', 'T'};
        for (const char symbol : symbols)
        {
            std::cout << cache.getSymbolCount(symbol) << std::endl;
        }
    }
    catch (std::exception & error)
    {
        std::cout << error.what() << std::endl;
    }

    return 0;
}