diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..46af265 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +bin +src/cluster/bin +src/cluster/meshclust diff --git a/src/Makefile b/src/Makefile index 3013ed0..8ce2a33 100644 --- a/src/Makefile +++ b/src/Makefile @@ -1,7 +1,10 @@ -# CXX = /usr/bin/c++ -CXX ?= g++ +ifeq ($(shell uname), Darwin) + CXX = xcrun -sdk macosx clang++ +else + CXX ?= g++ +endif -CXXFLAGS = -O3 -g -fmessage-length=0 -Wall -march=native -std=c++11 +CXXFLAGS = -std=c++11 -pedantic -Wall -Wno-overloaded-virtual -O3 -flto -march=native # # Objects diff --git a/src/cluster/Makefile b/src/cluster/Makefile index b1f03a6..a760bd8 100644 --- a/src/cluster/Makefile +++ b/src/cluster/Makefile @@ -1,13 +1,19 @@ TARGET ?= meshclust VERSION ?= 1.2.0 -CXX ?= g++ + +ifeq ($(shell uname), Darwin) + CXX = xcrun -sdk macosx clang++ +else + CXX ?= g++ +endif + ifeq ($(debug),yes) - CXXFLAGS += -ggdb -fopenmp + CXXFLAGS += -ggdb else - CXXFLAGS += -fopenmp -O3 -march=native -g + CXXFLAGS += -O3 -flto -march=native -DNDEBUG=1 endif -CXXFLAGS += -std=c++11 -DVERSION=\"$(VERSION)\" -LDFLAGS += -lm +CXXFLAGS += -std=c++11 -pedantic -Wall -Wno-overloaded-virtual -Wno-unused-variable -DVERSION=\"$(VERSION)\" +LDFLAGS += -flto -lm SOURCES := $(shell find ./src -name '*.cpp') OBJECTS = $(SOURCES:%.cpp=bin/%.o) diff --git a/src/cluster/src/ClusterFactory.h b/src/cluster/src/ClusterFactory.h index 61c49ca..7cfee23 100644 --- a/src/cluster/src/ClusterFactory.h +++ b/src/cluster/src/ClusterFactory.h @@ -22,7 +22,7 @@ template class ClusterFactory { public: - ClusterFactory(int k_len, int npp=std::numeric_limits::max()) : k(k_len), num_per_partition(npp) {} + ClusterFactory(int k_len, int npp=std::numeric_limits::max()) : num_per_partition(npp), k(k_len) {} std::vector*> build_points(vector files, std::function*(ChromosomeOneDigit*)> get_point); Point* get_histogram(ChromosomeOneDigit *chrom); Point* get_divergence_point(ChromosomeOneDigit *chrom); diff --git a/src/cluster/src/Runner.cpp b/src/cluster/src/Runner.cpp index 80bde2a..59153bd 100644 --- a/src/cluster/src/Runner.cpp +++ b/src/cluster/src/Runner.cpp @@ -321,21 +321,13 @@ void test() template int Runner::do_run() { - using pvec = vector *>; - using pmap = map*, pvec*>; - ClusterFactory factory(k); - // for (auto f : files) { - // cout << "File: " << f << endl; - // } + auto points = factory.build_points(files, [&](nonltr::ChromosomeOneDigit *p){ return factory.get_divergence_point(p); }); Trainer tr(points, sample_size, largest_count, similarity, pivots, global_mat, global_sigma, global_epsilon, align ? 0 : k); tr.train(); vector lengths; for (Point* p : points) { - // if (!align) { - // p->set_data_str(""); - // } lengths.push_back(p->get_length()); } // Initializing BVec @@ -348,28 +340,6 @@ int Runner::do_run() bv.insert(p); } bv.insert_finalize(); -// cout << "bv size: " << bv.report() << endl; - // Point* mid = points[points.size()/2]; - // auto rng = bv.get_range(mid->get_length() * 0.99, - // mid->get_length() / 0.99); - // auto begin = bv.iter(rng.first); - // auto end = bv.iter(rng.second); - // size_t before = bv.report(); - // for (int i = 0; i < 1; i++) { - // bool is_min = false; - // Point* p = tr.get_close(mid, begin, end, is_min); - // size_t after = bv.report(); - // if (is_min) { - // string expr = (after + 1 == before) ? "true" : "false"; - // if (expr == "false") { - // throw expr; - // } - // cout << expr << endl; - // cout << "is min" << endl; - // } else { - // cout << "is not min" << endl; - // } - // } factory.MS(bv, bandwidth, similarity, tr, output, iterations, delta); return 0; } diff --git a/src/cluster/src/SingleFeature.h b/src/cluster/src/SingleFeature.h index efa882c..e4f7506 100644 --- a/src/cluster/src/SingleFeature.h +++ b/src/cluster/src/SingleFeature.h @@ -8,9 +8,9 @@ template class SingleFeature { public: SingleFeature(std::function*, Point*)> f, bool is_sim_=true) - : raw(f), is_sim(is_sim_), min_set(false), max_set(false) {} + : raw(f), is_sim(is_sim_), max_set(false), min_set(false) {} SingleFeature(std::function*, Point*, const vector&, const vector&)> f, vector rrv, vector rrc, bool is_sim_=true) - : rraw(f), is_sim(is_sim_), min_set(false), max_set(false), rv(rrv), rc(rrc) {} + : rraw(f), rv(rrv), rc(rrc), is_sim(is_sim_), max_set(false), min_set(false) {} void normalize(const vector*,Point*> > &pairs); double operator()(Point*, Point*) const; double min, max; diff --git a/src/cluster/src/Trainer.h b/src/cluster/src/Trainer.h index 3b5fb6c..77179a6 100644 --- a/src/cluster/src/Trainer.h +++ b/src/cluster/src/Trainer.h @@ -12,7 +12,7 @@ template class Trainer { public: - Trainer(std::vector*> v, size_t num_points, int largest_count, double cutoff_, size_t max_pts_from_one_, double (&matrix)[4][4], double sig, double eps, int ksize) : points(v), n_points(num_points), cutoff(cutoff_), max_pts_from_one(max_pts_from_one_), k(ksize) { + Trainer(std::vector*> v, size_t num_points, int largest_count, double cutoff_, size_t max_pts_from_one_, double (&matrix)[4][4], double sig, double eps, int ksize) : points(v), n_points(num_points), max_pts_from_one(max_pts_from_one_), cutoff(cutoff_), k(ksize) { init(matrix, sig, eps); uintmax_t size = 1000 * 1000 * 10; log_table = new double[size]; diff --git a/src/nonltr/EnrichmentMarkovView.cpp b/src/nonltr/EnrichmentMarkovView.cpp index f886ac8..89eaf81 100644 --- a/src/nonltr/EnrichmentMarkovView.cpp +++ b/src/nonltr/EnrichmentMarkovView.cpp @@ -12,7 +12,7 @@ namespace nonltr { */ template EnrichmentMarkovView::EnrichmentMarkovView(int k, int order, int m) : - minObs(m), factor(10000.00), KmerHashTable(k) { + KmerHashTable(k), minObs(m), factor(10000.00) { initialize(order); } diff --git a/src/utility/AffineId.h b/src/utility/AffineId.h index 61173e7..02989d9 100644 --- a/src/utility/AffineId.h +++ b/src/utility/AffineId.h @@ -22,22 +22,14 @@ class AffineId { int len1; int len2; - //int lenTotal; int lenCS; int lenPath; - int * m; // Middle level - //int * l; // Lower level - int * u; // Upper level - - // const int MATCH = 4; // Score of a match - // const int MIS = -4; // Score of a mismatch - // const int OPEN = -2; // Score of a gap opening - // const int EXT = -1; // Score of a gap extension const int MATCH = 1; const int MIS = -1; const int OPEN = -2; const int EXT = -1; + void align(); public: diff --git a/src/utility/ILocation.h b/src/utility/ILocation.h index 53f1ea6..071def3 100644 --- a/src/utility/ILocation.h +++ b/src/utility/ILocation.h @@ -16,6 +16,7 @@ namespace utility { class ILocation { public: + inline virtual ~ILocation() {} virtual int getEnd() const = 0; virtual int getStart() const = 0; virtual void setEnd(int) = 0;