diff --git a/.gitignore b/.gitignore index 8886ed2..567de78 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,5 @@ deps/deps.jl *.ipynb_checkpoints -Manifest.toml +/Manifest.toml +/*.png diff --git a/NOTICE b/NOTICE new file mode 100644 index 0000000..c2ce2b0 --- /dev/null +++ b/NOTICE @@ -0,0 +1,32 @@ +Contains code modified from umap (https://github.com/lmcinnes/umap) +which is available under the following License. + +BSD 3-Clause License + +Copyright (c) 2017, Leland McInnes +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/examples/MNIST_demo.jl b/examples/MNIST_demo.jl new file mode 100644 index 0000000..66fba89 --- /dev/null +++ b/examples/MNIST_demo.jl @@ -0,0 +1,45 @@ +using UMAP +using MLDatasets +using PyCall +const py_umap = pyimport_conda("umap", "umap-learn") +# Plotting helpers used below (plot_umap_comparison, save) are presumably provided by this include. +include("plotting.jl") + +# First, let's get a small sample of the MNIST data, flattened so that each column of X is one 28x28 image. +n_points = 10_000 +X = reshape(MNIST.traintensor(Float64)[:, :, 1:n_points], 28^2, :) + +# We'll also get the labels for the same n_points samples +y = MNIST.trainlabels()[1:n_points] + +# Now we run UMAP supervised and unsupervised, via PyCall (py_umap) and via UMAP.jl +# NOTE(review): py_umap is always given permutedims(X) and its embedding_ is permuted back; presumably it uses one sample per row - confirm. +## try to match py_umap's number of nndescent iterations +nndescent_kwargs = (max_iters = max(5, round(Int, log2(n_points))), sample_rate = 1) +unsup = UMAP_(X; n_neighbors=10, min_dist=0.001, n_epochs=200, + nndescent_kwargs = nndescent_kwargs) +py_unsup = py_umap.UMAP(min_dist=0.001, n_epochs=200, n_neighbors=10).fit(permutedims(X)) +# Side by side: PyUMAP vs. UMAP.jl, both unsupervised. +scene = plot_umap_comparison((permutedims(py_unsup.embedding_), y), (unsup.embedding, y); + titles=("PyUMAP (unsupervised)", "UMAP.jl (unsupervised)")) +save("MNIST_py_vs_jl_unsupervised.png", scene, px_per_unit=3, resolution=(1440, 810)) + +# Supervised runs: the labels y are passed to both implementations. +sup = UMAP_(X, y; n_neighbors=10, min_dist=0.001, n_epochs=200, far_dist=5.0, + nndescent_kwargs = nndescent_kwargs) +py_sup = py_umap.UMAP(min_dist=0.001, n_epochs=200, n_neighbors=10).fit(permutedims(X), y) +# Side by side: PyUMAP vs. UMAP.jl, both supervised. +scene = plot_umap_comparison((permutedims(py_sup.embedding_), y), (sup.embedding, y); + titles=("PyUMAP (supervised)", "UMAP.jl (supervised)")) +save("MNIST_py_vs_jl_supervised.png", scene, px_per_unit=3, resolution=(1440, 810)) + +# UMAP.jl only: unsupervised vs. supervised embedding. +scene = plot_umap_comparison((unsup.embedding, y), (sup.embedding, y); + titles=("UMAP.jl (unsupervised)", "UMAP.jl (supervised)")) +save("MNIST_jl_unsup_vs_supervised.png", scene, px_per_unit=3, resolution=(1440, 810)) + +# PyUMAP only: unsupervised vs. supervised embedding. +scene = plot_umap_comparison((permutedims(py_unsup.embedding_), y), + (permutedims(py_sup.embedding_), y); + titles=("PyUMAP (unsupervised)", "PyUMAP (supervised)")) 
+save("MNIST_py_unsup_vs_supervised.png", scene, px_per_unit=3, resolution=(1440, 810)) diff --git a/examples/MNIST_no_python.jl b/examples/MNIST_no_python.jl new file mode 100644 index 0000000..055be0d --- /dev/null +++ b/examples/MNIST_no_python.jl @@ -0,0 +1,50 @@ +using UMAP +using MLDatasets +using Distances +# Plotting helpers used later in this script (plot_umap_comparison, save, px) are presumably provided by this include. +include("plotting.jl") + +# First, let's get a small sample of the MNIST data, flattened so that each column of X is one 28x28 image. +n_points = 1_000 +X = reshape(MNIST.traintensor(Float64)[:, :, 1:n_points], 28^2, :) + +# We'll also get the labels for the same n_points samples +y = MNIST.trainlabels()[1:n_points] + +# Now we run UMAP supervised and unsupervised, using UMAP.jl only (no PyCall in this script) + +## same nndescent iteration formula as in MNIST_demo.jl, where it is chosen to try to match py_umap +nndescent_kwargs = (max_iters = max(5, round(Int, log2(n_points))), sample_rate = 1) +n_neighbors = 20 +n_neighbors_meta = 200 +# Unsupervised embedding: a single DataView over the pixel data with a Euclidean metric. +unsup = Embedding(DataView(X; metric=Euclidean(), n_neighbors, nndescent_kwargs); min_dist=0.001, n_epochs=200) +# Supervised embedding: a second DataView over the labels y, using a Categorical metric and n_neighbors_meta neighbors. +sup = Embedding(DataView(X; metric=Euclidean(), n_neighbors, nndescent_kwargs), DataView(y;metric=Categorical(far_dist=5.0), n_neighbors=n_neighbors_meta, nndescent_kwargs); min_dist=0.001, n_epochs=200) + +# Bug in NearestNeighbors or Distances? (NOTE(review): possibly NearestNeighborDescent is meant - confirm.) +# NN calls `result_type(metric, data[1], data[2])` instead of `result_type(metric, typeof(data[1]), typeof(data[2]))`. +# But this works for arrays... so the scalar (Int, Int) case is forwarded to the type-based method on the next line. 
+Distances.result_type(M, ::Int, ::Int) = Distances.result_type(M, Int, Int) +# NOTE(review): the method above is type piracy (this script owns neither `result_type` nor `Int`); keep it confined to example scripts. +let + scene = plot_umap_comparison((unsup.embedding, y), (sup.embedding, y); + titles=("UMAP.jl (unsupervised)", "UMAP.jl (supervised)"), markersize=5px) + save("MNIST_jl_unsup_vs_supervised.png", scene, px_per_unit=3, resolution=(1440, 810)) +end + +# Sweep mix_weight and the label-view neighbor count, this time with a Euclidean (continuous) metric on the labels, +# and compare each result against the categorical-supervised and the unsupervised embedding. +for mix_weight in (0.001, 0.25, 0.5, 0.75, 0.999), n_neighbors_meta in (20, 200) + sup_cts = Embedding(DataView(X; metric=Euclidean(), n_neighbors, nndescent_kwargs), DataView(y;metric=Euclidean(), n_neighbors=n_neighbors_meta, nndescent_kwargs); min_dist=0.001, n_epochs=200, mix_weight) + + scene = plot_umap_comparison((sup.embedding, y), (sup_cts.embedding, y); + titles=("UMAP.jl (supervised; categorical)", "UMAP.jl (supervised; continuous; $(n_neighbors_meta) neighbors, mix_weight=$(mix_weight))"), markersize=5px) + save("MNIST_jl_vs_jl_supervised_cat_vs_cts_max_weight_$(mix_weight)_neighbors_$(n_neighbors_meta).png", scene, px_per_unit=3, resolution=(1440, 810)) + + # NOTE(review): the file names say `max_weight` but encode `mix_weight`; left unchanged so existing outputs keep their names. + scene = plot_umap_comparison((unsup.embedding, y), (sup_cts.embedding, y); + titles=("UMAP.jl (unsupervised)", "UMAP.jl (supervised; continuous; $(n_neighbors_meta) neighbors, mix_weight=$(mix_weight))"), markersize=5px) + save("MNIST_jl_vs_jl_unsupervised_vs_cts_max_weight_$(mix_weight)_neighbors_$(n_neighbors_meta).png", scene, px_per_unit=3, resolution=(1440, 810)) + +end diff --git a/examples/Manifest.toml b/examples/Manifest.toml new file mode 100644 index 0000000..0b7d1e6 --- /dev/null +++ b/examples/Manifest.toml @@ -0,0 +1,1125 @@ +# This file is machine-generated - editing it directly is not advised + +[[AbstractFFTs]] +deps = ["LinearAlgebra"] +git-tree-sha1 = "051c95d6836228d120f5f4b984dd5aba1624f716" +uuid = "621f4979-c628-5d54-868e-fcf4e3e8185c" +version = "0.5.0" + +[[AbstractPlotting]] +deps = ["Animations", "ColorBrewer", "ColorSchemes", "ColorTypes", "Colors", "Contour", "Distributions", "DocStringExtensions", "FFMPEG", "FileIO", "FixedPointNumbers", 
"Formatting", "FreeType", "FreeTypeAbstraction", "GeometryBasics", "GridLayoutBase", "ImageIO", "IntervalSets", "Isoband", "KernelDensity", "LinearAlgebra", "Markdown", "Match", "Observables", "Packing", "PaddedViews", "PlotUtils", "PolygonOps", "Printf", "Random", "Serialization", "Showoff", "SignedDistanceFields", "SparseArrays", "StaticArrays", "Statistics", "StatsBase", "StructArrays", "UnicodeFun"] +git-tree-sha1 = "5e0aabc2a2290fc67e109b85aee43757d54dc022" +uuid = "537997a7-5e4e-5d89-9595-2241ea00577e" +version = "0.13.10" + +[[Adapt]] +deps = ["LinearAlgebra"] +git-tree-sha1 = "42c42f2221906892ceb765dbcb1a51deeffd86d7" +uuid = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" +version = "2.3.0" + +[[Animations]] +deps = ["Colors"] +git-tree-sha1 = "e81c509d2c8e49592413bfb0bb3b08150056c79d" +uuid = "27a7e980-b3e6-11e9-2bcd-0b925532e340" +version = "0.4.1" + +[[ArnoldiMethod]] +deps = ["DelimitedFiles", "LinearAlgebra", "Random", "SparseArrays", "StaticArrays", "Test"] +git-tree-sha1 = "2b6845cea546604fb4dca4e31414a6a59d39ddcd" +uuid = "ec485272-7323-5ecc-a04f-4719b315124d" +version = "0.0.4" + +[[Arpack]] +deps = ["Arpack_jll", "Libdl", "LinearAlgebra"] +git-tree-sha1 = "2ff92b71ba1747c5fdd541f8fc87736d82f40ec9" +uuid = "7d9fca2a-8960-54d3-9f78-7d1dccf2cb97" +version = "0.4.0" + +[[Arpack_jll]] +deps = ["Libdl", "OpenBLAS_jll", "Pkg"] +git-tree-sha1 = "e214a9b9bd1b4e1b4f15b22c0994862b66af7ff7" +uuid = "68821587-b530-5797-8361-c406ea357684" +version = "3.5.0+3" + +[[ArrayInterface]] +deps = ["LinearAlgebra", "Requires", "SparseArrays"] +git-tree-sha1 = "c121e78a689da38e4199cf964962385a4810d827" +uuid = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9" +version = "2.14.2" + +[[Artifacts]] +deps = ["Pkg"] +git-tree-sha1 = "c30985d8821e0cd73870b17b0ed0ce6dc44cb744" +uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" +version = "1.3.0" + +[[AxisAlgorithms]] +deps = ["LinearAlgebra", "Random", "SparseArrays", "WoodburyMatrices"] +git-tree-sha1 = "a4d07a1c313392a77042855df46c5f534076fab9" 
+uuid = "13072b0f-2c55-5437-9ae7-d433b7a33950" +version = "1.0.0" + +[[Base64]] +uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" + +[[BinDeps]] +deps = ["Libdl", "Pkg", "SHA", "URIParser", "Unicode"] +git-tree-sha1 = "1289b57e8cf019aede076edab0587eb9644175bd" +uuid = "9e28174c-4ba2-5203-b857-d8d62c4213ee" +version = "1.0.2" + +[[Blosc]] +deps = ["Blosc_jll"] +git-tree-sha1 = "84cf7d0f8fd46ca6f1b3e0305b4b4a37afe50fd6" +uuid = "a74b3585-a348-5f62-a45c-50e91977d574" +version = "0.7.0" + +[[Blosc_jll]] +deps = ["Libdl", "Lz4_jll", "Pkg", "Zlib_jll", "Zstd_jll"] +git-tree-sha1 = "aa9ef39b54a168c3df1b2911e7797e4feee50fbe" +uuid = "0b7ba130-8d10-5ba8-a3d6-c5182647fed9" +version = "1.14.3+1" + +[[BufferedStreams]] +deps = ["Compat", "Test"] +git-tree-sha1 = "5d55b9486590fdda5905c275bb21ce1f0754020f" +uuid = "e1450e63-4bb3-523b-b2a4-4ffa8c0fd77d" +version = "1.0.0" + +[[Bzip2_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "c3598e525718abcc440f69cc6d5f60dda0a1b61e" +uuid = "6e34b625-4abd-537c-b88f-471c36dfa7a0" +version = "1.0.6+5" + +[[CEnum]] +git-tree-sha1 = "215a9aa4a1f23fbd05b92769fdd62559488d70e9" +uuid = "fa961155-64e5-5f13-b03f-caf6b980ea82" +version = "0.4.1" + +[[Cairo]] +deps = ["Cairo_jll", "Colors", "Glib_jll", "Graphics", "Libdl", "Pango_jll"] +git-tree-sha1 = "d0b3f8b4ad16cb0a2988c6788646a5e6a17b6b1b" +uuid = "159f3aea-2a34-519c-b102-8c37f9878175" +version = "1.0.5" + +[[CairoMakie]] +deps = ["AbstractPlotting", "Cairo", "Colors", "FFTW", "FileIO", "FreeType", "GeometryBasics", "LinearAlgebra", "StaticArrays"] +git-tree-sha1 = "12d1b079f6a1c99f832668b866be57d707e04239" +uuid = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0" +version = "0.3.7" + +[[Cairo_jll]] +deps = ["Artifacts", "Bzip2_jll", "Fontconfig_jll", "FreeType2_jll", "Glib_jll", "JLLWrappers", "LZO_jll", "Libdl", "Pixman_jll", "Pkg", "Xorg_libXext_jll", "Xorg_libXrender_jll", "Zlib_jll", "libpng_jll"] +git-tree-sha1 = "e2f47f6d8337369411569fd45ae5753ca10394c6" +uuid = 
"83423d85-b0ee-5818-9007-b63ccbeb887a" +version = "1.16.0+6" + +[[CodecZlib]] +deps = ["TranscodingStreams", "Zlib_jll"] +git-tree-sha1 = "ded953804d019afa9a3f98981d99b33e3db7b6da" +uuid = "944b1d66-785c-5afd-91f1-9de20f533193" +version = "0.7.0" + +[[ColorBrewer]] +deps = ["Colors", "JSON", "Test"] +git-tree-sha1 = "61c5334f33d91e570e1d0c3eb5465835242582c4" +uuid = "a2cac450-b92f-5266-8821-25eda20663c8" +version = "0.4.0" + +[[ColorSchemes]] +deps = ["ColorTypes", "Colors", "FixedPointNumbers", "Random", "StaticArrays"] +git-tree-sha1 = "5d472aa8908568bc198564db06983913a6c2c8e7" +uuid = "35d6a980-a343-548e-a6ea-1d62b119f2f4" +version = "3.10.1" + +[[ColorTypes]] +deps = ["FixedPointNumbers", "Random"] +git-tree-sha1 = "4bffea7ed1a9f0f3d1a131bbcd4b925548d75288" +uuid = "3da002f7-5984-5a60-b8a6-cbb66c0b333f" +version = "0.10.9" + +[[ColorVectorSpace]] +deps = ["ColorTypes", "Colors", "FixedPointNumbers", "LinearAlgebra", "SpecialFunctions", "Statistics", "StatsBase"] +git-tree-sha1 = "4d17724e99f357bfd32afa0a9e2dda2af31a9aea" +uuid = "c3611d14-8923-5661-9e6a-0046d554d3a4" +version = "0.8.7" + +[[Colors]] +deps = ["ColorTypes", "FixedPointNumbers", "InteractiveUtils", "Reexport"] +git-tree-sha1 = "008d6bc68dea6beb6303fdc37188cb557391ebf2" +uuid = "5ae59095-9a9b-59fe-a467-6f913c188581" +version = "0.12.4" + +[[CommonSubexpressions]] +deps = ["MacroTools", "Test"] +git-tree-sha1 = "7b8a93dba8af7e3b42fecabf646260105ac373f7" +uuid = "bbf7d656-a473-5ed7-a52c-81e309532950" +version = "0.3.0" + +[[Compat]] +deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "SHA", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"] +git-tree-sha1 = "a706ff10f1cd8dab94f59fd09c0e657db8e77ff0" +uuid = "34da2185-b29b-5c13-b0c7-acf172513d20" +version = "3.23.0" + +[[CompilerSupportLibraries_jll]] +deps = ["Artifacts", 
"JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "8e695f735fca77e9708e795eda62afdb869cbb70" +uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" +version = "0.3.4+0" + +[[Conda]] +deps = ["JSON", "VersionParsing"] +git-tree-sha1 = "c0647249d785f1d5139c0cc96db8f6b32f7ec416" +uuid = "8f4d0f93-b110-5947-807f-2305c1781a2d" +version = "1.5.0" + +[[Contour]] +deps = ["StaticArrays"] +git-tree-sha1 = "0d128f9c2d9560349dc46f60c42036e244271d72" +uuid = "d38c429a-6771-53c6-b99e-75d170b6e991" +version = "0.5.6" + +[[DataAPI]] +git-tree-sha1 = "ad84f52c0b8f05aa20839484dbaf01690b41ff84" +uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a" +version = "1.4.0" + +[[DataDeps]] +deps = ["HTTP", "Reexport", "SHA"] +git-tree-sha1 = "b439b948e3113e3893de985e8c908b034ce4ecf5" +uuid = "124859b0-ceae-595e-8997-d05f6a7a8dfe" +version = "0.7.4" + +[[DataStructures]] +deps = ["Compat", "InteractiveUtils", "OrderedCollections"] +git-tree-sha1 = "fb0aa371da91c1ff9dc7fbed6122d3e411420b9c" +uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" +version = "0.18.8" + +[[DataValueInterfaces]] +git-tree-sha1 = "bfc1187b79289637fa0ef6d4436ebdfe6905cbd6" +uuid = "e2d170a0-9d28-54be-80f0-106bbe20a464" +version = "1.0.0" + +[[Dates]] +deps = ["Printf"] +uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" + +[[DelimitedFiles]] +deps = ["Mmap"] +uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab" + +[[DiffResults]] +deps = ["StaticArrays"] +git-tree-sha1 = "c18e98cba888c6c25d1c3b048e4b3380ca956805" +uuid = "163ba53b-c6d8-5494-b064-1a9d43ac40c5" +version = "1.0.3" + +[[DiffRules]] +deps = ["NaNMath", "Random", "SpecialFunctions"] +git-tree-sha1 = "214c3fcac57755cfda163d91c58893a8723f93e9" +uuid = "b552c78f-8df3-52c6-915a-8e097449b14b" +version = "1.0.2" + +[[Distances]] +deps = ["LinearAlgebra", "Statistics"] +git-tree-sha1 = "a5b88815e6984e9f3256b6ca0dc63109b16a506f" +uuid = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7" +version = "0.9.2" + +[[Distributed]] +deps = ["Random", "Serialization", "Sockets"] +uuid = 
"8ba89e20-285c-5b6f-9357-94700520ee1b" + +[[Distributions]] +deps = ["FillArrays", "LinearAlgebra", "PDMats", "Printf", "QuadGK", "Random", "SparseArrays", "SpecialFunctions", "StaticArrays", "Statistics", "StatsBase", "StatsFuns"] +git-tree-sha1 = "501c11d708917ca09ce357bed163dbaf0f30229f" +uuid = "31c24e10-a181-5473-b8eb-7969acd0382f" +version = "0.23.12" + +[[DocStringExtensions]] +deps = ["LibGit2", "Markdown", "Pkg", "Test"] +git-tree-sha1 = "50ddf44c53698f5e784bbebb3f4b21c5807401b1" +uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" +version = "0.8.3" + +[[EarCut_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "92d8f9f208637e8d2d28c664051a00569c01493d" +uuid = "5ae413db-bbd1-5e63-b57d-d24a61df00f5" +version = "2.1.5+1" + +[[EllipsisNotation]] +git-tree-sha1 = "65dad386e877850e6fce4fc77f60fe75a468ce9d" +uuid = "da5c29d0-fa7d-589e-88eb-ea29b0a81949" +version = "0.4.0" + +[[Expat_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "1402e52fcda25064f51c77a9655ce8680b76acf0" +uuid = "2e619515-83b5-522b-bb60-26c02a35a201" +version = "2.2.7+6" + +[[FFMPEG]] +deps = ["FFMPEG_jll", "x264_jll"] +git-tree-sha1 = "9a73ffdc375be61b0e4516d83d880b265366fe1f" +uuid = "c87230d0-a227-11e9-1b43-d7ebe4e7570a" +version = "0.4.0" + +[[FFMPEG_jll]] +deps = ["Artifacts", "Bzip2_jll", "FreeType2_jll", "FriBidi_jll", "JLLWrappers", "LAME_jll", "LibVPX_jll", "Libdl", "Ogg_jll", "OpenSSL_jll", "Opus_jll", "Pkg", "Zlib_jll", "libass_jll", "libfdk_aac_jll", "libvorbis_jll", "x264_jll", "x265_jll"] +git-tree-sha1 = "3cc57ad0a213808473eafef4845a74766242e05f" +uuid = "b22a6f82-2f65-5046-a5b2-351ab43fb4e5" +version = "4.3.1+4" + +[[FFTW]] +deps = ["AbstractFFTs", "FFTW_jll", "IntelOpenMP_jll", "Libdl", "LinearAlgebra", "MKL_jll", "Reexport"] +git-tree-sha1 = "8b7c16b56936047ca41bf25effa137ae0b381ae8" +uuid = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341" +version = "1.2.4" + +[[FFTW_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] 
+git-tree-sha1 = "f10c3009373a2d5c4349b8a2932d8accb892892d" +uuid = "f5851436-0d7a-5f13-b9de-f02708fd171a" +version = "3.3.9+6" + +[[FileIO]] +deps = ["Pkg"] +git-tree-sha1 = "cad2e71389ecb2f4480e0de74faab04af13d7929" +uuid = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549" +version = "1.4.4" + +[[FillArrays]] +deps = ["LinearAlgebra", "Random", "SparseArrays"] +git-tree-sha1 = "502b3de6039d5b78c76118423858d981349f3823" +uuid = "1a297f60-69ca-5386-bcde-b61e274b549b" +version = "0.9.7" + +[[FiniteDiff]] +deps = ["ArrayInterface", "LinearAlgebra", "Requires", "SparseArrays", "StaticArrays"] +git-tree-sha1 = "7f7216e0eb46c20ee8ddab5c8f9a262ed72587b6" +uuid = "6a86dc24-6348-571c-b903-95158fe2bd41" +version = "2.7.2" + +[[FixedPointNumbers]] +deps = ["Statistics"] +git-tree-sha1 = "335bfdceacc84c5cdf16aadc768aa5ddfc5383cc" +uuid = "53c48c17-4a7d-5ca2-90c5-79b7896eea93" +version = "0.8.4" + +[[Fontconfig_jll]] +deps = ["Artifacts", "Bzip2_jll", "Expat_jll", "FreeType2_jll", "JLLWrappers", "Libdl", "Libuuid_jll", "Pkg", "Zlib_jll"] +git-tree-sha1 = "35895cf184ceaab11fd778b4590144034a167a2f" +uuid = "a3f928ae-7b40-5064-980b-68af3947d34b" +version = "2.13.1+14" + +[[Formatting]] +deps = ["Printf"] +git-tree-sha1 = "a0c901c29c0e7c763342751c0a94211d56c0de5c" +uuid = "59287772-0a20-5a39-b81b-1366585eb4c0" +version = "0.4.1" + +[[ForwardDiff]] +deps = ["CommonSubexpressions", "DiffResults", "DiffRules", "NaNMath", "Random", "SpecialFunctions", "StaticArrays"] +git-tree-sha1 = "8de2519a83c6c1c2442c2f481dd9a8364855daf4" +uuid = "f6369f11-7733-5829-9624-2563aa707210" +version = "0.10.14" + +[[FreeType]] +deps = ["CEnum", "FreeType2_jll"] +git-tree-sha1 = "d5e296dea44f9cfb8cc83681aa3db367e0cbfd4f" +uuid = "b38be410-82b0-50bf-ab77-7b57e271db43" +version = "3.0.1" + +[[FreeType2_jll]] +deps = ["Artifacts", "Bzip2_jll", "JLLWrappers", "Libdl", "Pkg", "Zlib_jll"] +git-tree-sha1 = "cbd58c9deb1d304f5a245a0b7eb841a2560cfec6" +uuid = "d7e528f0-a631-5988-bf34-fe36492bcfd7" +version = "2.10.1+5" + 
+[[FreeTypeAbstraction]] +deps = ["ColorVectorSpace", "Colors", "FreeType", "GeometryBasics", "StaticArrays"] +git-tree-sha1 = "6380da9239839ee54f2e0628ef48803bcd102b94" +uuid = "663a7486-cb36-511b-a19d-713bb74d65c9" +version = "0.8.2" + +[[FriBidi_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "0d20aed5b14dd4c9a2453c1b601d08e1149679cc" +uuid = "559328eb-81f9-559d-9380-de523a88c83c" +version = "1.0.5+6" + +[[GZip]] +deps = ["Libdl"] +git-tree-sha1 = "039be665faf0b8ae36e089cd694233f5dee3f7d6" +uuid = "92fee26a-97fe-5a0c-ad85-20a5f3185b63" +version = "0.5.1" + +[[GeometryBasics]] +deps = ["EarCut_jll", "IterTools", "LinearAlgebra", "StaticArrays", "StructArrays", "Tables"] +git-tree-sha1 = "876a906eab3be990fdcbfe1e43bb3a76f4776f72" +uuid = "5c1252a2-5f33-56bf-86c9-59e7332b4326" +version = "0.3.3" + +[[Gettext_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Libiconv_jll", "Pkg", "XML2_jll"] +git-tree-sha1 = "8c14294a079216000a0bdca5ec5a447f073ddc9d" +uuid = "78b55507-aeef-58d4-861c-77aaff3498b1" +version = "0.20.1+7" + +[[Glib_jll]] +deps = ["Artifacts", "Gettext_jll", "JLLWrappers", "Libdl", "Libffi_jll", "Libiconv_jll", "Libmount_jll", "PCRE_jll", "Pkg", "Zlib_jll"] +git-tree-sha1 = "04690cc5008b38ecbdfede949220bc7d9ba26397" +uuid = "7746bdde-850d-59dc-9ae8-88ece973131d" +version = "2.59.0+4" + +[[Graphics]] +deps = ["Colors", "LinearAlgebra", "NaNMath"] +git-tree-sha1 = "45d684ead5b65c043ad46bd5be750d61c39d7ef8" +uuid = "a2bd30eb-e257-5431-a919-1863eab51364" +version = "1.0.2" + +[[Graphite2_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "42adbc6fd39ba41138f894b8ac711146a2b0d986" +uuid = "3b182d85-2403-5c21-9c21-1e1f0cc25472" +version = "1.3.13+4" + +[[GridLayoutBase]] +deps = ["GeometryBasics", "InteractiveUtils", "Match", "Observables"] +git-tree-sha1 = "0702ac06d05e653996a37fea1d631bc7e74512ee" +uuid = "3955a311-db13-416c-9275-1d80ed98e5e9" +version = "0.4.1" + +[[Grisu]] +git-tree-sha1 = 
"03d381f65183cb2d0af8b3425fde97263ce9a995" +uuid = "42e2da0e-8278-4e71-bc24-59509adca0fe" +version = "1.0.0" + +[[HDF5]] +deps = ["Blosc", "Compat", "HDF5_jll", "Libdl", "Mmap", "Random", "Requires"] +git-tree-sha1 = "96d77533eb46e208e801b939db8a27626c166565" +uuid = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f" +version = "0.14.1" + +[[HDF5_jll]] +deps = ["Artifacts", "JLLWrappers", "LibCURL_jll", "Libdl", "OpenSSL_jll", "Pkg", "Zlib_jll"] +git-tree-sha1 = "fd83fa0bde42e01952757f01149dd968c06c4dba" +uuid = "0234f1f7-429e-5d53-9886-15a909be8d59" +version = "1.12.0+1" + +[[HTTP]] +deps = ["Base64", "Dates", "IniFile", "MbedTLS", "Sockets", "URIs"] +git-tree-sha1 = "9634200f8e16554cb1620dfb20501483b873df86" +uuid = "cd3eb016-35fb-5094-929b-558a96fad6f3" +version = "0.9.0" + +[[HarfBuzz_jll]] +deps = ["Artifacts", "Cairo_jll", "Fontconfig_jll", "FreeType2_jll", "Gettext_jll", "Glib_jll", "Graphite2_jll", "ICU_jll", "JLLWrappers", "Libdl", "Libffi_jll", "Pkg"] +git-tree-sha1 = "90bed5fc61d12d10832ebf988988104888eebaca" +uuid = "2e76f6c2-a576-52d4-95c1-20adfe4de566" +version = "2.6.1+10" + +[[ICU_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "0ec2e6e6a049eb8520c19cd8976085afcf2943fb" +uuid = "a51ab1cf-af8e-5615-a023-bc2c838bba6b" +version = "67.1.0+2" + +[[ImageCore]] +deps = ["AbstractFFTs", "Colors", "FixedPointNumbers", "Graphics", "MappedArrays", "MosaicViews", "OffsetArrays", "PaddedViews", "Reexport"] +git-tree-sha1 = "ec29985885981ec7a8b97faa0ec86934ed813490" +uuid = "a09fc81d-aa75-5fe9-8630-4744c3626534" +version = "0.8.19" + +[[ImageIO]] +deps = ["FileIO", "PNGFiles"] +git-tree-sha1 = "5eb06178e68c139617fa7926fa0d73ddf9b80e2a" +uuid = "82e4d734-157c-48bb-816b-45c225c6df19" +version = "0.3.1" + +[[IndirectArrays]] +git-tree-sha1 = "c2a145a145dc03a7620af1444e0264ef907bd44f" +uuid = "9b13fd28-a010-5f03-acff-a1bbcff69959" +version = "0.5.1" + +[[Inflate]] +git-tree-sha1 = "f5fc07d4e706b84f72d54eedcc1c13d92fb0871c" +uuid = 
"d25df0c9-e2be-5dd7-82c8-3ad0b3e990b9" +version = "0.1.2" + +[[IniFile]] +deps = ["Test"] +git-tree-sha1 = "098e4d2c533924c921f9f9847274f2ad89e018b8" +uuid = "83e8ac13-25f8-5344-8a64-a9f2b223428f" +version = "0.5.0" + +[[IntelOpenMP_jll]] +deps = ["Libdl", "Pkg"] +git-tree-sha1 = "fb8e1c7a5594ba56f9011310790e03b5384998d6" +uuid = "1d5cc7b8-4909-519e-a0f8-d0f5ad9712d0" +version = "2018.0.3+0" + +[[InteractiveUtils]] +deps = ["Markdown"] +uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" + +[[Interpolations]] +deps = ["AxisAlgorithms", "LinearAlgebra", "OffsetArrays", "Random", "Ratios", "SharedArrays", "SparseArrays", "StaticArrays", "WoodburyMatrices"] +git-tree-sha1 = "d2ff0813f0f110918db2537201686575fcf8d345" +uuid = "a98d9a8b-a2ab-59e6-89dd-64a1c18fca59" +version = "0.13.0" + +[[IntervalSets]] +deps = ["Dates", "EllipsisNotation", "Statistics"] +git-tree-sha1 = "3b1cef135bc532b3c3401b309e1b8a2a2ba26af5" +uuid = "8197267c-284f-5f27-9208-e0e47529a953" +version = "0.5.1" + +[[Isoband]] +deps = ["isoband_jll"] +git-tree-sha1 = "f9b6d97355599074dc867318950adaa6f9946137" +uuid = "f1662d9f-8043-43de-a69a-05efc1cc6ff4" +version = "0.1.1" + +[[IterTools]] +git-tree-sha1 = "05110a2ab1fc5f932622ffea2a003221f4782c18" +uuid = "c8e1da08-722c-5040-9ed9-7db0dc04731e" +version = "1.3.0" + +[[IteratorInterfaceExtensions]] +git-tree-sha1 = "a3f24677c21f5bbe9d2a714f95dcd58337fb2856" +uuid = "82899510-4779-5014-852e-03e436cf321d" +version = "1.0.0" + +[[JLLWrappers]] +git-tree-sha1 = "c70593677bbf2c3ccab4f7500d0f4dacfff7b75c" +uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210" +version = "1.1.3" + +[[JSON]] +deps = ["Dates", "Mmap", "Parsers", "Unicode"] +git-tree-sha1 = "81690084b6198a2e1da36fcfda16eeca9f9f24e4" +uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" +version = "0.21.1" + +[[KernelDensity]] +deps = ["Distributions", "DocStringExtensions", "FFTW", "Interpolations", "StatsBase"] +git-tree-sha1 = "09aeec87bdc9c1fa70d0b508dfa94a21acd280d9" +uuid = "5ab0869b-81aa-558d-bb23-cbf5423bbe9b" 
+version = "0.6.2" + +[[LAME_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "df381151e871f41ee86cee4f5f6fd598b8a68826" +uuid = "c1c5ebd0-6772-5130-a774-d5fcae4a789d" +version = "3.100.0+3" + +[[LZO_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "f128cd6cd05ffd6d3df0523ed99b90ff6f9b349a" +uuid = "dd4b983a-f0e5-5f8d-a1b7-129d4a5fb1ac" +version = "2.10.0+3" + +[[LibCURL_jll]] +deps = ["LibSSH2_jll", "Libdl", "MbedTLS_jll", "Pkg", "Zlib_jll", "nghttp2_jll"] +git-tree-sha1 = "897d962c20031e6012bba7b3dcb7a667170dad17" +uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0" +version = "7.70.0+2" + +[[LibGit2]] +deps = ["Printf"] +uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" + +[[LibSSH2_jll]] +deps = ["Libdl", "MbedTLS_jll", "Pkg"] +git-tree-sha1 = "717705533148132e5466f2924b9a3657b16158e8" +uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8" +version = "1.9.0+3" + +[[LibVPX_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "85fcc80c3052be96619affa2fe2e6d2da3908e11" +uuid = "dd192d2f-8180-539f-9fb4-cc70b1dcf69a" +version = "1.9.0+1" + +[[Libdl]] +uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" + +[[Libffi_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "a2cd088a88c0d37eef7d209fd3d8712febce0d90" +uuid = "e9f186c6-92d2-5b65-8a66-fee21dc1b490" +version = "3.2.1+4" + +[[Libgcrypt_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Libgpg_error_jll", "Pkg"] +git-tree-sha1 = "b391a18ab1170a2e568f9fb8d83bc7c780cb9999" +uuid = "d4300ac3-e22c-5743-9152-c294e39db1e4" +version = "1.8.5+4" + +[[Libgpg_error_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "ec7f2e8ad5c9fa99fc773376cdbc86d9a5a23cb7" +uuid = "7add5ba3-2f88-524e-9cd5-f83b8a55f7b8" +version = "1.36.0+3" + +[[Libiconv_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "8e924324b2e9275a51407a4e06deb3455b1e359f" +uuid = "94ce4f54-9a6c-5748-9c1c-f9c7231a4531" +version = 
"1.16.0+7" + +[[Libmount_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "51ad0c01c94c1ce48d5cad629425035ad030bfd5" +uuid = "4b2f31a3-9ecc-558c-b454-b3730dcb73e9" +version = "2.34.0+3" + +[[Libuuid_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "f879ae9edbaa2c74c922e8b85bb83cc84ea1450b" +uuid = "38a345b3-de98-5d2b-a5d3-14cd9215e700" +version = "2.34.0+7" + +[[LightGraphs]] +deps = ["ArnoldiMethod", "DataStructures", "Distributed", "Inflate", "LinearAlgebra", "Random", "SharedArrays", "SimpleTraits", "SparseArrays", "Statistics"] +git-tree-sha1 = "a0d4bcea4b9c056da143a5ded3c2b7f7740c2d41" +uuid = "093fc24a-ae57-5d10-9952-331d41423f4d" +version = "1.3.0" + +[[LinearAlgebra]] +deps = ["Libdl"] +uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" + +[[Logging]] +uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" + +[[LsqFit]] +deps = ["Distributions", "ForwardDiff", "LinearAlgebra", "NLSolversBase", "OptimBase", "Random", "StatsBase"] +git-tree-sha1 = "b32b5549461fcb93bce223e264d4a7ef0c9923fd" +uuid = "2fda8390-95c7-5789-9bda-21331edee243" +version = "0.11.0" + +[[Lz4_jll]] +deps = ["Libdl", "Pkg"] +git-tree-sha1 = "51b1db0732bbdcfabb60e36095cc3ed9c0016932" +uuid = "5ced341a-0733-55b8-9ab6-a4889d929147" +version = "1.9.2+2" + +[[MAT]] +deps = ["BufferedStreams", "CodecZlib", "HDF5", "SparseArrays"] +git-tree-sha1 = "61f049fe2f7168b8002d5794a1bb37f1f3bc92e4" +uuid = "23992714-dd62-5051-b70f-ba57cb901cac" +version = "0.9.2" + +[[MKL_jll]] +deps = ["IntelOpenMP_jll", "Libdl", "Pkg"] +git-tree-sha1 = "eb540ede3aabb8284cb482aa41d00d6ca850b1f8" +uuid = "856f044c-d86e-5d09-b602-aeab76dc8ba7" +version = "2020.2.254+0" + +[[MLDatasets]] +deps = ["BinDeps", "ColorTypes", "DataDeps", "DelimitedFiles", "FixedPointNumbers", "GZip", "MAT", "Requires"] +git-tree-sha1 = "163a628fb306280708baff9aa383c5469267e1c1" +uuid = "eb30cadb-4394-5ae3-aed4-317e484a6458" +version = "0.5.3" + +[[MacroTools]] +deps = ["Markdown", "Random"] +git-tree-sha1 
= "6a8a2a625ab0dea913aba95c11370589e0239ff0" +uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" +version = "0.5.6" + +[[MappedArrays]] +deps = ["FixedPointNumbers"] +git-tree-sha1 = "b92bd220c95a8bbe89af28f11201fd080e0e3fe7" +uuid = "dbb5928d-eab1-5f90-85c2-b9b0edb7c900" +version = "0.3.0" + +[[Markdown]] +deps = ["Base64"] +uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" + +[[Match]] +git-tree-sha1 = "5cf525d97caf86d29307150fcba763a64eaa9cbe" +uuid = "7eb4fadd-790c-5f42-8a69-bfa0b872bfbf" +version = "1.1.0" + +[[MbedTLS]] +deps = ["Dates", "MbedTLS_jll", "Random", "Sockets"] +git-tree-sha1 = "1c38e51c3d08ef2278062ebceade0e46cefc96fe" +uuid = "739be429-bea8-5141-9913-cc70e7f3736d" +version = "1.0.3" + +[[MbedTLS_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "0eef589dd1c26a3ac9d753fe1a8bcad63f956fa6" +uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" +version = "2.16.8+1" + +[[Missings]] +deps = ["DataAPI"] +git-tree-sha1 = "ed61674a0864832495ffe0a7e889c0da76b0f4c8" +uuid = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28" +version = "0.4.4" + +[[Mmap]] +uuid = "a63ad114-7e13-5084-954f-fe012c677804" + +[[MosaicViews]] +deps = ["MappedArrays", "OffsetArrays", "PaddedViews"] +git-tree-sha1 = "614e8d77264d20c1db83661daadfab38e8e4b77e" +uuid = "e94cdb99-869f-56ef-bcf0-1ae2bcbe0389" +version = "0.2.4" + +[[NLSolversBase]] +deps = ["DiffResults", "Distributed", "FiniteDiff", "ForwardDiff"] +git-tree-sha1 = "39d6bc45e99c96e6995cbddac02877f9b61a1dd1" +uuid = "d41bc354-129a-5804-8e4c-c37616107c6c" +version = "7.7.1" + +[[NaNMath]] +git-tree-sha1 = "bfe47e760d60b82b66b61d2d44128b62e3a369fb" +uuid = "77ba4419-2d1f-58cd-9bb1-8ffee604a2e3" +version = "0.3.5" + +[[NearestNeighborDescent]] +deps = ["DataStructures", "Distances", "LightGraphs", "Random", "Reexport", "SparseArrays"] +git-tree-sha1 = "bc728ade793522637d7833e5a39fb34fb6b38859" +uuid = "dd2c4c9e-a32f-5b2f-b342-08c2f244fce8" +version = "0.3.3" + +[[Observables]] +git-tree-sha1 = 
"635fe10760447cfa86f5118edf2f47eb864fb495" +uuid = "510215fc-4207-5dde-b226-833fc4488ee2" +version = "0.3.2" + +[[OffsetArrays]] +deps = ["Adapt"] +git-tree-sha1 = "9db93b990af57b3a56dca38476832f60d58f777b" +uuid = "6fe1bfb0-de20-5000-8ca7-80f57d26f881" +version = "1.4.0" + +[[Ogg_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "a42c0f138b9ebe8b58eba2271c5053773bde52d0" +uuid = "e7412a2a-1a6e-54c0-be00-318e2571c051" +version = "1.3.4+2" + +[[OpenBLAS_jll]] +deps = ["CompilerSupportLibraries_jll", "Libdl", "Pkg"] +git-tree-sha1 = "0c922fd9634e358622e333fc58de61f05a048492" +uuid = "4536629a-c528-5b80-bd46-f80d51c5b363" +version = "0.3.9+5" + +[[OpenSSL_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "71bbbc616a1d710879f5a1021bcba65ffba6ce58" +uuid = "458c3c95-2e84-50aa-8efc-19380b2a3a95" +version = "1.1.1+6" + +[[OpenSpecFun_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "9db77584158d0ab52307f8c04f8e7c08ca76b5b3" +uuid = "efe28fd5-8261-553b-a9e1-b2916fc3738e" +version = "0.5.3+4" + +[[OptimBase]] +deps = ["NLSolversBase", "Printf", "Reexport"] +git-tree-sha1 = "4c26a757fbb5b1893b7df19a44e21762d8f8e470" +uuid = "87e2bd06-a317-5318-96d9-3ecbac512eee" +version = "2.0.1" + +[[Opus_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "f9d57f4126c39565e05a2b0264df99f497fc6f37" +uuid = "91d4177d-7536-5919-b921-800302f37372" +version = "1.3.1+3" + +[[OrderedCollections]] +git-tree-sha1 = "cf59cfed2e2c12e8a2ff0a4f1e9b2cd8650da6db" +uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" +version = "1.3.2" + +[[PCRE_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "1b556ad51dceefdbf30e86ffa8f528b73c7df2bb" +uuid = "2f80f16e-611a-54ab-bc61-aa92de5b98fc" +version = "8.42.0+4" + +[[PDMats]] +deps = ["LinearAlgebra", "SparseArrays", "SuiteSparse", "Test"] +git-tree-sha1 = "95a4038d1011dfdbde7cecd2ad0ac411e53ab1bc" +uuid = 
"90014a1f-27ba-587c-ab20-58faa44d9150" +version = "0.10.1" + +[[PNGFiles]] +deps = ["CEnum", "ImageCore", "IndirectArrays", "OffsetArrays", "libpng_jll"] +git-tree-sha1 = "e5cefb581115ab216c4a76819747bbeb6eb2948e" +uuid = "f57f5aa1-a3ce-4bc8-8ab9-96f992907883" +version = "0.3.2" + +[[Packing]] +deps = ["GeometryBasics"] +git-tree-sha1 = "f4049d379326c2c7aa875c702ad19346ecb2b004" +uuid = "19eb6ba3-879d-56ad-ad62-d5c202156566" +version = "0.4.1" + +[[PaddedViews]] +deps = ["OffsetArrays"] +git-tree-sha1 = "91d229e113e8975a399e40d7c0b1ddf4da6d3c59" +uuid = "5432bcbf-9aad-5242-b902-cca2824c8663" +version = "0.5.7" + +[[Pango_jll]] +deps = ["Artifacts", "Cairo_jll", "Fontconfig_jll", "FreeType2_jll", "FriBidi_jll", "Glib_jll", "HarfBuzz_jll", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "9a336dee51d20d1ed890c4a8dca636e86e2b76ca" +uuid = "36c8627f-9965-5494-a995-c6b170f724f3" +version = "1.42.4+10" + +[[Parsers]] +deps = ["Dates"] +git-tree-sha1 = "b417be52e8be24e916e34b3d70ec2da7bdf56a68" +uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" +version = "1.0.12" + +[[Pixman_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "6a20a83c1ae86416f0a5de605eaea08a552844a3" +uuid = "30392449-352a-5448-841d-b1acce4e97dc" +version = "0.40.0+0" + +[[Pkg]] +deps = ["Dates", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "UUIDs"] +uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" + +[[PlotUtils]] +deps = ["ColorSchemes", "Colors", "Dates", "Printf", "Random", "Reexport", "Statistics"] +git-tree-sha1 = "4e098f88dad9a2b518b83124a116be1c49e2b2bf" +uuid = "995b91a9-d308-5afd-9ec6-746e21dbc043" +version = "1.0.7" + +[[PolygonOps]] +git-tree-sha1 = "c031d2332c9a8e1c90eca239385815dc271abb22" +uuid = "647866c9-e3ac-4575-94e7-e3d426903924" +version = "0.1.1" + +[[Printf]] +deps = ["Unicode"] +uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" + +[[PyCall]] +deps = ["Conda", "Dates", "Libdl", "LinearAlgebra", "MacroTools", "Serialization", 
"VersionParsing"] +git-tree-sha1 = "b6dff5fa725eff4f775f472acd86756d6e31fb02" +uuid = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0" +version = "1.92.1" + +[[QuadGK]] +deps = ["DataStructures", "LinearAlgebra"] +git-tree-sha1 = "12fbe86da16df6679be7521dfb39fbc861e1dc7b" +uuid = "1fd47b50-473d-5c70-9696-f719f8f3bcdc" +version = "2.4.1" + +[[REPL]] +deps = ["InteractiveUtils", "Markdown", "Sockets"] +uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" + +[[Random]] +deps = ["Serialization"] +uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" + +[[Ratios]] +git-tree-sha1 = "37d210f612d70f3f7d57d488cb3b6eff56ad4e41" +uuid = "c84ed2f1-dad5-54f0-aa8e-dbefe2724439" +version = "0.4.0" + +[[Reexport]] +deps = ["Pkg"] +git-tree-sha1 = "7b1d07f411bc8ddb7977ec7f377b97b158514fe0" +uuid = "189a3867-3050-52da-a836-e630ba90ab69" +version = "0.2.0" + +[[Requires]] +deps = ["UUIDs"] +git-tree-sha1 = "e05c53ebc86933601d36212a93b39144a2733493" +uuid = "ae029012-a4dd-5104-9daa-d747884805df" +version = "1.1.1" + +[[Rmath]] +deps = ["Random", "Rmath_jll"] +git-tree-sha1 = "86c5647b565873641538d8f812c04e4c9dbeb370" +uuid = "79098fc4-a85e-5d69-aa6a-4863f24498fa" +version = "0.6.1" + +[[Rmath_jll]] +deps = ["Libdl", "Pkg"] +git-tree-sha1 = "d76185aa1f421306dec73c057aa384bad74188f0" +uuid = "f50d1b31-88e8-58de-be2c-1cc44531875f" +version = "0.2.2+1" + +[[SHA]] +uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" + +[[Serialization]] +uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" + +[[SharedArrays]] +deps = ["Distributed", "Mmap", "Random", "Serialization"] +uuid = "1a1011a3-84de-559e-8e89-a11a2f7dc383" + +[[Showoff]] +deps = ["Dates", "Grisu"] +git-tree-sha1 = "ee010d8f103468309b8afac4abb9be2e18ff1182" +uuid = "992d4aef-0814-514b-bc4d-f2e9a6c4116f" +version = "0.3.2" + +[[SignedDistanceFields]] +deps = ["Random", "Statistics", "Test"] +git-tree-sha1 = "d263a08ec505853a5ff1c1ebde2070419e3f28e9" +uuid = "73760f76-fbc4-59ce-8f25-708e95d2df96" +version = "0.4.0" + +[[SimpleTraits]] +deps = ["InteractiveUtils", 
"MacroTools"] +git-tree-sha1 = "daf7aec3fe3acb2131388f93a4c409b8c7f62226" +uuid = "699a6c99-e7fa-54fc-8d76-47d257e15c1d" +version = "0.9.3" + +[[Sockets]] +uuid = "6462fe0b-24de-5631-8697-dd941f90decc" + +[[SortingAlgorithms]] +deps = ["DataStructures", "Random", "Test"] +git-tree-sha1 = "03f5898c9959f8115e30bc7226ada7d0df554ddd" +uuid = "a2af1166-a08f-5f64-846c-94a0d3cef48c" +version = "0.3.1" + +[[SparseArrays]] +deps = ["LinearAlgebra", "Random"] +uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" + +[[SpecialFunctions]] +deps = ["OpenSpecFun_jll"] +git-tree-sha1 = "d8d8b8a9f4119829410ecd706da4cc8594a1e020" +uuid = "276daf66-3868-5448-9aa4-cd146d93841b" +version = "0.10.3" + +[[StaticArrays]] +deps = ["LinearAlgebra", "Random", "Statistics"] +git-tree-sha1 = "da4cf579416c81994afd6322365d00916c79b8ae" +uuid = "90137ffa-7385-5640-81b9-e52037218182" +version = "0.12.5" + +[[Statistics]] +deps = ["LinearAlgebra", "SparseArrays"] +uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" + +[[StatsBase]] +deps = ["DataAPI", "DataStructures", "LinearAlgebra", "Missings", "Printf", "Random", "SortingAlgorithms", "SparseArrays", "Statistics"] +git-tree-sha1 = "7bab7d4eb46b225b35179632852b595a3162cb61" +uuid = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" +version = "0.33.2" + +[[StatsFuns]] +deps = ["Rmath", "SpecialFunctions"] +git-tree-sha1 = "3b9f665c70712af3264b61c27a7e1d62055dafd1" +uuid = "4c63d2b9-4356-54db-8cca-17b64c39e42c" +version = "0.9.6" + +[[StructArrays]] +deps = ["Adapt", "DataAPI", "Tables"] +git-tree-sha1 = "8099ed9fb90b6e754d6ba8c6ed8670f010eadca0" +uuid = "09ab397b-f2b6-538f-b94a-2f83cf4a842a" +version = "0.4.4" + +[[SuiteSparse]] +deps = ["Libdl", "LinearAlgebra", "Serialization", "SparseArrays"] +uuid = "4607b0f0-06f3-5cda-b6b1-a6196a1729e9" + +[[TableTraits]] +deps = ["IteratorInterfaceExtensions"] +git-tree-sha1 = "b1ad568ba658d8cbb3b892ed5380a6f3e781a81e" +uuid = "3783bdb8-4a98-5b6b-af9a-565f29a5fe9c" +version = "1.0.0" + +[[Tables]] +deps = ["DataAPI", 
"DataValueInterfaces", "IteratorInterfaceExtensions", "LinearAlgebra", "TableTraits", "Test"] +git-tree-sha1 = "5131a624173d532299d1c7eb05341c18112b21b8" +uuid = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" +version = "1.2.1" + +[[Test]] +deps = ["Distributed", "InteractiveUtils", "Logging", "Random"] +uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + +[[TranscodingStreams]] +deps = ["Random", "Test"] +git-tree-sha1 = "7c53c35547de1c5b9d46a4797cf6d8253807108c" +uuid = "3bb67fe8-82b1-5028-8e26-92a6c54297fa" +version = "0.9.5" + +[[UMAP]] +deps = ["Arpack", "Distances", "LinearAlgebra", "LsqFit", "NearestNeighborDescent", "Random", "SparseArrays"] +path = ".." +uuid = "c4f8c510-2410-5be4-91d7-4fbaeb39457e" +version = "0.1.7" + +[[URIParser]] +deps = ["Unicode"] +git-tree-sha1 = "53a9f49546b8d2dd2e688d216421d050c9a31d0d" +uuid = "30578b45-9adc-5946-b283-645ec420af67" +version = "0.4.1" + +[[URIs]] +git-tree-sha1 = "bc331715463c41d601cf8bfd38ca70a490af5c5b" +uuid = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4" +version = "1.1.0" + +[[UUIDs]] +deps = ["Random", "SHA"] +uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" + +[[Unicode]] +uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" + +[[UnicodeFun]] +deps = ["REPL"] +git-tree-sha1 = "53915e50200959667e78a92a418594b428dffddf" +uuid = "1cfade01-22cf-5700-b092-accc4b62d6e1" +version = "0.4.1" + +[[VersionParsing]] +git-tree-sha1 = "80229be1f670524750d905f8fc8148e5a8c4537f" +uuid = "81def892-9a0e-5fdd-b105-ffc91e053289" +version = "1.2.0" + +[[WoodburyMatrices]] +deps = ["LinearAlgebra", "SparseArrays"] +git-tree-sha1 = "59e2ad8fd1591ea019a5259bd012d7aee15f995c" +uuid = "efce3f68-66dc-5838-9240-27a6d6f5f9b6" +version = "0.5.3" + +[[XML2_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Libiconv_jll", "Pkg", "Zlib_jll"] +git-tree-sha1 = "be0db24f70aae7e2b89f2f3092e93b8606d659a6" +uuid = "02c8fc9c-b97f-50b9-bbe4-9be30ff0a78a" +version = "2.9.10+3" + +[[XSLT_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Libgcrypt_jll", "Pkg", "XML2_jll"] 
+git-tree-sha1 = "2b3eac39df218762d2d005702d601cd44c997497" +uuid = "aed1982a-8fda-507f-9586-7b0439959a61" +version = "1.1.33+4" + +[[Xorg_libX11_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libxcb_jll", "Xorg_xtrans_jll"] +git-tree-sha1 = "5be649d550f3f4b95308bf0183b82e2582876527" +uuid = "4f6342f7-b3d2-589e-9d20-edeb45f2b2bc" +version = "1.6.9+4" + +[[Xorg_libXau_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "4e490d5c960c314f33885790ed410ff3a94ce67e" +uuid = "0c0b7dd1-d40b-584c-a123-a41640f87eec" +version = "1.0.9+4" + +[[Xorg_libXdmcp_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "4fe47bd2247248125c428978740e18a681372dd4" +uuid = "a3789734-cfe1-5b06-b2d0-1dd0d9d62d05" +version = "1.1.3+4" + +[[Xorg_libXext_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libX11_jll"] +git-tree-sha1 = "b7c0aa8c376b31e4852b360222848637f481f8c3" +uuid = "1082639a-0dae-5f34-9b06-72781eeb8cb3" +version = "1.3.4+4" + +[[Xorg_libXrender_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libX11_jll"] +git-tree-sha1 = "19560f30fd49f4d4efbe7002a1037f8c43d43b96" +uuid = "ea2f1a96-1ddc-540d-b46f-429655e07cfa" +version = "0.9.10+4" + +[[Xorg_libpthread_stubs_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "6783737e45d3c59a4a4c4091f5f88cdcf0908cbb" +uuid = "14d82f49-176c-5ed1-bb49-ad3f5cbd8c74" +version = "0.1.0+3" + +[[Xorg_libxcb_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "XSLT_jll", "Xorg_libXau_jll", "Xorg_libXdmcp_jll", "Xorg_libpthread_stubs_jll"] +git-tree-sha1 = "daf17f441228e7a3833846cd048892861cff16d6" +uuid = "c7cfdc94-dc32-55de-ac96-5a1b8d977c5b" +version = "1.13.0+3" + +[[Xorg_xtrans_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "79c31e7844f6ecf779705fbc12146eb190b7d845" +uuid = "c5fb5394-a638-5e4d-96e5-b29de1b5cf10" +version = "1.4.0+3" + +[[Zlib_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", 
"Pkg"] +git-tree-sha1 = "320228915c8debb12cb434c59057290f0834dbf6" +uuid = "83775a58-1f1d-513f-b197-d71354ab007a" +version = "1.2.11+18" + +[[Zstd_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "6f1abcb0c44f184690912aa4b0ba861dd64f11b9" +uuid = "3161d3a3-bdf6-5164-811a-617609db77b4" +version = "1.4.5+2" + +[[isoband_jll]] +deps = ["Libdl", "Pkg"] +git-tree-sha1 = "a1ac99674715995a536bbce674b068ec1b7d893d" +uuid = "9a68df92-36a6-505f-a73e-abb412b6bfb4" +version = "0.2.2+0" + +[[libass_jll]] +deps = ["Artifacts", "Bzip2_jll", "FreeType2_jll", "FriBidi_jll", "JLLWrappers", "Libdl", "Pkg", "Zlib_jll"] +git-tree-sha1 = "acc685bcf777b2202a904cdcb49ad34c2fa1880c" +uuid = "0ac62f75-1d6f-5e53-bd7c-93b484bb37c0" +version = "0.14.0+4" + +[[libfdk_aac_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "7a5780a0d9c6864184b3a2eeeb833a0c871f00ab" +uuid = "f638f0a6-7fb0-5443-88ba-1cc74229b280" +version = "0.1.6+4" + +[[libpng_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Zlib_jll"] +git-tree-sha1 = "6abbc424248097d69c0c87ba50fcb0753f93e0ee" +uuid = "b53b4c65-9356-5827-b1ea-8c7a1a84506f" +version = "1.6.37+6" + +[[libvorbis_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Ogg_jll", "Pkg"] +git-tree-sha1 = "fa14ac25af7a4b8a7f61b287a124df7aab601bcd" +uuid = "f27f6e37-5d2b-51aa-960f-b287f2bc3b7a" +version = "1.3.6+6" + +[[nghttp2_jll]] +deps = ["Libdl", "Pkg"] +git-tree-sha1 = "8e2c44ab4d49ad9518f359ed8b62f83ba8beede4" +uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d" +version = "1.40.0+2" + +[[x264_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "d713c1ce4deac133e3334ee12f4adff07f81778f" +uuid = "1270edf5-f2f9-52d2-97e9-ab00b5d0237a" +version = "2020.7.14+2" + +[[x265_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "487da2f8f2f0c8ee0e83f39d13037d6bbf0a45ab" +uuid = "dfaa095f-4041-5dcd-9319-2fabd8486b76" +version = "3.0.0+3" diff --git a/examples/Project.toml 
b/examples/Project.toml new file mode 100644 index 0000000..4ab52fe --- /dev/null +++ b/examples/Project.toml @@ -0,0 +1,11 @@ +[deps] +AbstractPlotting = "537997a7-5e4e-5d89-9595-2241ea00577e" +CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0" +Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7" +MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458" +PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0" +UMAP = "c4f8c510-2410-5be4-91d7-4fbaeb39457e" + +[compat] +AbstractPlotting = "0.13" +MLDatasets = "0.5" diff --git a/examples/plotting.jl b/examples/plotting.jl new file mode 100644 index 0000000..54f3cc3 --- /dev/null +++ b/examples/plotting.jl @@ -0,0 +1,69 @@ +using CairoMakie +using AbstractPlotting +using AbstractPlotting.MakieLayout +using AbstractPlotting: px + + +function hide_decorations!(ax) + ax.xticksvisible = false + ax.yticksvisible = false + ax.xticklabelsvisible = false + ax.yticklabelsvisible = false + ax.bottomspinevisible = false + ax.leftspinevisible = false + ax.topspinevisible = false + ax.rightspinevisible = false + ax.xgridvisible = false + ax.ygridvisible = false +end + +function plot_umap(embedding, color) + scene, layout = layoutscene() + + layout[1, 1] = umap_ax = LAxis(scene) + + umap_ax.xautolimitmargin[] = (2 * umap_ax.xautolimitmargin[][1], + 2 * umap_ax.xautolimitmargin[][2]) + umap_ax.yautolimitmargin[] = (2 * umap_ax.yautolimitmargin[][1], + 2 * umap_ax.yautolimitmargin[][2]) + + hide_decorations!(umap_ax) + + plt = scatter!(umap_ax, Point2f0.(eachcol(embedding)); color=color, markersize=5px, strokecolor = :transparent) + + cbar = layout[:, 2] = LColorbar(scene, plt, label="Number") + cbar.width = Fixed(30) + cbar.height = Relative(2 / 3) + + scene +end + + +function plot_umap_comparison((e1, c1), (e2, c2); titles, title=nothing, kwargs...) 
+ scene, layout = layoutscene() + + layout[1, 1] = ax1 = LAxis(scene, title=titles[1]) + layout[1, 2] = ax2 = LAxis(scene, title=titles[2]) + + for umap_ax in (ax1, ax2) + umap_ax.xautolimitmargin[] = (2 * umap_ax.xautolimitmargin[][1], + 2 * umap_ax.xautolimitmargin[][2]) + umap_ax.yautolimitmargin[] = (2 * umap_ax.yautolimitmargin[][1], + 2 * umap_ax.yautolimitmargin[][2]) + hide_decorations!(umap_ax) + end + + plt1 = scatter!(ax1, Point2f0.(eachcol(e1)); color=c1, markersize=1px, strokecolor = :transparent, kwargs...) + plt2 = scatter!(ax2, Point2f0.(eachcol(e2)); color=c2, markersize=1px, strokecolor = :transparent, kwargs...) + + cbar = layout[:, 3] = LColorbar(scene, plt1, label="Number") + cbar.width = Fixed(30) + cbar.height = Relative(2 / 3) + + if title !== nothing + layout[0, :] = LText(scene, title, textsize=20, font="Noto Sans Bold", + color=(:black, 0.25)) + end + + scene +end diff --git a/python/Manifest.toml b/python/Manifest.toml new file mode 100644 index 0000000..7396b43 --- /dev/null +++ b/python/Manifest.toml @@ -0,0 +1,375 @@ +# This file is machine-generated - editing it directly is not advised + +[[ArnoldiMethod]] +deps = ["DelimitedFiles", "LinearAlgebra", "Random", "SparseArrays", "StaticArrays", "Test"] +git-tree-sha1 = "2b6845cea546604fb4dca4e31414a6a59d39ddcd" +uuid = "ec485272-7323-5ecc-a04f-4719b315124d" +version = "0.0.4" + +[[Arpack]] +deps = ["Arpack_jll", "Libdl", "LinearAlgebra"] +git-tree-sha1 = "2ff92b71ba1747c5fdd541f8fc87736d82f40ec9" +uuid = "7d9fca2a-8960-54d3-9f78-7d1dccf2cb97" +version = "0.4.0" + +[[Arpack_jll]] +deps = ["Libdl", "OpenBLAS_jll", "Pkg"] +git-tree-sha1 = "e214a9b9bd1b4e1b4f15b22c0994862b66af7ff7" +uuid = "68821587-b530-5797-8361-c406ea357684" +version = "3.5.0+3" + +[[ArrayInterface]] +deps = ["LinearAlgebra", "Requires", "SparseArrays"] +git-tree-sha1 = "de4bb46df3f67769356e737f2c7ce1d67da3ae49" +uuid = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9" +version = "2.14.4" + +[[Artifacts]] +deps = ["Pkg"] 
+git-tree-sha1 = "c30985d8821e0cd73870b17b0ed0ce6dc44cb744" +uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" +version = "1.3.0" + +[[Base64]] +uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" + +[[CommonSubexpressions]] +deps = ["MacroTools", "Test"] +git-tree-sha1 = "7b8a93dba8af7e3b42fecabf646260105ac373f7" +uuid = "bbf7d656-a473-5ed7-a52c-81e309532950" +version = "0.3.0" + +[[Compat]] +deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "SHA", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"] +git-tree-sha1 = "a706ff10f1cd8dab94f59fd09c0e657db8e77ff0" +uuid = "34da2185-b29b-5c13-b0c7-acf172513d20" +version = "3.23.0" + +[[CompilerSupportLibraries_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "8e695f735fca77e9708e795eda62afdb869cbb70" +uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" +version = "0.3.4+0" + +[[Conda]] +deps = ["JSON", "VersionParsing"] +git-tree-sha1 = "c0647249d785f1d5139c0cc96db8f6b32f7ec416" +uuid = "8f4d0f93-b110-5947-807f-2305c1781a2d" +version = "1.5.0" + +[[DataAPI]] +git-tree-sha1 = "ad84f52c0b8f05aa20839484dbaf01690b41ff84" +uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a" +version = "1.4.0" + +[[DataStructures]] +deps = ["Compat", "InteractiveUtils", "OrderedCollections"] +git-tree-sha1 = "fb0aa371da91c1ff9dc7fbed6122d3e411420b9c" +uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" +version = "0.18.8" + +[[Dates]] +deps = ["Printf"] +uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" + +[[DelimitedFiles]] +deps = ["Mmap"] +uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab" + +[[DiffResults]] +deps = ["StaticArrays"] +git-tree-sha1 = "c18e98cba888c6c25d1c3b048e4b3380ca956805" +uuid = "163ba53b-c6d8-5494-b064-1a9d43ac40c5" +version = "1.0.3" + +[[DiffRules]] +deps = ["NaNMath", "Random", "SpecialFunctions"] +git-tree-sha1 = "214c3fcac57755cfda163d91c58893a8723f93e9" 
+uuid = "b552c78f-8df3-52c6-915a-8e097449b14b" +version = "1.0.2" + +[[Distances]] +deps = ["LinearAlgebra", "Statistics"] +git-tree-sha1 = "a5b88815e6984e9f3256b6ca0dc63109b16a506f" +uuid = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7" +version = "0.9.2" + +[[Distributed]] +deps = ["Random", "Serialization", "Sockets"] +uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b" + +[[Distributions]] +deps = ["FillArrays", "LinearAlgebra", "PDMats", "Printf", "QuadGK", "Random", "SparseArrays", "SpecialFunctions", "StaticArrays", "Statistics", "StatsBase", "StatsFuns"] +git-tree-sha1 = "501c11d708917ca09ce357bed163dbaf0f30229f" +uuid = "31c24e10-a181-5473-b8eb-7969acd0382f" +version = "0.23.12" + +[[FillArrays]] +deps = ["LinearAlgebra", "Random", "SparseArrays"] +git-tree-sha1 = "502b3de6039d5b78c76118423858d981349f3823" +uuid = "1a297f60-69ca-5386-bcde-b61e274b549b" +version = "0.9.7" + +[[FiniteDiff]] +deps = ["ArrayInterface", "LinearAlgebra", "Requires", "SparseArrays", "StaticArrays"] +git-tree-sha1 = "7f7216e0eb46c20ee8ddab5c8f9a262ed72587b6" +uuid = "6a86dc24-6348-571c-b903-95158fe2bd41" +version = "2.7.2" + +[[ForwardDiff]] +deps = ["CommonSubexpressions", "DiffResults", "DiffRules", "NaNMath", "Random", "SpecialFunctions", "StaticArrays"] +git-tree-sha1 = "8de2519a83c6c1c2442c2f481dd9a8364855daf4" +uuid = "f6369f11-7733-5829-9624-2563aa707210" +version = "0.10.14" + +[[Inflate]] +git-tree-sha1 = "f5fc07d4e706b84f72d54eedcc1c13d92fb0871c" +uuid = "d25df0c9-e2be-5dd7-82c8-3ad0b3e990b9" +version = "0.1.2" + +[[InteractiveUtils]] +deps = ["Markdown"] +uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" + +[[JLLWrappers]] +git-tree-sha1 = "c70593677bbf2c3ccab4f7500d0f4dacfff7b75c" +uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210" +version = "1.1.3" + +[[JSON]] +deps = ["Dates", "Mmap", "Parsers", "Unicode"] +git-tree-sha1 = "81690084b6198a2e1da36fcfda16eeca9f9f24e4" +uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" +version = "0.21.1" + +[[LibGit2]] +deps = ["Printf"] +uuid = 
"76f85450-5226-5b5a-8eaa-529ad045b433" + +[[Libdl]] +uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" + +[[LightGraphs]] +deps = ["ArnoldiMethod", "DataStructures", "Distributed", "Inflate", "LinearAlgebra", "Random", "SharedArrays", "SimpleTraits", "SparseArrays", "Statistics"] +git-tree-sha1 = "a0d4bcea4b9c056da143a5ded3c2b7f7740c2d41" +uuid = "093fc24a-ae57-5d10-9952-331d41423f4d" +version = "1.3.0" + +[[LinearAlgebra]] +deps = ["Libdl"] +uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" + +[[Logging]] +uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" + +[[LsqFit]] +deps = ["Distributions", "ForwardDiff", "LinearAlgebra", "NLSolversBase", "OptimBase", "Random", "StatsBase"] +git-tree-sha1 = "b32b5549461fcb93bce223e264d4a7ef0c9923fd" +uuid = "2fda8390-95c7-5789-9bda-21331edee243" +version = "0.11.0" + +[[MacroTools]] +deps = ["Markdown", "Random"] +git-tree-sha1 = "6a8a2a625ab0dea913aba95c11370589e0239ff0" +uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" +version = "0.5.6" + +[[Markdown]] +deps = ["Base64"] +uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" + +[[Missings]] +deps = ["DataAPI"] +git-tree-sha1 = "ed61674a0864832495ffe0a7e889c0da76b0f4c8" +uuid = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28" +version = "0.4.4" + +[[Mmap]] +uuid = "a63ad114-7e13-5084-954f-fe012c677804" + +[[NLSolversBase]] +deps = ["DiffResults", "Distributed", "FiniteDiff", "ForwardDiff"] +git-tree-sha1 = "39d6bc45e99c96e6995cbddac02877f9b61a1dd1" +uuid = "d41bc354-129a-5804-8e4c-c37616107c6c" +version = "7.7.1" + +[[NaNMath]] +git-tree-sha1 = "bfe47e760d60b82b66b61d2d44128b62e3a369fb" +uuid = "77ba4419-2d1f-58cd-9bb1-8ffee604a2e3" +version = "0.3.5" + +[[NearestNeighborDescent]] +deps = ["DataStructures", "Distances", "LightGraphs", "Random", "Reexport", "SparseArrays"] +git-tree-sha1 = "bc728ade793522637d7833e5a39fb34fb6b38859" +uuid = "dd2c4c9e-a32f-5b2f-b342-08c2f244fce8" +version = "0.3.3" + +[[OpenBLAS_jll]] +deps = ["CompilerSupportLibraries_jll", "Libdl", "Pkg"] +git-tree-sha1 = 
"0c922fd9634e358622e333fc58de61f05a048492" +uuid = "4536629a-c528-5b80-bd46-f80d51c5b363" +version = "0.3.9+5" + +[[OpenSpecFun_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "9db77584158d0ab52307f8c04f8e7c08ca76b5b3" +uuid = "efe28fd5-8261-553b-a9e1-b2916fc3738e" +version = "0.5.3+4" + +[[OptimBase]] +deps = ["NLSolversBase", "Printf", "Reexport"] +git-tree-sha1 = "4c26a757fbb5b1893b7df19a44e21762d8f8e470" +uuid = "87e2bd06-a317-5318-96d9-3ecbac512eee" +version = "2.0.1" + +[[OrderedCollections]] +git-tree-sha1 = "cf59cfed2e2c12e8a2ff0a4f1e9b2cd8650da6db" +uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" +version = "1.3.2" + +[[PDMats]] +deps = ["LinearAlgebra", "SparseArrays", "SuiteSparse", "Test"] +git-tree-sha1 = "95a4038d1011dfdbde7cecd2ad0ac411e53ab1bc" +uuid = "90014a1f-27ba-587c-ab20-58faa44d9150" +version = "0.10.1" + +[[Parsers]] +deps = ["Dates"] +git-tree-sha1 = "6370b5b3cf2ce5a3d2b6f7ab2dc10f374e4d7d2b" +uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" +version = "1.0.14" + +[[Pkg]] +deps = ["Dates", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "UUIDs"] +uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" + +[[Printf]] +deps = ["Unicode"] +uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" + +[[PyCall]] +deps = ["Conda", "Dates", "Libdl", "LinearAlgebra", "MacroTools", "Serialization", "VersionParsing"] +git-tree-sha1 = "b6dff5fa725eff4f775f472acd86756d6e31fb02" +uuid = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0" +version = "1.92.1" + +[[QuadGK]] +deps = ["DataStructures", "LinearAlgebra"] +git-tree-sha1 = "12fbe86da16df6679be7521dfb39fbc861e1dc7b" +uuid = "1fd47b50-473d-5c70-9696-f719f8f3bcdc" +version = "2.4.1" + +[[REPL]] +deps = ["InteractiveUtils", "Markdown", "Sockets"] +uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" + +[[Random]] +deps = ["Serialization"] +uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" + +[[Reexport]] +deps = ["Pkg"] +git-tree-sha1 = 
"7b1d07f411bc8ddb7977ec7f377b97b158514fe0" +uuid = "189a3867-3050-52da-a836-e630ba90ab69" +version = "0.2.0" + +[[Requires]] +deps = ["UUIDs"] +git-tree-sha1 = "e05c53ebc86933601d36212a93b39144a2733493" +uuid = "ae029012-a4dd-5104-9daa-d747884805df" +version = "1.1.1" + +[[Rmath]] +deps = ["Random", "Rmath_jll"] +git-tree-sha1 = "86c5647b565873641538d8f812c04e4c9dbeb370" +uuid = "79098fc4-a85e-5d69-aa6a-4863f24498fa" +version = "0.6.1" + +[[Rmath_jll]] +deps = ["Libdl", "Pkg"] +git-tree-sha1 = "d76185aa1f421306dec73c057aa384bad74188f0" +uuid = "f50d1b31-88e8-58de-be2c-1cc44531875f" +version = "0.2.2+1" + +[[SHA]] +uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" + +[[Serialization]] +uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" + +[[SharedArrays]] +deps = ["Distributed", "Mmap", "Random", "Serialization"] +uuid = "1a1011a3-84de-559e-8e89-a11a2f7dc383" + +[[SimpleTraits]] +deps = ["InteractiveUtils", "MacroTools"] +git-tree-sha1 = "daf7aec3fe3acb2131388f93a4c409b8c7f62226" +uuid = "699a6c99-e7fa-54fc-8d76-47d257e15c1d" +version = "0.9.3" + +[[Sockets]] +uuid = "6462fe0b-24de-5631-8697-dd941f90decc" + +[[SortingAlgorithms]] +deps = ["DataStructures", "Random", "Test"] +git-tree-sha1 = "03f5898c9959f8115e30bc7226ada7d0df554ddd" +uuid = "a2af1166-a08f-5f64-846c-94a0d3cef48c" +version = "0.3.1" + +[[SparseArrays]] +deps = ["LinearAlgebra", "Random"] +uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" + +[[SpecialFunctions]] +deps = ["OpenSpecFun_jll"] +git-tree-sha1 = "d8d8b8a9f4119829410ecd706da4cc8594a1e020" +uuid = "276daf66-3868-5448-9aa4-cd146d93841b" +version = "0.10.3" + +[[StaticArrays]] +deps = ["LinearAlgebra", "Random", "Statistics"] +git-tree-sha1 = "da4cf579416c81994afd6322365d00916c79b8ae" +uuid = "90137ffa-7385-5640-81b9-e52037218182" +version = "0.12.5" + +[[Statistics]] +deps = ["LinearAlgebra", "SparseArrays"] +uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" + +[[StatsBase]] +deps = ["DataAPI", "DataStructures", "LinearAlgebra", "Missings", "Printf", "Random", 
"SortingAlgorithms", "SparseArrays", "Statistics"] +git-tree-sha1 = "7bab7d4eb46b225b35179632852b595a3162cb61" +uuid = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" +version = "0.33.2" + +[[StatsFuns]] +deps = ["Rmath", "SpecialFunctions"] +git-tree-sha1 = "3b9f665c70712af3264b61c27a7e1d62055dafd1" +uuid = "4c63d2b9-4356-54db-8cca-17b64c39e42c" +version = "0.9.6" + +[[SuiteSparse]] +deps = ["Libdl", "LinearAlgebra", "Serialization", "SparseArrays"] +uuid = "4607b0f0-06f3-5cda-b6b1-a6196a1729e9" + +[[Test]] +deps = ["Distributed", "InteractiveUtils", "Logging", "Random"] +uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + +[[UMAP]] +deps = ["Arpack", "Distances", "LinearAlgebra", "LsqFit", "NearestNeighborDescent", "Random", "SparseArrays"] +path = ".." +uuid = "c4f8c510-2410-5be4-91d7-4fbaeb39457e" +version = "0.1.7" + +[[UUIDs]] +deps = ["Random", "SHA"] +uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" + +[[Unicode]] +uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" + +[[VersionParsing]] +git-tree-sha1 = "80229be1f670524750d905f8fc8148e5a8c4537f" +uuid = "81def892-9a0e-5fdd-b105-ffc91e053289" +version = "1.2.0" diff --git a/python/Project.toml b/python/Project.toml new file mode 100644 index 0000000..9ef9283 --- /dev/null +++ b/python/Project.toml @@ -0,0 +1,3 @@ +[deps] +PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0" +UMAP = "c4f8c510-2410-5be4-91d7-4fbaeb39457e" diff --git a/python/check_fuzzy_intersection.jl b/python/check_fuzzy_intersection.jl new file mode 100644 index 0000000..24e30ce --- /dev/null +++ b/python/check_fuzzy_intersection.jl @@ -0,0 +1,20 @@ +# run on Julia 1.5.3 +using PyCall +pyimport_conda("llvmlite", "llvmlite==0.33", "numba") # to try to avoid segfaults by getting Numba with LLVM 9 instead of 10 +const py_umap = pyimport_conda("umap", "umap-learn") + +# https://github.com/JuliaPy/PyCall.jl/issues/204#issuecomment-192333326 +PyCall.PyObject(S::SparseMatrixCSC) = + pyimport("scipy.sparse")["csc_matrix"]((S.nzval, S.rowval .- 1, S.colptr .- 1), 
shape=size(S)) + + + +g1 = sprand(5000,5000,0.01) +g2 = sprand(5000,5000,0.01) + +py_ans = py_umap.umap_.general_simplicial_set_intersection(g1, g2, weight=0.5) + +jl_ans = UMAP._fuzzy_intersection(nothing, nothing, UMAP.Graph(nothing, g1), UMAP.Graph(nothing, g2); mix_weight=0.5) + + +rel_error = norm(jl_ans - py_ans.todense()) / max(norm(jl_ans), norm(py_ans.todense())) diff --git a/src/UMAP.jl b/src/UMAP.jl index 933eb9a..a489f19 100644 --- a/src/UMAP.jl +++ b/src/UMAP.jl @@ -7,10 +7,10 @@ using LsqFit: curve_fit using NearestNeighborDescent using SparseArrays +include("umap_.jl") include("utils.jl") include("embeddings.jl") -include("umap_.jl") -export umap, UMAP_, transform +export Embedding, Categorical, DataView end # module diff --git a/src/umap_.jl b/src/umap_.jl index be745da..183f519 100644 --- a/src/umap_.jl +++ b/src/umap_.jl @@ -1,178 +1,159 @@ # an implementation of Uniform Manifold Approximation and Projection # for Dimension Reduction, L. McInnes, J. Healy, J. Melville, 2018. -struct UMAP_{S <: Real, M <: AbstractMatrix{S}, N <: AbstractMatrix{S}, D<:AbstractMatrix{S}, K<:AbstractMatrix{<:Integer}, I<:AbstractMatrix{S}} - graph::M - embedding::N + +Base.@kwdef struct Params{M, T, NT <: NamedTuple} + metric::M + n_neighbors::Int = 15 + local_connectivity::Int = 1 + set_operation_ratio::T = 1 + nndescent_kwargs::NT = NamedTuple() +end + +struct DataView{P<:Params, D} + params::P data::D +end + +DataView(X; kwargs...) 
= DataView(Params(; kwargs...), X) + +params(d::DataView) = d.params +n_points(d::DataView) = size(d.data)[end] + +Base.@kwdef struct Categorical{T} + far_dist::T = 5.0 + unknown_dist::T = 1.0 +end + +struct KNNs{DD<:DataView,K,D} + data_with_metric::DD knns::K - dists::I - - function UMAP_{S, M, N, D, K, I}(graph, embedding, data, knns, dists) where {S<:Real, - M<:AbstractMatrix{S}, - N<:AbstractMatrix{S}, - D<:AbstractMatrix{S}, - K<:AbstractMatrix{<:Integer}, - I<:AbstractMatrix{S}} - issymmetric(graph) || isapprox(graph, graph') || error("UMAP_ constructor expected graph to be a symmetric matrix") - size(knns) == size(dists) || error("UMAP_ constructor expected knns and dists to have equal size") - new(graph, embedding, data, knns, dists) - end + dists::D end -function UMAP_(graph::M, embedding::N, data::D, knns::K, dists::I) where {S<:Real, - M<:AbstractMatrix{S}, - N<:AbstractMatrix{S}, - D<:AbstractMatrix{S}, - K<:AbstractMatrix{<:Integer}, - I<:AbstractMatrix{S}} - return UMAP_{S, M, N, D, K, I}(graph, embedding, data, knns, dists) +# For categorical data, we don't compute KNNs +KNNs(d::DataView{<:Params{<:Categorical}}) = KNNs(d, nothing, nothing) + +function KNNs(d::DataView) + knns, dists = knn_search(d.data, params(d).n_neighbors, params(d).metric; nndescent_kwargs = params(d).nndescent_kwargs) + return KNNs(d, knns, dists) end -const SMOOTH_K_TOLERANCE = 1e-5 +params(k::KNNs) = params(k.data_with_metric) +n_points(k::KNNs) = n_points(k.data_with_metric) +struct Graph{K,G} + knns::K + graph::G +end -""" - umap(X::AbstractMatrix[, n_components=2]; ) -> embedding +params(g::Graph) = params(g.knns) +n_points(g::Graph) = n_points(g.knns) -Embed the data `X` into a `n_components`-dimensional space. `n_neighbors` controls -how many neighbors to consider as locally connected. +# For categorical data, we don't compute a fuzzy graph +Graph(knns::KNNs{<:DataView{<:Params{<:Categorical}}}) = Graph(knns, nothing) -See `UMAP_` for a description of keyword arguments. 
-""" -function umap(args...; kwargs...) - # this is just a convenience function for now - return UMAP_(args...; kwargs...).embedding +function Graph(knns::KNNs) + graph = fuzzy_simplicial_set(knns.knns, knns.dists, params(knns).n_neighbors, n_points(knns), params(knns).local_connectivity, params(knns).set_operation_ratio) + return Graph(knns, graph) end -""" - UMAP_(X::AbstractMatrix[, n_components=2]; ) -> UMAP_ object - -Create a model representing the embedding of data `X` into `n_components`-dimensional space. -The returned model has the following fields: - -- `graph`: the graph representing the fuzzy simplicial set of the manifold of `X`. -- `embedding`: the `n-component`-dimensional embedding of the data `X`. -- `data`: a reference to the input data `X`. -- `knns`: a matrix of indices of `X` representing each point's nearest neighbors according to `metric`. - `knns[j, i]` is the index of point i's jth nearest neighbor. -- `dists`: the respective distances of the above neighbors. - `dists[j, i]` is the distance of point i's jth nearest neighbor. - -# Keyword Arguments -- `n_neighbors::Integer = 15`: the number of neighbors to consider as locally connected. Larger values capture more global structure in the data, while small values capture more local structure. -- `metric::{SemiMetric, Symbol} = Euclidean()`: the metric to calculate distance in the input space. It is also possible to pass `metric = :precomputed` to treat `X` like a precomputed distance matrix. -- `n_epochs::Integer = 300`: the number of training epochs for embedding optimization -- `learning_rate::Real = 1`: the initial learning rate during optimization -- `init::Symbol = :spectral`: how to initialize the output embedding; valid options are `:spectral` and `:random` -- `min_dist::Real = 0.1`: the minimum spacing of points in the output embedding -- `spread::Real = 1`: the effective scale of embedded points. Determines how clustered embedded points are in combination with `min_dist`. 
-- `set_operation_ratio::Real = 1`: interpolates between fuzzy set union and fuzzy set intersection when constructing the UMAP graph (global fuzzy simplicial set). The value of this parameter should be between 1.0 and 0.0: 1.0 indicates pure fuzzy union, while 0.0 indicates pure fuzzy intersection. -- `local_connectivity::Integer = 1`: the number of nearest neighbors that should be assumed to be locally connected. The higher this value, the more connected the manifold becomes. This should not be set higher than the intrinsic dimension of the manifold. -- `repulsion_strength::Real = 1`: the weighting of negative samples during the optimization process. -- `neg_sample_rate::Integer = 5`: the number of negative samples to select for each positive sample. Higher values will increase computational cost but result in slightly more accuracy. -- `a = nothing`: this controls the embedding. By default, this is determined automatically by `min_dist` and `spread`. -- `b = nothing`: this controls the embedding. By default, this is determined automatically by `min_dist` and `spread`. 
-""" -function UMAP_(X::AbstractMatrix{S}, - n_components::Integer = 2; - n_neighbors::Integer = 15, - metric::Union{SemiMetric, Symbol} = Euclidean(), - n_epochs::Integer = 300, - learning_rate::Real = 1, - init::Symbol = :spectral, - min_dist::Real = 1//10, - spread::Real = 1, - set_operation_ratio::Real = 1, - local_connectivity::Integer = 1, - repulsion_strength::Real = 1, - neg_sample_rate::Integer = 5, - a::Union{Real, Nothing} = nothing, - b::Union{Real, Nothing} = nothing - ) where {S<:Real} - # argument checking - size(X, 2) > n_neighbors > 0|| throw(ArgumentError("size(X, 2) must be greater than n_neighbors and n_neighbors must be greater than 0")) - size(X, 1) > n_components > 1 || throw(ArgumentError("size(X, 1) must be greater than n_components and n_components must be greater than 1")) - n_epochs > 0 || throw(ArgumentError("n_epochs must be greater than 0")) - learning_rate > 0 || throw(ArgumentError("learning_rate must be greater than 0")) - min_dist > 0 || throw(ArgumentError("min_dist must be greater than 0")) - 0 ≤ set_operation_ratio ≤ 1 || throw(ArgumentError("set_operation_ratio must lie in [0, 1]")) - local_connectivity > 0 || throw(ArgumentError("local_connectivity must be greater than 0")) - - - # main algorithm - knns, dists = knn_search(X, n_neighbors, metric) - graph = fuzzy_simplicial_set(knns, dists, n_neighbors, size(X, 2), local_connectivity, set_operation_ratio) - - embedding = initialize_embedding(graph, n_components, Val(init)) - - embedding = optimize_embedding(graph, embedding, embedding, n_epochs, learning_rate, min_dist, spread, repulsion_strength, neg_sample_rate, move_ref=true) - # TODO: if target variable y is passed, then construct target graph - # in the same manner and do a fuzzy simpl set intersection - - return UMAP_(graph, hcat(embedding...), X, knns, dists) +Graph(d::DataView) = Graph(KNNs(d)) +Graph(g::Graph) = g + +metric(x) = params(x).metric + +fuzzy_intersection(g1::Graph, g2::Graph; kwargs...) 
= Graph(nothing, _fuzzy_intersection(metric(g1), metric(g2), g1, g2; kwargs...)) + + +struct Embedding{G<:Graph, E, NT <: NamedTuple} + graph::G + embedding::E + embedding_params::NT end -""" - transform(model::UMAP_, Q::AbstractMatrix; ) -> embedding - -Use the given model to embed new points into an existing embedding. `Q` is a matrix of some number of points (columns) -in the same space as `model.data`. The returned embedding is the embedding of these points in n-dimensional space, where -n is the dimensionality of `model.embedding`. This embedding is created by finding neighbors of `Q` in `model.embedding` -and optimizing cross entropy according to membership strengths according to these neighbors. - -# Keyword Arguments -- `n_neighbors::Integer = 15`: the number of neighbors to consider as locally connected. Larger values capture more global structure in the data, while small values capture more local structure. -- `metric::{SemiMetric, Symbol} = Euclidean()`: the metric to calculate distance in the input space. It is also possible to pass `metric = :precomputed` to treat `X` like a precomputed distance matrix. -- `n_epochs::Integer = 300`: the number of training epochs for embedding optimization -- `learning_rate::Real = 1`: the initial learning rate during optimization -- `init::Symbol = :spectral`: how to initialize the output embedding; valid options are `:spectral` and `:random` -- `min_dist::Real = 0.1`: the minimum spacing of points in the output embedding -- `spread::Real = 1`: the effective scale of embedded points. Determines how clustered embedded points are in combination with `min_dist`. -- `set_operation_ratio::Real = 1`: interpolates between fuzzy set union and fuzzy set intersection when constructing the UMAP graph (global fuzzy simplicial set). The value of this parameter should be between 1.0 and 0.0: 1.0 indicates pure fuzzy union, while 0.0 indicates pure fuzzy intersection. 
-- `local_connectivity::Integer = 1`: the number of nearest neighbors that should be assumed to be locally connected. The higher this value, the more connected the manifold becomes. This should not be set higher than the intrinsic dimension of the manifold. -- `repulsion_strength::Real = 1`: the weighting of negative samples during the optimization process. -- `neg_sample_rate::Integer = 5`: the number of negative samples to select for each positive sample. Higher values will increase computational cost but result in slightly more accuracy. -- `a = nothing`: this controls the embedding. By default, this is determined automatically by `min_dist` and `spread`. -- `b = nothing`: this controls the embedding. By default, this is determined automatically by `min_dist` and `spread`. -""" -function transform(model::UMAP_, - Q::AbstractMatrix{S}; - n_neighbors::Integer = 15, - metric::Union{SemiMetric, Symbol} = Euclidean(), - n_epochs::Integer = 100, - learning_rate::Real = 1, - min_dist::Real = 1//10, - spread::Real = 1, - set_operation_ratio::Real = 1, - local_connectivity::Integer = 1, - repulsion_strength::Real = 1, - neg_sample_rate::Integer = 5, - a::Union{Real, Nothing} = nothing, - b::Union{Real, Nothing} = nothing - ) where {S<:Real} - # argument checking - size(Q, 2) > n_neighbors > 0 || throw(ArgumentError("size(Q, 2) must be greater than n_neighbors and n_neighbors must be greater than 0")) - learning_rate > 0 || throw(ArgumentError("learning_rate must be greater than 0")) - min_dist > 0 || throw(ArgumentError("min_dist must be greater than 0")) - 0 ≤ set_operation_ratio ≤ 1 || throw(ArgumentError("set_operation_ratio must lie in [0, 1]")) - local_connectivity > 0 || throw(ArgumentError("local_connectivity must be greater than 0")) - size(model.data, 2) == size(model.embedding, 2) || throw(ArgumentError("model.data must have same number of columns as model.embedding")) - size(model.data, 1) == size(Q, 1) || throw(ArgumentError("size(model.data, 1) must equal 
size(Q, 1)"))
-
-
-    n_epochs = max(0, n_epochs)
-    # main algorithm
-    knns, dists = knn_search(model.data, Q, n_neighbors, metric, model.knns, model.dists)
-    graph = fuzzy_simplicial_set(knns, dists, n_neighbors, size(model.data, 2), local_connectivity, set_operation_ratio, false)
-
-    embedding = initialize_embedding(graph, model.embedding)
-    ref_embedding = collect(eachcol(model.embedding))
-    embedding = optimize_embedding(graph, embedding, ref_embedding, n_epochs, learning_rate, min_dist, spread, repulsion_strength, neg_sample_rate, a, b, move_ref=false)
-
-    return reduce(hcat, embedding)
+const DEFAULT_EMBEDDING_PARAMS = Ref((; n_components=2, n_epochs = 300, learning_rate = 1, init = :spectral, min_dist = 1//10, spread=1, repulsion_strength = 1, neg_sample_rate = 5, a = nothing, b = nothing ))
+# Initialize and optimize an embedding of `g.graph`; returns an `Embedding` holding the graph, the embedding matrix and the parameters used.
+function Embedding(g::Graph; kwargs...)
+    embedding_params = merge(DEFAULT_EMBEDDING_PARAMS[], kwargs)
+    # keyword arguments given by the caller take precedence over DEFAULT_EMBEDDING_PARAMS
+    init_embedding = initialize_embedding(g.graph, embedding_params.n_components, Val(embedding_params.init))
+    # `a` and `b` are forwarded positionally after `neg_sample_rate` so that user-supplied values are not silently dropped
+    embedding = optimize_embedding(g.graph, init_embedding, init_embedding, embedding_params.n_epochs, embedding_params.learning_rate, embedding_params.min_dist, embedding_params.spread, embedding_params.repulsion_strength, embedding_params.neg_sample_rate, embedding_params.a, embedding_params.b, move_ref=true)
+
+    return Embedding(g, reduce(hcat, embedding), embedding_params)
+end
+# Embed the fuzzy intersection of two graphs; `mix_weight` goes to `fuzzy_intersection`, remaining keywords to `Embedding(::Graph)`.
+function Embedding(g1::Graph, g2::Graph; mix_weight=0.5, kwargs...)
+    return Embedding(fuzzy_intersection(g1, g2; mix_weight=mix_weight); kwargs...)
 end
+# Fallback: convert every argument with `Graph` and retry. If all arguments already are `Graph`s (or there are none) no conversion can make progress, so fail instead of recursing forever.
+Embedding(args...; kwargs...) = all(a -> a isa Graph, args) ? throw(ArgumentError("Embedding expects one or two inputs convertible to `Graph`, got $(length(args))")) : Embedding(map(Graph, args)...; kwargs...)
+
+const SMOOTH_K_TOLERANCE = 1e-5
+
+
+# """
+#     transform(model::UMAP_, Q::AbstractVecOrMat; ) -> embedding
+
+# Use the given model to embed new points into an existing embedding. `Q` is a matrix of some number of points (columns)
+# or a vector of data points in the same space as `model.data`.
The returned embedding is the embedding of these points in n-dimensional space, where
+# n is the dimensionality of `model.embedding`. This embedding is created by finding neighbors of `Q` in `model.embedding`
+# and optimizing cross entropy according to membership strengths with respect to these neighbors.
+
+# # Keyword Arguments
+# - `n_neighbors::Integer = 15`: the number of neighbors to consider as locally connected. Larger values capture more global structure in the data, while small values capture more local structure.
+# - `metric::{SemiMetric, Symbol} = Euclidean()`: the metric to calculate distance in the input space. It is also possible to pass `metric = :precomputed` to treat `X` like a precomputed distance matrix.
+# - `n_epochs::Integer = 100`: the number of training epochs for embedding optimization
+# - `learning_rate::Real = 1`: the initial learning rate during optimization
+# - `init::Symbol = :spectral`: how to initialize the output embedding; valid options are `:spectral` and `:random`
+# - `min_dist::Real = 0.1`: the minimum spacing of points in the output embedding
+# - `spread::Real = 1`: the effective scale of embedded points. Determines how clustered embedded points are in combination with `min_dist`.
+# - `set_operation_ratio::Real = 1`: interpolates between fuzzy set union and fuzzy set intersection when constructing the UMAP graph (global fuzzy simplicial set). The value of this parameter should be between 1.0 and 0.0: 1.0 indicates pure fuzzy union, while 0.0 indicates pure fuzzy intersection.
+# - `local_connectivity::Integer = 1`: the number of nearest neighbors that should be assumed to be locally connected. The higher this value, the more connected the manifold becomes. This should not be set higher than the intrinsic dimension of the manifold.
+# - `repulsion_strength::Real = 1`: the weighting of negative samples during the optimization process.
+# - `neg_sample_rate::Integer = 5`: the number of negative samples to select for each positive sample. Higher values will increase computational cost but result in slightly more accuracy. +# - `a = nothing`: this controls the embedding. By default, this is determined automatically by `min_dist` and `spread`. +# - `b = nothing`: this controls the embedding. By default, this is determined automatically by `min_dist` and `spread`. +# """ +# function transform(model::UMAP_, +# Q::AbstractVecOrMat; +# n_neighbors::Integer = 15, +# metric::Union{SemiMetric, Symbol} = Euclidean(), +# n_epochs::Integer = 100, +# learning_rate::Real = 1, +# min_dist::Real = 1//10, +# spread::Real = 1, +# set_operation_ratio::Real = 1, +# local_connectivity::Integer = 1, +# repulsion_strength::Real = 1, +# neg_sample_rate::Integer = 5, +# a::Union{Real, Nothing} = nothing, +# b::Union{Real, Nothing} = nothing +# ) +# # argument checking +# size(Q)[end] > n_neighbors > 0 || throw(ArgumentError("`size(Q)[end]` must be greater than n_neighbors and n_neighbors must be greater than 0")) +# learning_rate > 0 || throw(ArgumentError("learning_rate must be greater than 0")) +# min_dist > 0 || throw(ArgumentError("min_dist must be greater than 0")) +# 0 ≤ set_operation_ratio ≤ 1 || throw(ArgumentError("set_operation_ratio must lie in [0, 1]")) +# local_connectivity > 0 || throw(ArgumentError("local_connectivity must be greater than 0")) +# size(model.data)[end] == size(model.embedding, 2) || throw(ArgumentError("model.data must have same number of columns or data points as model.embedding")) +# ndims(model.data) == 1 || size(model.data, 1) == size(Q, 1) || throw(ArgumentError("size(model.data, 1) must equal size(Q, 1)")) + + +# n_epochs = max(0, n_epochs) +# # main algorithm +# knns, dists = knn_search(model.data, Q, n_neighbors, metric, model.knns, model.dists) +# graph = fuzzy_simplicial_set(knns, dists, n_neighbors, size(model.data)[end], local_connectivity, set_operation_ratio, false) + +# 
embedding = initialize_embedding(graph, model.embedding)
+#     ref_embedding = collect(eachcol(model.embedding))
+#     embedding = optimize_embedding(graph, embedding, ref_embedding, n_epochs, learning_rate, min_dist, spread, repulsion_strength, neg_sample_rate, a, b, move_ref=false)
+
+#     return reduce(hcat, embedding)
+# end
+
 
 """
     fuzzy_simplicial_set(knns, dists, n_neighbors, n_points, local_connectivity, set_op_ratio, apply_fuzzy_combine=true) -> membership_graph::SparseMatrixCSC,
diff --git a/src/utils.jl b/src/utils.jl
index cd0cd65..9f4de5b 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -23,10 +23,10 @@ function fit_ab(min_dist, spread, ::Nothing, ::Nothing)
 end
 
 
-knn_search(X::AbstractMatrix, k, metric::Symbol) = knn_search(X, k, Val(metric))
+knn_search(X::AbstractVecOrMat, k, metric::Symbol; nndescent_kwargs = NamedTuple()) = knn_search(X, k, Val(metric); nndescent_kwargs=nndescent_kwargs)
 
 # treat given matrix `X` as distance matrix
-knn_search(X::AbstractMatrix, k, ::Val{:precomputed}) = _knn_from_dists(X, k)
+knn_search(X::AbstractVecOrMat, k, ::Val{:precomputed}; nndescent_kwargs = NamedTuple()) = _knn_from_dists(X, k)
 
 """
     knn_search(X, k, metric) -> knns, dists
@@ -42,13 +42,14 @@ Find the `k` nearest neighbors of each point.
 - `knns`: `knns[j, i]` is the index of node i's jth nearest neighbor.
 - `dists`: `dists[j, i]` is the distance of node i's jth nearest neighbor.
 """
-function knn_search(X::AbstractMatrix,
+function knn_search(X::AbstractVecOrMat,
                     k,
-                    metric::SemiMetric)
-    if size(X, 2) < 4096
+                    metric::SemiMetric;
+                    nndescent_kwargs = NamedTuple())
+    if size(X)[end] < 4096
         return knn_search(X, k, metric, Val(:pairwise))
     else
-        return knn_search(X, k, metric, Val(:approximate))
+        return knn_search(X, k, metric, Val(:approximate); nndescent_kwargs=nndescent_kwargs)
     end
 end
 
@@ -65,12 +66,25 @@ function knn_search(X::AbstractMatrix{S},
     return _knn_from_dists(dist_mat, k)
 end
 
+# k nearest neighbors for a vector of data points, taken from the full pairwise distance matrix
+function knn_search(X::AbstractVector,
+                    k,
+                    metric,
+                    ::Val{:pairwise})
+    # distances are evaluated for i < j only; `Symmetric(_, :U)` mirrors the upper triangle
+    T = result_type(metric, first(X), first(X))
+    dist_mat = [i < j ? evaluate(metric, X[i], X[j]) : zero(T) for i in eachindex(X), j in eachindex(X)]
+    dist_mat = Symmetric(dist_mat, :U)
+    return _knn_from_dists(dist_mat, k)
+end
+
 # find the approximate k nearest neighbors using NNDescent
-function knn_search(X::AbstractMatrix{S},
+function knn_search(X::AbstractVecOrMat,
                     k,
                     metric,
-                    ::Val{:approximate}) where {S <: Real}
-    knngraph = nndescent(X, k, metric)
+                    ::Val{:approximate};
+                    nndescent_kwargs=NamedTuple())
+    knngraph = nndescent(X, k, metric; nndescent_kwargs...)
     return knn_matrices(knngraph)
 end
 
@@ -93,14 +107,19 @@ If the matrices are small, search for exact nearest neighbors of `Q` by computin
 - `knns`: `knns[j, i]` is the index of node i's jth nearest neighbor.
 - `dists`: `dists[j, i]` is the distance of node i's jth nearest neighbor.
 """
-function knn_search(X::AbstractMatrix,
-                    Q::AbstractMatrix,
+function knn_search(X::AbstractVecOrMat,
+                    Q::AbstractVecOrMat,
                     k::Integer,
                     metric::SemiMetric,
                     knns::AbstractMatrix{<:Integer},
                     dists::AbstractMatrix{<:Real})
-    if size(X, 2) < 4096
-        return _knn_from_dists(pairwise(metric, X, Q, dims=2), k, ignore_diagonal=false)
+    if size(X)[end] < 4096
+        if ndims(X) == 2
+            dist_mat = pairwise(metric, X, Q, dims=2)
+        else
+            dist_mat = [evaluate(metric, X[i], Q[j]) for i in eachindex(X), j in eachindex(Q)]
+        end
+        return _knn_from_dists(dist_mat, k, ignore_diagonal=false)
     else
         knngraph = HeapKNNGraph(collect(eachcol(X)), metric, knns, dists)
         return search(knngraph, collect(eachcol(Q)), k; max_candidates=8*k)
@@ -134,3 +153,80 @@ end
 @inline function fuzzy_set_intersection(fs_set::AbstractMatrix)
     return fs_set .* fs_set'
 end
+
+# Intersect the fuzzy graph `g` with categorical labels read from `y_graph.knns.data_with_metric.data`.
+# Each stored edge weight is multiplied by `exp(-metric2.unknown_dist)` when either endpoint label is
+# `missing`, and by `exp(-metric2.far_dist)` when the two labels differ; matching labels keep the weight.
+function _fuzzy_intersection(metric1, metric2::Categorical, g::Graph, y_graph::Graph; kwargs...)
+    graph = g.graph
+    y = y_graph.knns.data_with_metric.data
+    unknown_weight = exp(-metric2.unknown_dist)
+    far_weight = exp(-metric2.far_dist)
+    I, J, V = findnz(graph)
+    for nz in eachindex(I,J,V)
+        yi = y[I[nz]]
+        yj = y[J[nz]]
+        if ismissing(yi) || ismissing(yj)
+            V[nz] *= unknown_weight
+        elseif yi != yj
+            V[nz] *= far_weight
+        end
+    end
+    return sparse(I, J, V, size(graph)...)
+end
+
+# Intersect the fuzzy graphs `g1.graph` and `g2.graph` entry by entry over the sparsity pattern of
+# their sum. An entry that is not stored in one operand is replaced by that operand's floor value
+# (`left_min` / `right_min`) instead of zero. The two values are combined as a weighted product:
+# for `mix_weight < 0.5` the right value is raised to `mix_weight / (1 - mix_weight)`, otherwise
+# the left value is raised to `(1 - mix_weight) / mix_weight`. Returns a sparse matrix.
+function _fuzzy_intersection(metric1, metric2, g1::Graph, g2::Graph; mix_weight = 0.5, kwargs...)
+    left = g1.graph
+    right = g2.graph
+
+    left_min = max(minimum(nonzeros(left)) / 2.0, 1.0e-8)
+    right_min = min(max((1.0 - maximum(nonzeros(right))) / 2.0, 1.0e-8), 1e-4)
+
+    result = left + right
+    result_I, result_J, result_V = findnz(result)
+
+    left_rows = rowvals(left)
+    left_vals = nonzeros(left)
+
+    right_rows = rowvals(right)
+    right_vals = nonzeros(right)
+
+    # named `mix_exponent` rather than `exp` so that `Base.exp` is not shadowed in this function
+    mix_exponent = mix_weight < 0.5 ? (mix_weight / (1-mix_weight)) : (1-mix_weight) / mix_weight
+
+    for idx in eachindex(result_I, result_J, result_V)
+        i = result_I[idx]
+        j = result_J[idx]
+
+        # look up entry (i, j) among the stored entries of column j; keep the floor if it is absent
+        left_val = left_min
+        for k in nzrange(left, j)
+            if left_rows[k] == i
+                left_val = left_vals[k]
+            end
+        end
+
+        # the right operand falls back to its own floor, the value it is compared against below
+        right_val = right_min
+        for k in nzrange(right, j)
+            if right_rows[k] == i
+                right_val = right_vals[k]
+            end
+        end
+
+        if left_val > left_min || right_val > right_min
+            if mix_weight < 0.5
+                result_V[idx] = left_val * right_val^mix_exponent
+            else
+                result_V[idx] = left_val^mix_exponent * right_val
+            end
+        end
+    end
+    # pass the dimensions explicitly so that trailing rows/columns without entries are preserved
+    return sparse(result_I, result_J, result_V, size(result)...)
+end