Skip to content

Separable filtering #3

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
.dub
docs.json
__dummy.html
*.o
*.obj
__test__*__
dub.selections.json
libmir-cv.a
34 changes: 34 additions & 0 deletions dub.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
{
"name": "mir-cv",
"authors": [
"Relja Ljubobratovic",
"Ilya Yaroshenko",
"Mir Developers (see information per file)"
],
"description": "Low-level (betterC) modules for dcv library.",
"copyright": "Copyright © 2017, Relja Ljubobratovic, Ilya Yaroshenko",
"license": "BSL-1.0",
"dependencies": {
"mir-cpuid" : "~>0.4.2",
"mir-algorithm": "~>0.5.0"
},
"buildTypes": {
"betterC": {
"buildOptions": ["noBoundsCheck", "releaseMode", "optimize", "inline"],
"dflags-ldc": ["-betterC", "-mcpu=native", "-enable-cross-module-inlining"]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

}
},
"configurations": [
{
"name": "default"
},
{
"name": "static",
"targetType": "staticLibrary"
},
{
"name": "dynamic",
"targetType": "dynamicLibrary"
}
]
}
152 changes: 152 additions & 0 deletions source/mir/cv/core/memory.d
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
/+
This module contains the memory handling routines used throughout the library.
+/
module mir.cv.core.memory;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you please create a mir-rt repo and copy glas/internal/memory into it, so that it can be reused by both mir-cv and mir-glas? It already contains the required pragmas.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is great- will do!


import core.stdc.stdlib : malloc, free;

// Binding to the C library's posix_memalign, declared for Posix targets only.
// alignedAllocate in this module calls it as posix_memalign(&result, alignment, bytes)
// and inspects the returned error code.
version (Posix)
@nogc nothrow
private extern(C) int posix_memalign(void**, size_t, size_t);

// Copy-paste of the aligned memory allocation used in std.experimental.allocator.mallocator.AlignedMallocator ////////////

version (Windows)
{
// DMD Win 32 bit, DigitalMars C standard library misses the _aligned_xxx
// functions family (snn.lib)
version(CRuntime_DigitalMars)
{
// Bookkeeping header written directly in front of an aligned pointer.
// It remembers the pointer originally obtained from malloc (needed to free
// the chunk) and the requested size (needed to reallocate it).
private struct AlignInfo
{
    void* basePtr;
    size_t size;

    // Returns the header located AlignInfo.sizeof bytes before `ptr`.
    @nogc nothrow
    static AlignInfo* opCall(void* ptr)
    {
        void* headerStart = ptr - AlignInfo.sizeof;
        return cast(AlignInfo*) headerStart;
    }
}

// Fallback implementation of _aligned_malloc for the DigitalMars C runtime.
//
// Over-allocates with malloc, selects an address inside the chunk that is a
// multiple of `alignment`, and stores an AlignInfo header right before it.
//
// Params:
//     size      = number of usable bytes requested
//     alignment = requested alignment; the masking below assumes a power of two
// Returns:
//     the aligned pointer, or null when the total request would overflow
//     size_t or malloc fails.
@nogc nothrow
private void* _aligned_malloc(size_t size, size_t alignment)
{
    // core.stdc is the druntime binding; the std.c.* Phobos modules are deprecated.
    import core.stdc.stdlib : malloc;

    // Slack needed on top of `size`: the two-word header plus alignment padding.
    const size_t offset = alignment + size_t.sizeof * 2 - 1;

    // Refuse requests whose total size would wrap around.
    if (size > size_t.max - offset) return null;

    // unaligned chunk
    void* basePtr = malloc(size + offset);
    if (!basePtr) return null;

    // get aligned location within the chunk
    void* alignedPtr = cast(void*)((cast(size_t) basePtr + offset)
        & ~(alignment - 1));

    // write the header before the aligned pointer
    AlignInfo* head = AlignInfo(alignedPtr);
    head.basePtr = basePtr;
    head.size = size;

    return alignedPtr;
}

// Fallback implementation of _aligned_realloc for the DigitalMars C runtime.
//
// Allocates a fresh aligned block, copies the old contents into it and
// releases the old chunk.
//
// Params:
//     ptr       = pointer previously returned by _aligned_malloc/_aligned_realloc, or null
//     size      = new number of usable bytes
//     alignment = requested alignment of the new block
// Returns:
//     the new aligned pointer, or null if the allocation failed (the
//     original block is then left untouched).
@nogc nothrow
private void* _aligned_realloc(void* ptr, size_t size, size_t alignment)
{
    // core.stdc is the druntime binding; the std.c.* Phobos modules are deprecated.
    import core.stdc.stdlib : free;
    import core.stdc.string : memcpy;

    if (!ptr) return _aligned_malloc(size, alignment);

    // gets the header from the existing pointer
    AlignInfo* head = AlignInfo(ptr);

    // gets a new aligned pointer
    void* alignedPtr = _aligned_malloc(size, alignment);
    if (!alignedPtr)
    {
        //to https://msdn.microsoft.com/en-us/library/ms235462.aspx
        //see Return value: in this case the original block is unchanged
        return null;
    }

    // Copy the existing data, but never more than the new block can hold:
    // when shrinking, head.size is larger than size.
    const size_t bytesToCopy = head.size < size ? head.size : size;
    memcpy(alignedPtr, ptr, bytesToCopy);
    free(head.basePtr);

    return alignedPtr;
}

// Fallback implementation of _aligned_free for the DigitalMars C runtime.
//
// Looks up the AlignInfo header stored before `ptr` and frees the chunk
// originally returned by malloc. A null `ptr` is ignored.
@nogc nothrow
private void _aligned_free(void *ptr)
{
    // core.stdc is the druntime binding; the std.c.* Phobos modules are deprecated.
    import core.stdc.stdlib : free;

    if (!ptr) return;
    AlignInfo* head = AlignInfo(ptr);
    free(head.basePtr);
}

}
// DMD Win 64 bit, uses microsoft standard C library which implements them
else
{
// Declarations only: the C runtime supplies the implementations, so no
// AlignInfo header bookkeeping is done on this path.
@nogc nothrow private extern(C) void* _aligned_malloc(size_t, size_t);
@nogc nothrow private extern(C) void _aligned_free(void *memblock);
@nogc nothrow private extern(C) void* _aligned_realloc(void *, size_t, size_t);
}
}

/**
Allocates `bytes` bytes of memory aligned to `a`.

Uses `posix_memalign` on Posix and `_aligned_malloc` on Windows; any other
platform is rejected at compile time.

Params:
    bytes = number of bytes to allocate
    a = requested alignment in bytes
Returns:
    a slice of length `bytes` over the new block, or `null` when the
    underlying allocator reports an out-of-memory condition.
*/
@trusted @nogc nothrow
void[] alignedAllocate(size_t bytes, uint a)
{
    version (Posix)
    {
        import core.stdc.errno : ENOMEM, EINVAL;

        void* block;
        const status = posix_memalign(&block, a, bytes);

        if (status == 0)
            return block[0 .. bytes];

        if (status == ENOMEM)
            return null;

        // An invalid alignment is a programming error, not a runtime condition.
        if (status == EINVAL)
            assert (0, "alignedAllocate: alignment is not a power of two multiple of (void*).sizeof, according to posix_memalign!");

        assert (0, "posix_memalign returned an unknown code!");
    }
    else version (Windows)
    {
        void* block = _aligned_malloc(bytes, a);
        return block ? block[0 .. bytes] : null;
    }
    else
        static assert(0);
}

/**
Releases the block referenced by `b`.

Calls $(D free(b.ptr)) on Posix and
$(WEB msdn.microsoft.com/en-US/library/17b5h8td(v=vs.80).aspx,
$(D _aligned_free(b.ptr))) on Windows.

Params:
    b = the block to release
Returns:
    always `true`
*/
@system @nogc nothrow
bool deallocate(void[] b)
{
    version (Posix)
    {
        import core.stdc.stdlib : free;
        free(b.ptr);
    }
    else version (Windows)
    {
        _aligned_free(b.ptr);
    }
    else
        static assert(0);

    return true;
}

77 changes: 77 additions & 0 deletions source/mir/cv/core/simd.d
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
/+
Utilities to help use SIMD vectors.
+/

module mir.cv.core.simd;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would propose another approach that is used in mir-glas, see glas/internal/config.

The reason is mir-cv is open source and boost licensed and d compilers are fast. LDC supports compile time traits for CPU information (see glas internal config). Lets move forward with LDC for betterC libraries. Front-end DCV users would be able to use DMD because extern(C) interface.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We are able to compile multiple mir-cv dynamic backends for different CPUs.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Front-end DCV users would be able to use DMD because extern(C) interface.

I think you meant to say 'Front-end DCV users would be able to use mir-cv...'? If that's the case, I concur; I understood that was the idea from the beginning.

I would propose another approach that is used in mir-glas, see glas/internal/config.

Nice! It's really cool there are already some modules that we can reuse in mir libs. I'll study this module and see how I can fit it here. Should this also be copied to mir-rt?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. Yes
  2. Yes, but only partially, and to mir-algorithm, because it is compile-time logic. We need lists of the available vector lengths for different types.


import cpuid.x86_any;
import std.traits : isNumeric, TemplateOf;

extern(C) @system nothrow @nogc:

private
{
// TODO: Do we need this?
// Wrapper type whose static init() forwards to cpuid_x86_any_init()
// (presumably provided by the cpuid.x86_any import — verify).
// NOTE(review): nothing in the visible code calls init(); declaring a
// variable of this type does not invoke it — confirm where initialization
// is expected to happen.
struct Init_cpu_id
{
static void init()
{
cpuid_x86_any_init();
}
}
static Init_cpu_id __cpuid_init;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need this to get the L1/L2/L3 cache levels in the future. At the same time, I remember that Walter and Andrei proposed extern(C) shared this() module constructors that would work in betterC.

}


/++
SIMD instructionset traits.
+/
mixin template Instruction_set_trait(size_t _bitsize, T)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please avoid using instruction sets. This makes the code cryptic. Use __vector(T[N]), T, and size_t N as template arguments.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess you're right. InstructionSet was actually an idea to cheat, where one function would be used for vectorized and non-vectorized code (by using Non_SIMD). But I agree it feels cryptic... I'll try redesigning it.

{
enum bitsize = _bitsize;
enum elementCount = (bitsize / 8) / T.sizeof;

alias Vector = .Vector!(bitsize, T);
alias Scalar = T;
}

/// SSE (128bit) instruction set descriptor.
///
/// Accepts only numeric `T`. Mixes in `Instruction_set_trait!(128, T)`, which
/// provides `bitsize`, `elementCount`, `Vector` and `Scalar`.
template SSE(T)
if (isNumeric!T)
{
mixin Instruction_set_trait!(128, T);
}

/// AVX (256bit) instruction set descriptor.
///
/// Accepts only numeric `T`. Mixes in `Instruction_set_trait!(256, T)`, which
/// provides `bitsize`, `elementCount`, `Vector` and `Scalar`.
template AVX(T)
if (isNumeric!T)
{
mixin Instruction_set_trait!(256, T);
}

/// Non-simd, instruction set mock-up.
///
/// Exposes the same member names as the SSE/AVX descriptors (`bitsize`,
/// `elementCount`, `Vector`, `Scalar`), but `Vector` is the scalar type `T`
/// itself and `elementCount` is 1.
template Non_SIMD(T)
if (isNumeric!T)
{
// NOTE(review): bitsize is fixed at 8 regardless of T.sizeof — confirm this is intended.
enum bitsize = 8;
enum elementCount = 1;
alias Vector = T;
alias Scalar = T;
}

/// Evaluates to `true` when `InstructionSet` is an instance of the `SSE` or
/// `AVX` descriptor template, and to `false` for anything else.
template Is_SIMD(alias InstructionSet)
{
    private alias Origin = TemplateOf!InstructionSet;

    enum Is_SIMD = __traits(isSame, Origin, SSE)
        || __traits(isSame, Origin, AVX);
}

/// SIMD vector trait - build vector type using bitsize and scalar type.
template Vector(size_t bitsize, T)
{
alias Vector = __vector(T[(bitsize / 8) / T.sizeof]);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please use __vector instead. This would make the code more readable for newcomers.

}

Loading