Custom kws #1069

Open · wants to merge 2 commits into base: master
1 change: 1 addition & 0 deletions .gitignore
@@ -41,3 +41,4 @@ test/.coverage
test/data/legacy_serialized.pt
*~
.idea
data/debabble
83 changes: 54 additions & 29 deletions models/networks.py
@@ -116,7 +116,7 @@ def init_net(net, init_type='normal', init_gain=0.02, gpu_ids=[]):
return net


def define_G(input_nc, output_nc, ngf, netG, norm='batch', use_dropout=False, init_type='normal', init_gain=0.02, gpu_ids=[]):
def define_G(input_nc, output_nc, ngf, netG, norm='batch', use_dropout=False, init_type='normal', init_gain=0.02, gpu_ids=[], kws=None, paddings=None):
"""Create a generator

Parameters:
@@ -151,15 +151,15 @@ def define_G(input_nc, output_nc, ngf, netG, norm='batch', use_dropout=False, in
elif netG == 'resnet_6blocks':
net = ResnetGenerator(input_nc, output_nc, ngf, norm_layer=norm_layer, use_dropout=use_dropout, n_blocks=6)
elif netG == 'unet_128':
net = UnetGenerator(input_nc, output_nc, 7, ngf, norm_layer=norm_layer, use_dropout=use_dropout)
net = UnetGenerator(input_nc, output_nc, 7, ngf, norm_layer=norm_layer, use_dropout=use_dropout, kws=kws, paddings=paddings)
elif netG == 'unet_256':
net = UnetGenerator(input_nc, output_nc, 8, ngf, norm_layer=norm_layer, use_dropout=use_dropout)
net = UnetGenerator(input_nc, output_nc, 8, ngf, norm_layer=norm_layer, use_dropout=use_dropout, kws=kws, paddings=paddings)
else:
raise NotImplementedError('Generator model name [%s] is not recognized' % netG)
return init_net(net, init_type, init_gain, gpu_ids)


def define_D(input_nc, ndf, netD, n_layers_D=3, norm='batch', init_type='normal', init_gain=0.02, gpu_ids=[]):
def define_D(input_nc, ndf, netD, n_layers_D=3, norm='batch', init_type='normal', init_gain=0.02, gpu_ids=[], kws=None, paddings=None):
"""Create a discriminator

Parameters:
@@ -193,9 +193,9 @@ def define_D(input_nc, ndf, netD, n_layers_D=3, norm='batch', init_type='normal'
norm_layer = get_norm_layer(norm_type=norm)

if netD == 'basic': # default PatchGAN classifier
net = NLayerDiscriminator(input_nc, ndf, n_layers=3, norm_layer=norm_layer)
net = NLayerDiscriminator(input_nc, ndf, n_layers=3, norm_layer=norm_layer, kws=kws, paddings=paddings)
elif netD == 'n_layers': # more options
net = NLayerDiscriminator(input_nc, ndf, n_layers_D, norm_layer=norm_layer)
net = NLayerDiscriminator(input_nc, ndf, n_layers_D, norm_layer=norm_layer, kws=kws, paddings=paddings)
elif netD == 'pixel': # classify if each pixel is real or fake
net = PixelDiscriminator(input_nc, ndf, norm_layer=norm_layer)
else:
@@ -436,7 +436,7 @@ def forward(self, x):
class UnetGenerator(nn.Module):
"""Create a Unet-based generator"""

def __init__(self, input_nc, output_nc, num_downs, ngf=64, norm_layer=nn.BatchNorm2d, use_dropout=False):
def __init__(self, input_nc, output_nc, num_downs, ngf=64, norm_layer=nn.BatchNorm2d, use_dropout=False, kws=None, paddings=None):
"""Construct a Unet generator
Parameters:
input_nc (int) -- the number of channels in input images
@@ -450,15 +450,24 @@ def __init__(self, input_nc, output_nc, num_downs, ngf=64, norm_layer=nn.BatchNo
It is a recursive process.
"""
super(UnetGenerator, self).__init__()
if kws is None:
self.kws = [None for _ in range(num_downs)]
self.paddings = [None for _ in range(num_downs)]
elif paddings is None:
self.kws = kws
self.paddings = [None for _ in range(num_downs)]
else:
self.kws = kws
self.paddings = paddings
# construct unet structure
unet_block = UnetSkipConnectionBlock(ngf * 8, ngf * 8, input_nc=None, submodule=None, norm_layer=norm_layer, innermost=True) # add the innermost layer
unet_block = UnetSkipConnectionBlock(ngf * 8, ngf * 8, input_nc=None, submodule=None, norm_layer=norm_layer, innermost=True, kws=self.kws[-1], paddings=self.paddings[-1]) # add the innermost layer
for i in range(num_downs - 5): # add intermediate layers with ngf * 8 filters
unet_block = UnetSkipConnectionBlock(ngf * 8, ngf * 8, input_nc=None, submodule=unet_block, norm_layer=norm_layer, use_dropout=use_dropout)
unet_block = UnetSkipConnectionBlock(ngf * 8, ngf * 8, input_nc=None, submodule=unet_block, norm_layer=norm_layer, use_dropout=use_dropout, kws=self.kws[-i - 2], paddings=self.paddings[-i - 2])
# gradually reduce the number of filters from ngf * 8 to ngf
unet_block = UnetSkipConnectionBlock(ngf * 4, ngf * 8, input_nc=None, submodule=unet_block, norm_layer=norm_layer)
unet_block = UnetSkipConnectionBlock(ngf * 2, ngf * 4, input_nc=None, submodule=unet_block, norm_layer=norm_layer)
unet_block = UnetSkipConnectionBlock(ngf, ngf * 2, input_nc=None, submodule=unet_block, norm_layer=norm_layer)
self.model = UnetSkipConnectionBlock(output_nc, ngf, input_nc=input_nc, submodule=unet_block, outermost=True, norm_layer=norm_layer) # add the outermost layer
unet_block = UnetSkipConnectionBlock(ngf * 4, ngf * 8, input_nc=None, submodule=unet_block, norm_layer=norm_layer, kws=self.kws[-num_downs + 3], paddings=self.paddings[-num_downs + 3])
unet_block = UnetSkipConnectionBlock(ngf * 2, ngf * 4, input_nc=None, submodule=unet_block, norm_layer=norm_layer, kws=self.kws[-num_downs + 2], paddings=self.paddings[-num_downs + 2])
unet_block = UnetSkipConnectionBlock(ngf, ngf * 2, input_nc=None, submodule=unet_block, norm_layer=norm_layer, kws=self.kws[-num_downs + 1], paddings=self.paddings[-num_downs + 1])
self.model = UnetSkipConnectionBlock(output_nc, ngf, input_nc=input_nc, submodule=unet_block, outermost=True, norm_layer=norm_layer, kws=self.kws[-num_downs], paddings=self.paddings[-num_downs]) # add the outermost layer

def forward(self, input):
"""Standard forward"""
@@ -472,7 +481,7 @@ class UnetSkipConnectionBlock(nn.Module):
"""

def __init__(self, outer_nc, inner_nc, input_nc=None,
submodule=None, outermost=False, innermost=False, norm_layer=nn.BatchNorm2d, use_dropout=False):
submodule=None, outermost=False, innermost=False, norm_layer=nn.BatchNorm2d, use_dropout=False, kws=None, paddings=None):
"""Construct a Unet submodule with skip connections.

Parameters:
@@ -486,38 +495,47 @@ def __init__(self, outer_nc, inner_nc, input_nc=None,
use_dropout (bool) -- if use dropout layers.
"""
super(UnetSkipConnectionBlock, self).__init__()
if kws is None:
self.kws = 4
self.paddings = 1
elif paddings is None:
self.kws = kws
self.paddings = 1
else:
self.kws = kws
self.paddings = paddings
self.outermost = outermost
if type(norm_layer) == functools.partial:
use_bias = norm_layer.func == nn.InstanceNorm2d
else:
use_bias = norm_layer == nn.InstanceNorm2d
if input_nc is None:
input_nc = outer_nc
downconv = nn.Conv2d(input_nc, inner_nc, kernel_size=4,
stride=2, padding=1, bias=use_bias)
downconv = nn.Conv2d(input_nc, inner_nc, kernel_size=self.kws,
stride=2, padding=self.paddings, bias=use_bias)
downrelu = nn.LeakyReLU(0.2, True)
downnorm = norm_layer(inner_nc)
uprelu = nn.ReLU(True)
upnorm = norm_layer(outer_nc)

if outermost:
upconv = nn.ConvTranspose2d(inner_nc * 2, outer_nc,
kernel_size=4, stride=2,
padding=1)
kernel_size=self.kws, stride=2,
padding=self.paddings)
down = [downconv]
up = [uprelu, upconv, nn.Tanh()]
model = down + [submodule] + up
elif innermost:
upconv = nn.ConvTranspose2d(inner_nc, outer_nc,
kernel_size=4, stride=2,
padding=1, bias=use_bias)
kernel_size=self.kws, stride=2,
padding=self.paddings, bias=use_bias)
down = [downrelu, downconv]
up = [uprelu, upconv, upnorm]
model = down + up
else:
upconv = nn.ConvTranspose2d(inner_nc * 2, outer_nc,
kernel_size=4, stride=2,
padding=1, bias=use_bias)
kernel_size=self.kws, stride=2,
padding=self.paddings, bias=use_bias)
down = [downrelu, downconv, downnorm]
up = [uprelu, upconv, upnorm]

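Note on choosing values: every block keeps stride 2, so each kernel width and padding have to be picked together. A kernel width of 2 * padding + 2 (the default 4 with padding 1, or e.g. 6 with padding 2) halves the feature map on the way down and lets the matching ConvTranspose2d double it exactly on the way up; an odd kernel such as 3 with padding 1 still halves on the way down, but the transposed conv then returns 2*n - 1 samples and the skip-connection concatenation fails. A quick illustrative check (not part of the diff):

def down_size(n, k, p): return (n + 2 * p - k) // 2 + 1   # Conv2d, stride 2
def up_size(n, k, p): return (n - 1) * 2 - 2 * p + k      # ConvTranspose2d, stride 2
assert down_size(256, 4, 1) == 128 and up_size(128, 4, 1) == 256   # default kernel 4, padding 1
assert down_size(256, 6, 2) == 128 and up_size(128, 6, 2) == 256   # e.g. kernel 6, padding 2
assert up_size(down_size(256, 3, 1), 3, 1) == 255                  # odd kernel breaks the round trip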
@@ -538,7 +556,7 @@ def forward(self, x):
class NLayerDiscriminator(nn.Module):
"""Defines a PatchGAN discriminator"""

def __init__(self, input_nc, ndf=64, n_layers=3, norm_layer=nn.BatchNorm2d):
def __init__(self, input_nc, ndf=64, n_layers=3, norm_layer=nn.BatchNorm2d, kws=None, paddings=None):
"""Construct a PatchGAN discriminator

Parameters:
@@ -548,34 +566,41 @@ def __init__(self, input_nc, ndf=64, n_layers=3, norm_layer=nn.BatchNorm2d):
norm_layer -- normalization layer
"""
super(NLayerDiscriminator, self).__init__()
if kws is None:
self.kws = [4 for _ in range(n_layers + 2)]
self.paddings = [1 for _ in range(n_layers + 2)]
elif paddings is None:
self.kws = kws
self.paddings = [1 for _ in range(n_layers + 2)]
else:
self.kws = kws
self.paddings = paddings
if type(norm_layer) == functools.partial: # no need to use bias as BatchNorm2d has affine parameters
use_bias = norm_layer.func == nn.InstanceNorm2d
else:
use_bias = norm_layer == nn.InstanceNorm2d

kw = 4
padw = 1
sequence = [nn.Conv2d(input_nc, ndf, kernel_size=kw, stride=2, padding=padw), nn.LeakyReLU(0.2, True)]
sequence = [nn.Conv2d(input_nc, ndf, kernel_size=self.kws[0], stride=2, padding=self.paddings[0]), nn.LeakyReLU(0.2, True)]
nf_mult = 1
nf_mult_prev = 1
for n in range(1, n_layers): # gradually increase the number of filters
nf_mult_prev = nf_mult
nf_mult = min(2 ** n, 8)
sequence += [
nn.Conv2d(ndf * nf_mult_prev, ndf * nf_mult, kernel_size=kw, stride=2, padding=padw, bias=use_bias),
nn.Conv2d(ndf * nf_mult_prev, ndf * nf_mult, kernel_size=self.kws[n], stride=2, padding=self.paddings[n], bias=use_bias),
norm_layer(ndf * nf_mult),
nn.LeakyReLU(0.2, True)
]

nf_mult_prev = nf_mult
nf_mult = min(2 ** n_layers, 8)
sequence += [
nn.Conv2d(ndf * nf_mult_prev, ndf * nf_mult, kernel_size=kw, stride=1, padding=padw, bias=use_bias),
nn.Conv2d(ndf * nf_mult_prev, ndf * nf_mult, kernel_size=self.kws[n_layers], stride=1, padding=self.paddings[n_layers], bias=use_bias),
norm_layer(ndf * nf_mult),
nn.LeakyReLU(0.2, True)
]

sequence += [nn.Conv2d(ndf * nf_mult, 1, kernel_size=kw, stride=1, padding=padw)] # output 1 channel prediction map
sequence += [nn.Conv2d(ndf * nf_mult, 1, kernel_size=self.kws[n_layers + 1], stride=1, padding=self.paddings[n_layers + 1])] # output 1 channel prediction map
self.model = nn.Sequential(*sequence)

def forward(self, input):
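
Note: the discriminator takes one kernel width and one padding per conv layer. With the default n_layers=3 there are n_layers + 2 = 5 convolutions (the input conv, two intermediate stride-2 convs, the stride-1 conv, and the final 1-channel conv), so both lists need five entries. An illustrative construction (the channel counts and values are assumed, not from the diff):

# hypothetical pix2pix discriminator: input_nc + output_nc = 6 channels, 5 kernel widths / paddings
netD = NLayerDiscriminator(input_nc=6, ndf=64, n_layers=3, kws=[4, 4, 4, 4, 4], paddings=[1, 1, 1, 1, 1])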
4 changes: 2 additions & 2 deletions models/pix2pix_model.py
@@ -54,11 +54,11 @@ def __init__(self, opt):
self.model_names = ['G']
# define networks (both generator and discriminator)
self.netG = networks.define_G(opt.input_nc, opt.output_nc, opt.ngf, opt.netG, opt.norm,
not opt.no_dropout, opt.init_type, opt.init_gain, self.gpu_ids)
not opt.no_dropout, opt.init_type, opt.init_gain, self.gpu_ids, kws=opt.kwsG, paddings=opt.paddingsG)

if self.isTrain: # define a discriminator; conditional GANs need to take both input and output images; Therefore, #channels for D is input_nc + output_nc
self.netD = networks.define_D(opt.input_nc + opt.output_nc, opt.ndf, opt.netD,
opt.n_layers_D, opt.norm, opt.init_type, opt.init_gain, self.gpu_ids)
opt.n_layers_D, opt.norm, opt.init_type, opt.init_gain, self.gpu_ids, kws=opt.kwsD, paddings=opt.paddingsD)

if self.isTrain:
# define loss functions
18 changes: 17 additions & 1 deletion options/base_options.py
@@ -33,6 +33,10 @@ def initialize(self, parser):
parser.add_argument('--netD', type=str, default='basic', help='specify discriminator architecture [basic | n_layers | pixel]. The basic model is a 70x70 PatchGAN. n_layers allows you to specify the layers in the discriminator')
parser.add_argument('--netG', type=str, default='resnet_9blocks', help='specify generator architecture [resnet_9blocks | resnet_6blocks | unet_256 | unet_128]')
parser.add_argument('--n_layers_D', type=int, default=3, help='only used if netD==n_layers')
parser.add_argument('--kwsD', type=str, default='', help='n_layers_D + 2 numbers separated by underscores giving the discriminator kernel widths, ordered from the first to the last conv layer. The default empty string is interpreted as 4_4_..._4. Example for n_layers_D=3: 4_3_4_3_3.')
parser.add_argument('--paddingsD', type=str, default='', help='n_layers_D + 2 numbers separated by underscores giving the discriminator padding amounts. The default empty string is interpreted as 1_1_..._1. Example for n_layers_D=3: 2_1_1_1_2.')
parser.add_argument('--kwsG', type=str, default='', help='kernel widths for the generator blocks, ordered from outermost to innermost (currently only used by the U-Net generators). Must have length 8 for unet_256 and length 7 for unet_128. The default empty string is interpreted as 4_4_4_4_4_4_4_4 for unet_256 and 4_4_4_4_4_4_4 for unet_128.')
parser.add_argument('--paddingsG', type=str, default='', help='padding amounts for the generator blocks, ordered from outermost to innermost (currently only used by the U-Net generators). Must have length 8 for unet_256 and length 7 for unet_128. The default empty string is interpreted as 1_1_1_1_1_1_1_1 for unet_256 and 1_1_1_1_1_1_1 for unet_128.')
parser.add_argument('--norm', type=str, default='instance', help='instance normalization or batch normalization [instance | batch | none]')
parser.add_argument('--init_type', type=str, default='normal', help='network initialization [normal | xavier | kaiming | orthogonal]')
parser.add_argument('--init_gain', type=float, default=0.02, help='scaling factor for normal, xavier and orthogonal.')
@@ -121,7 +125,19 @@ def parse(self):
opt.name = opt.name + suffix

self.print_options(opt)

# decode kws and paddings strings
kwsD = opt.kwsD.split('_')
kwsD = None if kwsD == [''] else [int(value) for value in kwsD]
opt.kwsD = kwsD
kwsG = opt.kwsG.split('_')
kwsG = None if kwsG == [''] else [int(value) for value in kwsG]
opt.kwsG = kwsG
paddingsD = opt.paddingsD.split('_')
paddingsD = None if paddingsD == [''] else [int(value) for value in paddingsD]
opt.paddingsD = paddingsD
paddingsG = opt.paddingsG.split('_')
paddingsG = None if paddingsG == [''] else [int(value) for value in paddingsG]
opt.paddingsG = paddingsG
# set gpu ids
str_ids = opt.gpu_ids.split(',')
opt.gpu_ids = []
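
Putting the new options together, a training command might look like the following (dataset path, experiment name, and the specific kernel/padding values are illustrative only):

python train.py --dataroot ./datasets/facades --name facades_custom_kw --model pix2pix --netG unet_256 --kwsG 4_4_4_4_4_4_6_6 --paddingsG 1_1_1_1_1_1_2_2 --kwsD 4_4_4_4_6 --paddingsD 1_1_1_1_2

After parsing, opt.kwsG becomes [4, 4, 4, 4, 4, 4, 6, 6] and opt.paddingsD becomes [1, 1, 1, 1, 2]; empty strings are decoded to None so the existing defaults (kernel 4, padding 1) still apply.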
87 changes: 87 additions & 0 deletions prep_debabble.py
@@ -0,0 +1,87 @@
import sys

# drop the ROS Python 2 dist-packages path (if present) so the pip-installed cv2 is imported
ros_path = '/opt/ros/kinetic/lib/python2.7/dist-packages'
if ros_path in sys.path: sys.path.remove(ros_path)
import os
import glob
import random
import librosa
import numpy as np
from tqdm import tqdm
#from tqdm.notebook import tqdm
import cv2

def melspectrogram(y, sr=22050, start_sample=0, n_samples=None, min_dB=-100, n_fft=1024, hop_length=256, n_mels=80, fmin=40, fmax=8000):
if n_samples is None: n_samples = len(y)
y = y[start_sample:start_sample + n_samples]
# magnitude STFT
D = librosa.stft(y=y, n_fft=n_fft, hop_length=hop_length)
D = np.abs(D)
# mel filterbank applied to the magnitude spectrogram
S = librosa.feature.melspectrogram(S=D, sr=sr, n_fft=n_fft, n_mels=n_mels, fmin=fmin, fmax=fmax)
# amplitude to dB
S = 20 * np.log10(np.maximum(1e-5, S))
# normalize to [0, 1]
return np.clip((S - min_dB) / -min_dB, 0, 1)

def mkpath(directory):
if not os.path.exists(directory):
os.makedirs(directory)

noise_gain_str = '2.5'
n_mels = 256
print('im here')
audio_directory = 'data/debabble/audio'
img_directory = f'data/debabble/mel_img{n_mels}_{noise_gain_str}'
mkpath(f'{img_directory}/A')
mkpath(f'{img_directory}/B')
#mkpath(f'{img_directory}/A/train')
#mkpath(f'{img_directory}/A/test')
#mkpath(f'{img_directory}/A/val')
#mkpath(f'{img_directory}/B/train')
#mkpath(f'{img_directory}/B/test')
#mkpath(f'{img_directory}/B/val')
files_A = glob.glob(f'{audio_directory}/A/*')

print(f'{audio_directory}/A/*')
import re
p = re.compile('speaker_([0-9]+)')
print(files_A)
for i, f in enumerate(tqdm(files_A)):
speaker_num = int(p.search(f).group(1))
fA = f
fB = f.replace('/A/', '/B/').replace('_clean.wav', f'_{noise_gain_str}_noise.wav')
yA, r = librosa.load(fA)
yB, r = librosa.load(fB)
melA = melspectrogram(yA, sr=r, n_mels=n_mels)
melB = melspectrogram(yB, sr=r, n_mels=n_mels)
# chop and save images
n_specs = melA.shape[1]
m = melA
for j in range(n_specs // n_mels):
slice_start = j * n_mels
slice_end = slice_start + n_mels
mA_slice = melA[:,slice_start:slice_end]
mB_slice = melB[:,slice_start:slice_end]
cv2.imwrite(f'{img_directory}/A/mel_{noise_gain_str}_{i:06d}_s{speaker_num:03d}_{j:02d}.png', mA_slice * 255.)
cv2.imwrite(f'{img_directory}/B/mel_{noise_gain_str}_{i:06d}_s{speaker_num:03d}_{j:02d}.png', mB_slice * 255.)

raise SystemExit  # intentional stop before the train/val/test split below
imgs_A = glob.glob(f'{img_directory}/A/*.png')
n_images = len(imgs_A)
random.shuffle(imgs_A)
n_val = n_images // 10
n_test = n_images // 100
print(imgs_A)
for i, fA in enumerate(tqdm(imgs_A)):
fB = fA.replace('/A/', '/B/')
if i < n_test:
os.rename(fA, fA.replace('mel_0', 'test/mel_0'))
os.rename(fB, fB.replace('mel_0', 'test/mel_0'))
elif i < n_val:
os.rename(fA, fA.replace('mel_0', 'val/mel_0'))
os.rename(fB, fB.replace('mel_0', 'val/mel_0'))
else:
os.rename(fA, fA.replace('mel_0', 'train/mel_0'))
os.rename(fB, fB.replace('mel_0', 'train/mel_0'))

print('ran')