Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ tf_class {
is_instance: "<type \'object\'>"
member_method {
name: "__init__"
argspec: "args=[\'self\', \'dtype\', \'shape\', \'ndim\', \'max_ndim\', \'min_ndim\', \'axes\', \'allow_last_axis_squeeze\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'False\', \'None\'], "
argspec: "args=[\'self\', \'dtype\', \'shape\', \'ndim\', \'max_ndim\', \'min_ndim\', \'axes\', \'allow_last_axis_squeeze\', \'name\', \'optional\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'False\', \'None\', \'False\'], "
}
member_method {
name: "from_config"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ tf_class {
}
member_method {
name: "__init__"
argspec: "args=[\'self\', \'input_shape\', \'batch_size\', \'dtype\', \'input_tensor\', \'sparse\', \'name\', \'ragged\', \'type_spec\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
argspec: "args=[\'self\', \'input_shape\', \'batch_size\', \'dtype\', \'input_tensor\', \'sparse\', \'name\', \'ragged\', \'type_spec\', \'optional\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'False\'], "
}
member_method {
name: "add_loss"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ tf_class {
is_instance: "<type \'object\'>"
member_method {
name: "__init__"
argspec: "args=[\'self\', \'dtype\', \'shape\', \'ndim\', \'max_ndim\', \'min_ndim\', \'axes\', \'allow_last_axis_squeeze\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'False\', \'None\'], "
argspec: "args=[\'self\', \'dtype\', \'shape\', \'ndim\', \'max_ndim\', \'min_ndim\', \'axes\', \'allow_last_axis_squeeze\', \'name\', \'optional\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'False\', \'None\', \'False\'], "
}
member_method {
name: "from_config"
Expand Down
2 changes: 1 addition & 1 deletion tf_keras/api/golden/v1/tensorflow.keras.layers.pbtxt
Original file line number Diff line number Diff line change
Expand Up @@ -482,7 +482,7 @@ tf_module {
}
member_method {
name: "Input"
argspec: "args=[\'shape\', \'batch_size\', \'name\', \'dtype\', \'sparse\', \'tensor\', \'ragged\', \'type_spec\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
argspec: "args=[\'shape\', \'batch_size\', \'name\', \'dtype\', \'sparse\', \'tensor\', \'ragged\', \'type_spec\', \'optional\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'False\'], "
}
member_method {
name: "add"
Expand Down
2 changes: 1 addition & 1 deletion tf_keras/api/golden/v1/tensorflow.keras.pbtxt
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,6 @@ tf_module {
}
member_method {
name: "Input"
argspec: "args=[\'shape\', \'batch_size\', \'name\', \'dtype\', \'sparse\', \'tensor\', \'ragged\', \'type_spec\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
argspec: "args=[\'shape\', \'batch_size\', \'name\', \'dtype\', \'sparse\', \'tensor\', \'ragged\', \'type_spec\', \'optional\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'False\'], "
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ tf_class {
}
member_method {
name: "__init__"
argspec: "args=[\'self\', \'input_shape\', \'batch_size\', \'dtype\', \'input_tensor\', \'sparse\', \'name\', \'ragged\', \'type_spec\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
argspec: "args=[\'self\', \'input_shape\', \'batch_size\', \'dtype\', \'input_tensor\', \'sparse\', \'name\', \'ragged\', \'type_spec\', \'optional\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'False\'], "
}
member_method {
name: "add_loss"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ tf_class {
is_instance: "<type \'object\'>"
member_method {
name: "__init__"
argspec: "args=[\'self\', \'dtype\', \'shape\', \'ndim\', \'max_ndim\', \'min_ndim\', \'axes\', \'allow_last_axis_squeeze\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'False\', \'None\'], "
argspec: "args=[\'self\', \'dtype\', \'shape\', \'ndim\', \'max_ndim\', \'min_ndim\', \'axes\', \'allow_last_axis_squeeze\', \'name\', \'optional\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'False\', \'None\', \'False\'], "
}
member_method {
name: "from_config"
Expand Down
2 changes: 1 addition & 1 deletion tf_keras/api/golden/v2/tensorflow.keras.layers.pbtxt
Original file line number Diff line number Diff line change
Expand Up @@ -538,7 +538,7 @@ tf_module {
}
member_method {
name: "Input"
argspec: "args=[\'shape\', \'batch_size\', \'name\', \'dtype\', \'sparse\', \'tensor\', \'ragged\', \'type_spec\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
argspec: "args=[\'shape\', \'batch_size\', \'name\', \'dtype\', \'sparse\', \'tensor\', \'ragged\', \'type_spec\', \'optional\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'False\'], "
}
member_method {
name: "add"
Expand Down
2 changes: 1 addition & 1 deletion tf_keras/api/golden/v2/tensorflow.keras.pbtxt
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,6 @@ tf_module {
}
member_method {
name: "Input"
argspec: "args=[\'shape\', \'batch_size\', \'name\', \'dtype\', \'sparse\', \'tensor\', \'ragged\', \'type_spec\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
argspec: "args=[\'shape\', \'batch_size\', \'name\', \'dtype\', \'sparse\', \'tensor\', \'ragged\', \'type_spec\', \'optional\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'False\'], "
}
}
47 changes: 34 additions & 13 deletions tf_keras/engine/data_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,9 @@ def _is_tensor(v):
return True
return False

return all(_is_tensor(v) for v in flat_inputs)
return all(_is_tensor(v) for v in flat_inputs if v is not None) and any(
_is_tensor(v) for v in flat_inputs
)

def __init__(
self,
Expand Down Expand Up @@ -259,7 +261,7 @@ def __init__(
inputs = pack_x_y_sample_weight(x, y, sample_weights)

num_samples = set(
int(i.shape[0]) for i in tf.nest.flatten(inputs)
int(i.shape[0]) for i in tf.nest.flatten(inputs) if i is not None
).pop()
_check_data_cardinality(inputs)

Expand Down Expand Up @@ -386,7 +388,7 @@ def slice_inputs(self, indices_dataset, inputs):

def grab_batch(i, data):
return tf.nest.map_structure(
lambda d: tf.gather(d, i, axis=0), data
lambda d: tf.gather(d, i, axis=0) if d is not None else d, data
)

dataset = dataset.map(grab_batch, num_parallel_calls=tf.data.AUTOTUNE)
Expand Down Expand Up @@ -459,7 +461,9 @@ def _is_array_like(v):
if not TensorLikeDataAdapter.can_handle(
x, y
) and not CompositeTensorDataAdapter.can_handle(x, y):
return all(_is_array_like(v) for v in flat_inputs)
return all(
_is_array_like(v) for v in flat_inputs if v is not None
) and any(v is not None for v in flat_inputs)
else:
return False

Expand Down Expand Up @@ -496,7 +500,7 @@ def dynamic_shape_like(t):
shape[0] = None
return tuple(shape)

flat_dtypes = [inp.dtype for inp in flat_inputs]
flat_dtypes = [inp.dtype for inp in flat_inputs if inp is not None]
contiguous = True
if self._shuffle and self._shuffle != "batch":
contiguous = False
Expand All @@ -509,15 +513,26 @@ def grab_batch(indices):
# to a Tensor may force it into memory..
def py_method(ind):
def slice_array(data):
if data is None:
return None
return training_utils.slice_arrays(
data, ind.numpy(), contiguous=contiguous
)

return [slice_array(inp) for inp in flat_inputs]
return [
slice_array(inp) for inp in flat_inputs if inp is not None
]

flat_out = tf.py_function(py_method, [indices], flat_dtypes)
for v, original_inp in zip(flat_out, flat_inputs):
v.set_shape(dynamic_shape_like(original_inp))
results = tf.py_function(py_method, [indices], flat_dtypes)
results_it = iter(results)
flat_out = []
for original_inp in flat_inputs:
if original_inp is None:
flat_out.append(None)
else:
v = next(results_it)
v.set_shape(dynamic_shape_like(original_inp))
flat_out.append(v)
return tf.nest.pack_sequence_as(inputs, flat_out)

dataset = indices_dataset.map(
Expand Down Expand Up @@ -608,8 +623,10 @@ def _is_tensor_or_composite(v):
return True
return _is_composite(v)

return any(_is_composite(v) for v in flat_inputs) and all(
_is_tensor_or_composite(v) for v in flat_inputs
return any(
_is_composite(v) for v in flat_inputs if v is not None
) and all(
_is_tensor_or_composite(v) for v in flat_inputs if v is not None
)

def __init__(
Expand Down Expand Up @@ -1944,14 +1961,18 @@ def single_batch_iterator(


def _check_data_cardinality(data):
num_samples = set(int(i.shape[0]) for i in tf.nest.flatten(data))
num_samples = set(
int(i.shape[0]) for i in tf.nest.flatten(data) if i is not None
)
if len(num_samples) > 1:
msg = "Data cardinality is ambiguous:\n"
for label, single_data in zip(["x", "y", "sample_weight"], data):
msg += " {} sizes: {}\n".format(
label,
", ".join(
str(i.shape[0]) for i in tf.nest.flatten(single_data)
str(i.shape[0])
for i in tf.nest.flatten(single_data)
if i is not None
),
)
msg += "Make sure all arrays contain the same number of samples."
Expand Down
122 changes: 122 additions & 0 deletions tf_keras/engine/data_adapter_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from tf_keras.testing_infra import test_combinations
from tf_keras.testing_infra import test_utils
from tf_keras.utils import data_utils
from tf_keras.utils import dataset_creator

# isort: off
from tensorflow.python.eager import context
Expand Down Expand Up @@ -427,6 +428,26 @@ def _get_epoch(ds_iter):
# Check that each elements appears, and only once.
self.assertAllClose(x, np.sort(second_epoch_data))

def test_tensor_like_with_none_input(self):
    """Check that `TensorLikeDataAdapter` keeps a `None` entry in `x`.

    The second element of `x` is `None`. The adapter must still accept the
    inputs, and each yielded batch must carry `None` in that position.
    """
    features = [np.ones((10, 1), dtype=np.float32), None]
    labels = np.zeros((10, 1), dtype=np.float32)
    adapter_cls = data_adapter.TensorLikeDataAdapter
    self.assertTrue(adapter_cls.can_handle(features, labels))

    adapter = adapter_cls(features, labels, batch_size=2, shuffle=False)
    dataset = adapter.get_dataset()

    # 10 samples with batch_size=2: five full batches, no remainder.
    self.assertEqual(adapter.get_size(), 5)
    self.assertFalse(adapter.has_partial_batch())
    self.assertIsNone(adapter.partial_batch_size())

    expected_shape = (2, 1)
    for step, batch in enumerate(dataset):
        batch_x, batch_y, _ = data_adapter.unpack_x_y_sample_weight(batch)
        self.assertIsInstance(batch_x, tuple)
        present, missing = batch_x[0], batch_x[1]
        self.assertEqual(present.shape, expected_shape)
        self.assertIsNone(missing)
        self.assertEqual(batch_y.shape, expected_shape)
        # Stop once the fifth batch (index 4) has been checked.
        if step == 4:
            break

@test_combinations.run_all_keras_modes(always_skip_v1=True)
def test_batch_shuffle_correctness(self):
num_samples = 100
Expand Down Expand Up @@ -787,6 +808,28 @@ def _get_epoch(ds_iter):
# Check that each elements appears, and only once.
self.assertAllClose(x, np.sort(second_epoch_data))

def test_generic_array_like_with_none_input(self):
    """Check that `GenericArrayLikeDataAdapter` keeps a `None` entry in `x`.

    The inputs are `DummyArrayLike` wrappers around NumPy arrays, with the
    second element of `x` set to `None`. Each yielded batch must carry
    `None` in that position.
    """
    features = [DummyArrayLike(np.ones((10, 1), dtype=np.float32)), None]
    labels = DummyArrayLike(np.zeros((10, 1), dtype=np.float32))
    adapter_cls = data_adapter.GenericArrayLikeDataAdapter
    self.assertTrue(adapter_cls.can_handle(features, labels))

    adapter = adapter_cls(features, labels, batch_size=2, shuffle=False)
    dataset = adapter.get_dataset()

    # 10 samples with batch_size=2: five full batches, no remainder.
    self.assertEqual(adapter.get_size(), 5)
    self.assertFalse(adapter.has_partial_batch())
    self.assertIsNone(adapter.partial_batch_size())

    expected_shape = (2, 1)
    for step, batch in enumerate(dataset):
        batch_x, batch_y, _ = data_adapter.unpack_x_y_sample_weight(batch)
        self.assertIsInstance(batch_x, tuple)
        present, missing = batch_x[0], batch_x[1]
        self.assertEqual(present.shape, expected_shape)
        self.assertIsNone(missing)
        self.assertEqual(batch_y.shape, expected_shape)
        # Stop once the fifth batch (index 4) has been checked.
        if step == 4:
            break

@test_combinations.run_all_keras_modes(always_skip_v1=True)
def test_batch_shuffle_correctness(self):
num_samples = 100
Expand Down Expand Up @@ -885,6 +928,85 @@ def test_partial_batch(
)


class CompositeTensorDataAdapterTest(DataAdapterTestBase):
    """Tests for `data_adapter.CompositeTensorDataAdapter`."""

    def setUp(self):
        super().setUp()
        self.adapter_cls = data_adapter.CompositeTensorDataAdapter

    def test_composite_tensor_with_none_input(self):
        """Check that a `None` entry in `x` survives batching.

        `x` pairs a sparse tensor with `None`. Three samples at batch size
        two give one full batch followed by a partial batch of one.
        """
        sparse_feature = tf.SparseTensor(
            indices=[[0, 0], [1, 2]], values=[1, 2], dense_shape=[3, 4]
        )
        features = [sparse_feature, None]
        labels = np.zeros((3, 1), dtype=np.float32)

        adapter_cls = data_adapter.CompositeTensorDataAdapter
        self.assertTrue(adapter_cls.can_handle(features, labels))
        adapter = adapter_cls(features, labels, batch_size=2, shuffle=False)
        dataset = adapter.get_dataset()

        # 3 samples, batch_size=2 -> 2 batches, the last holding 1 sample.
        self.assertEqual(adapter.get_size(), 2)
        self.assertTrue(adapter.has_partial_batch())
        self.assertEqual(adapter.partial_batch_size(), 1)

        batches = list(dataset)
        self.assertEqual(len(batches), 2)

        # Per batch: (expected sparse dense_shape, expected label shape).
        expectations = (
            ([2, 4], (2, 1)),
            ([1, 4], (1, 1)),
        )
        for batch, (sparse_shape, label_shape) in zip(batches, expectations):
            batch_x, batch_y, _ = data_adapter.unpack_x_y_sample_weight(batch)
            self.assertIsInstance(batch_x, tuple)
            self.assertEqual(
                batch_x[0].dense_shape.numpy().tolist(), sparse_shape
            )
            self.assertIsNone(batch_x[1])
            self.assertEqual(batch_y.shape, label_shape)


class DatasetCreatorAdapterTest(DataAdapterTestBase):
    """Tests for `data_adapter.DatasetCreatorAdapter`."""

    def setUp(self):
        super().setUp()
        self.adapter_cls = data_adapter.DatasetCreatorAdapter

    def test_with_none_input(self):
        """Check that a `None` entry produced by the dataset is preserved.

        The dataset function yields `((x0, None), y)` elements in batches of
        two. Each batch read from the adapter must keep `None` in the second
        slot of `x`.
        """

        def dataset_fn(input_context=None):
            del input_context
            ones = np.ones((10, 1), dtype=np.float32)
            zeros = np.zeros((10, 1), dtype=np.float32)

            def attach_none(x0, y):
                # Re-wrap each sample so the input structure is (x0, None).
                return tf.data.Dataset.from_tensors(((x0, None), y))

            samples = tf.data.Dataset.from_tensor_slices((ones, zeros))
            return samples.flat_map(attach_none).batch(2)

        creator = dataset_creator.DatasetCreator(dataset_fn)
        adapter_cls = data_adapter.DatasetCreatorAdapter
        self.assertTrue(adapter_cls.can_handle(creator))

        adapter = adapter_cls(
            creator,
            y=None,
            steps=5,
            distribution_strategy=tf.distribute.get_strategy(),
        )
        dataset = adapter.get_dataset()
        self.assertIsNone(adapter.get_size())

        expected_shape = (2, 1)
        for step, batch in enumerate(dataset):
            batch_x, batch_y, _ = data_adapter.unpack_x_y_sample_weight(batch)
            self.assertIsInstance(batch_x, tuple)
            self.assertEqual(batch_x[0].shape, expected_shape)
            self.assertIsNone(batch_x[1])
            self.assertEqual(batch_y.shape, expected_shape)
            # Stop once the fifth batch (index 4) has been checked.
            if step == 4:
                break


class DatasetAdapterTest(DataAdapterTestBase):
def setUp(self):
super().setUp()
Expand Down
Loading
Loading