1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
|
from pickletools import int4
from torch.utils import data
from typing import Tuple, Union, List
import numpy as np
import json
import math
import cv2
import h5py
import os
import pickle
import sys
import yaml
import warnings
from PIL import Image
from einops import reduce, rearrange, repeat
import torch as th
class BouncingBallDataset(data.Dataset):
    """Bouncing-ball video dataset backed by a single HDF5 file.

    In "train" mode ``__getitem__`` returns only the RGB frames plus a blank
    background image; in "test"/"val" mode it additionally returns per-object
    positions, presence flags and (visible/hidden) segmentation masks for
    evaluation.
    """

    def __init__(self, root_path: str, dataset_name: str, type: str, size: Tuple[int, int], type_name: str = None, full_size: Tuple[int, int] = None, create_dataset: bool = False):
        """
        Args:
            root_path: repository root; the HDF5 file is looked up under
                ``<root_path>/data/data/video/<dataset_name>``.
            dataset_name: directory name of the dataset.
            type: dataset split, one of ``"train"``, ``"test"``, ``"val"``.
            size: frame size ``(H, W)``; part of the HDF5 file name.
            type_name: dataset variant; together with ``type`` it determines
                the number of objects per sequence.
            full_size: unused; kept for interface compatibility.
            create_dataset: unused; kept for interface compatibility.

        Raises:
            FileNotFoundError: if the HDF5 file is missing or contains no
                sequences.
        """
        assert type in ["train", "test", "val"]
        assert type_name in ["interaction", "occlusion", "twolayer", "twolayerdense", "twolayer_ood", "threelayer_ood", "twolayer_ood_3balls"]
        data_path = f'data/data/video/{dataset_name}'
        data_path = os.path.join(root_path, data_path)
        self.file = os.path.join(data_path, f'balls_{type_name}-{type}-{size[0]}x{size[1]}-v1.hdf5')
        self.train = (type == "train")
        self.samples = []
        # BUGFIX: previously all dataset state (length, num_objects, ...) was
        # only assigned when the file existed, so a missing file crashed later
        # with AttributeError on `self.length` instead of the intended
        # FileNotFoundError. Fail fast and explicitly here instead.
        if not os.path.exists(self.file):
            raise FileNotFoundError(f'Found no dataset at {data_path}')
        self.hdf5_file = h5py.File(self.file, "r")
        # 'sequence_indices' has one (start_index, length) row per sequence.
        self.length = self.hdf5_file['sequence_indices'].shape[0]
        # Plain black background, channel-first to match the decoded frames.
        self.background = np.zeros((3, size[0], size[1]), dtype=np.uint8)
        # Number of balls per sequence depends on the variant and, for the
        # out-of-distribution (OOD) variants, on the split being evaluated.
        if (type_name == "twolayer") or (type_name == "threelayer_ood" and type != "test"):
            self.num_objects = 6
        elif (type_name == "twolayer_ood" and type == "test") or (type_name == "twolayer_ood_3balls" and type == "test"):
            self.num_objects = 4
        elif type_name == "twolayer_ood":
            self.num_objects = 2
        elif type_name == "twolayer_ood_3balls":
            self.num_objects = 3
        elif (type_name == "threelayer_ood" and type == "test"):
            self.num_objects = 9
        else:
            self.num_objects = 3
        if len(self) == 0:
            raise FileNotFoundError(f'Found no dataset at {data_path}')

    def add_one_timestep(self, x):
        """Append one all-zero dummy timestep along axis 0 of ``x``."""
        return np.concatenate((x, np.zeros_like(x[:1])), axis=0)

    def __len__(self):
        """Number of video sequences stored in the HDF5 file."""
        return self.length

    def __getitem__(self, index: int):
        """Load one video sequence.

        Returns:
            ``(rgb_images, background)`` in train mode, otherwise
            ``(rgb_images, background, instance_positions, segmentation_mask,
            instance_pres, segementation_mask_hidden)``, each padded with one
            all-zero dummy timestep at the end.
        """
        index_start, length = self.hdf5_file['sequence_indices'][index]
        rgb_images = self.hdf5_file["rgb_images"][index_start:index_start+length]
        # Frames may be stored as uint8 encoded image buffers; decode them to
        # float32 CHW images in [0, 1].
        if rgb_images[0].dtype == np.uint8:
            images = []
            for i in range(len(rgb_images)):
                img = cv2.imdecode(rgb_images[i], 1)
                images.append(img.transpose(2, 0, 1).astype(np.float32) / 255.0)
            rgb_images = np.stack(images)
        rgb_images = th.from_numpy(rgb_images)
        if self.train:
            return (
                rgb_images,
                self.background
            )
        # EVALUATION: per-object annotations are stored flat as (t * o, ...);
        # reshape them to (t, o, ...) before use.
        num_objects = self.num_objects
        instance_positions = self.hdf5_file['instance_positions'][index_start*num_objects:(index_start+length)*num_objects]
        instance_positions = rearrange(instance_positions, '(t o) c -> t o c', o=num_objects)
        instance_positions = instance_positions[:, :, ::-1]  # IMPORTANT: flip x and y axis
        instance_pres = self.hdf5_file['instance_incamera'][index_start*num_objects:(index_start+length)*num_objects]
        instance_pres = rearrange(instance_pres, '(t o) c -> t o c', o=num_objects).squeeze(-1)
        # Masks are stored as 0/255 uint8; normalize to 0/1.
        foreground_mask = self.hdf5_file['foreground_mask'][index_start:(index_start+length)]
        foreground_mask = rearrange(foreground_mask, 't 1 h w -> t h w')/255
        instance_masks = self.hdf5_file['instance_masks'][index_start*num_objects:(index_start+length)*num_objects]
        instance_masks = rearrange(instance_masks, '(t o) 1 h w -> t o 1 h w', o=num_objects).squeeze()/255
        # A pixel covered by more than one instance mask contains a hidden
        # (occluded) object.
        hidden_mask = reduce(instance_masks, 't o h w -> t 1 h w', 'sum').squeeze()
        hidden_mask = (hidden_mask > 1).astype(np.uint8)
        # segmentation_mask: per pixel, the 1-based index of the visible
        # object (0 = background); objects are scanned in reverse so that the
        # front-most layer wins.
        segmentation_mask = np.argmax(instance_masks[:, ::-1], axis=1) + 1
        segmentation_mask = foreground_mask * segmentation_mask
        # Segmentation mask restricted to hidden objects (back layer only).
        segementation_mask_hidden = np.argmax(instance_masks[:, :3], axis=1) + 1  # TODO only works for 6 objects
        segementation_mask_hidden = hidden_mask * segementation_mask_hidden
        # Add one dummy timestep at the end of every returned array.
        instance_positions = self.add_one_timestep(instance_positions)
        rgb_images = self.add_one_timestep(rgb_images)
        instance_pres = self.add_one_timestep(instance_pres)
        segmentation_mask = self.add_one_timestep(segmentation_mask)
        segementation_mask_hidden = self.add_one_timestep(segementation_mask_hidden)
        return (
            rgb_images,
            self.background,
            instance_positions,
            segmentation_mask,
            instance_pres,
            segementation_mask_hidden
        )
#a = BouncingBallDataset("./", 'BOUNCINGBALLS', "train", (64,64))
|