Basic Image Processing
To improve the training efficiency and accuracy of our Deep Q-Learning model, the team preprocessed images to reduce the dimensionality of the network input (prior to the development of the semantic segmentation model used in the final implementation). Native frames from the OpenAI Gym Retro emulator are 3-channel RGB images measuring 224 x 320 pixels. The team used methods from the OpenCV library to collapse each frame to a single grayscale channel and resize it to 84 x 84 pixels.
Images were converted to grayscale under the hypothesis that little task-relevant information would be lost by discarding color. The frames were then downsampled to 84 x 84 pixels using bilinear interpolation. This size was chosen because it is the standard input resolution used in the DQN research on the Atari platform, and much of the research behind our implementation stems from RL models developed on Atari games.
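To gauge the reduction, a raw 224 x 320 x 3 frame contains 215,040 values, while an 84 x 84 grayscale frame contains 7,056, roughly a 30-fold reduction. The sketch below illustrates the transform in isolation (a random array stands in for an emulator frame; the team's full module follows):

import numpy as np
import cv2 as cv

# Stand-in for a raw emulator observation: 224 x 320 RGB, uint8
frame = np.random.randint(0, 256, (224, 320, 3), dtype=np.uint8)

gray = cv.cvtColor(frame, cv.COLOR_RGB2GRAY)                      # (224, 320)
small = cv.resize(gray, (84, 84), interpolation=cv.INTER_LINEAR)  # bilinear downsample
state = small.astype(np.float32) / 255                            # normalize to [0, 1]

print(frame.size, state.shape)  # 215040 (84, 84)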


Our Code:
/source/vision/image_processing.py
##---------------Sources-------------------------##
# Image Processing for GymRetro: https://github.com/deepanshut041/Reinforcement-Learning
##------------------------------------------------##

import numpy as np
import cv2 as cv
import os
import sys
import time

script_dir = os.path.dirname(os.path.abspath(__file__))
project_dir = os.path.abspath(script_dir + "/../..")

sys.path.append(os.path.abspath(project_dir + '/source/agents'))
sys.path.append(os.path.abspath(project_dir + '/source/datasets'))
sys.path.append(os.path.abspath(project_dir + '/source/interface'))
sys.path.append(os.path.abspath(project_dir + '/source/vision'))

from deeplab import *
from deeplab_dataset import *
from color import *
from segmentation_labels import *
def preprocess_frame(screen, seg_model=None):
    """Preprocess an emulator frame.

    Params
    ======
        screen (array): RGB image
        seg_model: optional semantic segmentation model; if provided, the
            frame is segmented rather than converted to grayscale

    TODO: the output size is hardcoded for now, but is worth breaking out
    into a parameter later, along with a crop region (UP, RIGHT, DOWN, LEFT)
    and an optional blur.
    """
    if seg_model is not None:
        screen = seg_model.segment(screen)
    else:
        # Convert the image to grayscale, removing the color dimension
        screen = cv.cvtColor(screen, cv.COLOR_RGB2GRAY)
        # Downsample so the network takes in an 84 x 84 matrix
        screen = cv.resize(screen.astype(float), (84, 84))
    # Normalize pixel intensities to [0, 1]
    screen = np.ascontiguousarray(screen, dtype=np.float32) / 255
    return screen

def stack_frame(stacked_frames, frame, is_new):
    """Stack frames into a four-channel state.

    Params
    ======
        stacked_frames (array): four-channel stacked frame
        frame: preprocessed frame to be added
        is_new: whether this is the first frame of an episode
    """
    if is_new:
        # First frame of an episode: fill all four channels with copies of it
        stacked_frames = np.stack(arrays=[frame, frame, frame, frame])
    else:
        # Slide the window: drop the oldest frame and append the newest
        stacked_frames[0] = stacked_frames[1]
        stacked_frames[1] = stacked_frames[2]
        stacked_frames[2] = stacked_frames[3]
        stacked_frames[3] = frame

    return stacked_frames

def overlay_images(bot:np.ndarray, top:np.ndarray, mask:np.ndarray) -> np.ndarray:
    """Overlay pixels of image `top` onto image `bot`.

    Both images must be of the same size. `mask` determines which pixels of
    `top` get copied: for each mask pixel that equals 255, the corresponding
    pixel of `top` is copied onto the pixel of `bot`.

    ex:
        bot      top      mask
        a b c    1 2 3      0   0 255
        d e f    4 5 6    255 255   0

        result:
        a b 3
        4 5 f

    bot     bottom image
    top     top image (which will be overlaid onto bot)
    mask    which pixels are to be copied from top to bot
    return  image overlay of top and bot
    """
    top = cv.bitwise_and(top, top, mask=mask)  # cut silhouette of top image

    mask = cv.bitwise_not(mask)  # invert the mask

    bot = cv.bitwise_and(bot, bot, mask=mask)  # cut silhouette of bottom image

    img = cv.add(bot, top)  # combine the two silhouettes

    return img

def mask_by_color(img:np.ndarray, color:Color, threshold=3) -> np.ndarray:
    """Return a mask with all pixels within `threshold` of `color` labeled
    true (255) and all other pixels labeled false (0)."""

    # Slice the original image into its color components
    img_b = img[:, :, 0]  # blue pixel components
    img_g = img[:, :, 1]  # green pixel components
    img_r = img[:, :, 2]  # red pixel components

    # THRESH_BINARY marks pixels above each cutoff; XORing the lower and
    # upper masks keeps pixels inside the band (value-threshold, value+threshold]
    _, lower_mask_b = cv.threshold(img_b, color.blue-threshold, 255, cv.THRESH_BINARY)
    _, upper_mask_b = cv.threshold(img_b, color.blue+threshold, 255, cv.THRESH_BINARY)

    _, lower_mask_g = cv.threshold(img_g, color.green-threshold, 255, cv.THRESH_BINARY)
    _, upper_mask_g = cv.threshold(img_g, color.green+threshold, 255, cv.THRESH_BINARY)

    _, lower_mask_r = cv.threshold(img_r, color.red-threshold, 255, cv.THRESH_BINARY)
    _, upper_mask_r = cv.threshold(img_r, color.red+threshold, 255, cv.THRESH_BINARY)

    mask_b = cv.bitwise_xor(lower_mask_b, upper_mask_b)
    mask_g = cv.bitwise_xor(lower_mask_g, upper_mask_g)
    mask_r = cv.bitwise_xor(lower_mask_r, upper_mask_r)

    # A pixel matches only if all three of its channels fall within their bands
    mask = cv.bitwise_and(mask_b, mask_g)
    mask = cv.bitwise_and(mask, mask_r)

    return mask

def mask_by_intensity(img:np.ndarray, intensity:int) -> np.ndarray:
    """Return a mask with all pixels equal to `intensity` labeled true (255)
    and all other pixels labeled false (0)."""
    # XORing the two binary thresholds isolates pixels exactly at `intensity`
    _, lower_mask = cv.threshold(img, intensity-1, 255, cv.THRESH_BINARY)
    _, upper_mask = cv.threshold(img, intensity, 255, cv.THRESH_BINARY)

    mask = cv.bitwise_xor(lower_mask, upper_mask)

    return mask

def draw_legend(img:np.ndarray) -> np.ndarray:
    """Draw a legend of the segmentation labels onto the top-left of `img`."""
    legend = np.zeros((100, 65, 3), dtype=np.uint8)

    fontFace = cv.FONT_HERSHEY_SIMPLEX
    fontScale = 0.4
    thickness = 1
    spacing = 12

    entries = [
        ("bg1", SegmentationLabels.BACKGROUND1_COLOR),
        ("bg2", SegmentationLabels.BACKGROUND2_COLOR),
        ("stage", SegmentationLabels.STAGE_COLOR),
        ("sonic", SegmentationLabels.SONIC_COLOR),
        ("robots", SegmentationLabels.ROBOTS_COLOR),
        ("items", SegmentationLabels.ITEMS_COLOR),
        ("hazards", SegmentationLabels.HAZARDS_COLOR),
        ("mechanical", SegmentationLabels.MECHANICAL_COLOR),
    ]

    # Write each label in its segmentation color, one row at a time
    row = 10
    for text, label_color in entries:
        legend = cv.putText(
            img=legend,
            text=text,
            org=(2, row),
            fontFace=fontFace,
            fontScale=fontScale,
            thickness=thickness,
            color=label_color.toTuple(),
        )
        row += spacing

    # Paste the legend into the top-left corner of the image
    row = 10
    col = 10
    rows = legend.shape[0]
    cols = legend.shape[1]

    img[row:row+rows, col:col+cols, :] = legend

    return img
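For context, a sketch of how these utilities might be driven from the emulator loop (hypothetical usage; `env` stands for a Gym Retro environment created elsewhere, and the sampled action is a placeholder):

# Hypothetical usage sketch: `env` is a Gym Retro environment created
# elsewhere; preprocess_frame and stack_frame come from this module.
obs = env.reset()                                        # raw RGB frame
state = stack_frame(None, preprocess_frame(obs), is_new=True)

for _ in range(4):
    obs, reward, done, info = env.step(env.action_space.sample())
    state = stack_frame(state, preprocess_frame(obs), is_new=False)
    # state has shape (4, 84, 84): the four most recent preprocessed
    # frames, ready to be fed to the Q-network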