Basic Image Processing
To improve the training efficiency and accuracy of our Deep Q-Learning model, the team preprocessed images to reduce the dimensionality of the network input (prior to the development of the semantic segmentation model used in the final implementation). Native frames from the OpenAI Gym Retro emulator are 3-channel RGB images measuring 224 x 320 pixels. The team used methods from the OpenCV library to collapse each frame to a single grayscale channel and resize it to 84 x 84 pixels.
Images were converted to grayscale under the hypothesis that little task-relevant information would be lost by discarding color. The frames were then downsampled to 84 x 84 pixels using bilinear interpolation. This size was chosen because it is the standard input resolution used in the DQN research on the Atari platform, and much of the research behind our implementation stems from RL models developed on Atari games.
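To gauge the reduction, a raw 224 x 320 x 3 frame contains 215,040 values, while an 84 x 84 grayscale frame contains 7,056, roughly a 30-fold reduction. The sketch below illustrates the transform in isolation (a random array stands in for an emulator frame; the team's full module follows):

import numpy as np
import cv2 as cv

# Stand-in for a raw emulator observation: 224 x 320 RGB, uint8
frame = np.random.randint(0, 256, (224, 320, 3), dtype=np.uint8)

gray = cv.cvtColor(frame, cv.COLOR_RGB2GRAY)                      # (224, 320)
small = cv.resize(gray, (84, 84), interpolation=cv.INTER_LINEAR)  # bilinear downsample
state = small.astype(np.float32) / 255                            # normalize to [0, 1]

print(frame.size, state.shape)  # 215040 (84, 84)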


Our Code:
/source/vision/image_processing.py
##---------------Sources-------------------------##
# Image Processing for GymRetro: https://github.com/deepanshut041/Reinforcement-Learning
##------------------------------------------------##

import numpy as np
import cv2 as cv
import os
import sys
import time

script_dir = os.path.dirname(os.path.abspath(__file__))
project_dir = os.path.abspath(script_dir + "/../..")

sys.path.append(os.path.abspath(project_dir + '/source/agents'))
sys.path.append(os.path.abspath(project_dir + '/source/datasets'))
sys.path.append(os.path.abspath(project_dir + '/source/interface'))
sys.path.append(os.path.abspath(project_dir + '/source/vision'))

from deeplab import *
from deeplab_dataset import *
from color import *
from segmentation_labels import *
def preprocess_frame(screen, seg_model=None):
    """Preprocess an emulator frame.

    Params
    ======
        screen (array): RGB image
        seg_model: optional semantic segmentation model; if provided, the
            frame is segmented rather than converted to grayscale

    TODO: the output size is hardcoded for now, but is worth breaking out
    into a parameter later, along with a crop region (UP, RIGHT, DOWN, LEFT)
    and an optional blur.
    """
    if seg_model is not None:
        screen = seg_model.segment(screen)
    else:
        # Convert the image to grayscale, removing the color dimension
        screen = cv.cvtColor(screen, cv.COLOR_RGB2GRAY)
        # Downsample so the network takes in an 84 x 84 matrix
        screen = cv.resize(screen.astype(float), (84, 84))
    # Normalize pixel intensities to [0, 1]
    screen = np.ascontiguousarray(screen, dtype=np.float32) / 255
    return screen

def stack_frame(stacked_frames, frame, is_new):
    """Stack frames into a four-channel state.

    Params
    ======
        stacked_frames (array): four-channel stacked frame
        frame: preprocessed frame to be added
        is_new: whether this is the first frame of an episode
    """
    if is_new:
        # First frame of an episode: fill all four channels with copies of it
        stacked_frames = np.stack(arrays=[frame, frame, frame, frame])
    else:
        # Slide the window: drop the oldest frame and append the newest
        stacked_frames[0] = stacked_frames[1]
        stacked_frames[1] = stacked_frames[2]
        stacked_frames[2] = stacked_frames[3]
        stacked_frames[3] = frame

    return stacked_frames

def overlay_images(bot:np.ndarray, top:np.ndarray, mask:np.ndarray) -> np.ndarray:
    """Overlay pixels of image `top` onto image `bot`.

    Both images must be of the same size. `mask` determines which pixels of
    `top` get copied: for each mask pixel that equals 255, the corresponding
    pixel of `top` is copied onto the pixel of `bot`.

    ex:
        bot      top      mask
        a b c    1 2 3      0   0 255
        d e f    4 5 6    255 255   0

        result:
        a b 3
        4 5 f

    bot     bottom image
    top     top image (which will be overlaid onto bot)
    mask    which pixels are to be copied from top to bot
    return  image overlay of top and bot
    """
    top = cv.bitwise_and(top, top, mask=mask)  # cut silhouette of top image

    mask = cv.bitwise_not(mask)  # invert the mask

    bot = cv.bitwise_and(bot, bot, mask=mask)  # cut silhouette of bottom image

    img = cv.add(bot, top)  # combine the two silhouettes

    return img

def mask_by_color(img:np.ndarray, color:Color, threshold=3) -> np.ndarray:
    """Return a mask with all pixels within `threshold` of `color` labeled
    true (255) and all other pixels labeled false (0)."""

    # Slice the original image into its color components
    img_b = img[:, :, 0]  # blue pixel components
    img_g = img[:, :, 1]  # green pixel components
    img_r = img[:, :, 2]  # red pixel components

    # THRESH_BINARY marks pixels above each cutoff; XORing the lower and
    # upper masks keeps pixels inside the band (value-threshold, value+threshold]
    _, lower_mask_b = cv.threshold(img_b, color.blue-threshold, 255, cv.THRESH_BINARY)
    _, upper_mask_b = cv.threshold(img_b, color.blue+threshold, 255, cv.THRESH_BINARY)

    _, lower_mask_g = cv.threshold(img_g, color.green-threshold, 255, cv.THRESH_BINARY)
    _, upper_mask_g = cv.threshold(img_g, color.green+threshold, 255, cv.THRESH_BINARY)

    _, lower_mask_r = cv.threshold(img_r, color.red-threshold, 255, cv.THRESH_BINARY)
    _, upper_mask_r = cv.threshold(img_r, color.red+threshold, 255, cv.THRESH_BINARY)

    mask_b = cv.bitwise_xor(lower_mask_b, upper_mask_b)
    mask_g = cv.bitwise_xor(lower_mask_g, upper_mask_g)
    mask_r = cv.bitwise_xor(lower_mask_r, upper_mask_r)

    # A pixel matches only if all three of its channels fall within their bands
    mask = cv.bitwise_and(mask_b, mask_g)
    mask = cv.bitwise_and(mask, mask_r)

    return mask

def mask_by_intensity(img:np.ndarray, intensity:int) -> np.ndarray:
    """Return a mask with all pixels equal to `intensity` labeled true (255)
    and all other pixels labeled false (0)."""
    # XORing the two binary thresholds isolates pixels exactly at `intensity`
    _, lower_mask = cv.threshold(img, intensity-1, 255, cv.THRESH_BINARY)
    _, upper_mask = cv.threshold(img, intensity, 255, cv.THRESH_BINARY)

    mask = cv.bitwise_xor(lower_mask, upper_mask)

    return mask

def draw_legend(img:np.ndarray) -> np.ndarray:
    """Draw a legend of the segmentation labels onto the top-left of `img`."""
    legend = np.zeros((100, 65, 3), dtype=np.uint8)

    fontFace = cv.FONT_HERSHEY_SIMPLEX
    fontScale = 0.4
    thickness = 1
    spacing = 12

    entries = [
        ("bg1", SegmentationLabels.BACKGROUND1_COLOR),
        ("bg2", SegmentationLabels.BACKGROUND2_COLOR),
        ("stage", SegmentationLabels.STAGE_COLOR),
        ("sonic", SegmentationLabels.SONIC_COLOR),
        ("robots", SegmentationLabels.ROBOTS_COLOR),
        ("items", SegmentationLabels.ITEMS_COLOR),
        ("hazards", SegmentationLabels.HAZARDS_COLOR),
        ("mechanical", SegmentationLabels.MECHANICAL_COLOR),
    ]

    # Write each label in its segmentation color, one row at a time
    row = 10
    for text, label_color in entries:
        legend = cv.putText(
            img=legend,
            text=text,
            org=(2, row),
            fontFace=fontFace,
            fontScale=fontScale,
            thickness=thickness,
            color=label_color.toTuple(),
        )
        row += spacing

    # Paste the legend into the top-left corner of the image
    row = 10
    col = 10
    rows = legend.shape[0]
    cols = legend.shape[1]

    img[row:row+rows, col:col+cols, :] = legend

    return img
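For context, a sketch of how these utilities might be driven from the emulator loop (hypothetical usage; `env` stands for a Gym Retro environment created elsewhere, and the sampled action is a placeholder):

# Hypothetical usage sketch: `env` is a Gym Retro environment created
# elsewhere; preprocess_frame and stack_frame come from this module.
obs = env.reset()                                        # raw RGB frame
state = stack_frame(None, preprocess_frame(obs), is_new=True)

for _ in range(4):
    obs, reward, done, info = env.step(env.action_space.sample())
    state = stack_frame(state, preprocess_frame(obs), is_new=False)
    # state has shape (4, 84, 84): the four most recent preprocessed
    # frames, ready to be fed to the Q-network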