From f987f976bd416759764bc86bc3aea3722391ce3b Mon Sep 17 00:00:00 2001 From: Marek Otahal Date: Tue, 22 Oct 2019 11:39:41 +0200 Subject: [PATCH 01/10] Eye: activate Parvo/Magno vision based on provided settings depends on params sparsityXXX > 0 --- py/htm/encoders/eye.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/py/htm/encoders/eye.py b/py/htm/encoders/eye.py index 2fff41f23c..390104bacf 100644 --- a/py/htm/encoders/eye.py +++ b/py/htm/encoders/eye.py @@ -177,6 +177,11 @@ class Eye: Simulates functionality of eye's retinal parvocellular(P-cells), and magnocellular(M-cells) pathways, at the saccadic steps. + Based on OpenCV's cv2.bioinspired.Retina model: + https://docs.opencv.org/3.4/d2/d94/bioinspired_retina.html + http://web.iitd.ac.in/~sumeet/Modelling_Vision.pdf + + On high level, magno cells: - detect change in temporal information in the image, ie motion @@ -238,7 +243,7 @@ def __init__(self, motion detection and motion tracking, video processing. For details see @param `sparsityParvo`. TODO: output of M-cells should be processed on a fast TM. - Argument color: use color vision (requires P-cells > 0), default true. + Argument color: use color vision (requires P-cells > 0), default true. (Grayscale is faster) """ self.output_diameter = output_diameter # Argument resolution_factor is used to expand the sensor array so that @@ -268,6 +273,10 @@ def __init__(self, colorMode = color, colorSamplingMethod = cv2.bioinspired.RETINA_COLOR_BAYER,) + # Activate Parvo/Magno vision based on whether sparsityXXX is set. + self.retina.activateContoursProcessing(sparsityParvo > 0) # Parvo + self.retina.activateMovingContoursProcessing(sparsityMagno > 0) # Magno + print(self.retina.printSetup()) print() From e46d5f39fba5217126e45bf62d42bbaacd63c9d4 Mon Sep 17 00:00:00 2001 From: Marek Otahal Date: Tue, 22 Oct 2019 14:06:55 +0200 Subject: [PATCH 02/10] Eye: apply log-polar transform from Retion instead of doing it manually --- py/htm/encoders/eye.py | 41 +++++++++++++++-------------------------- 1 file changed, 15 insertions(+), 26 deletions(-) diff --git a/py/htm/encoders/eye.py b/py/htm/encoders/eye.py index 390104bacf..9e47207cfd 100644 --- a/py/htm/encoders/eye.py +++ b/py/htm/encoders/eye.py @@ -48,8 +48,8 @@ class ChannelEncoder: 1. Semantic similarity happens when two inputs which are similar have similar SDR representations. This encoder design does two things to cause semantic similarity: - (1) SDR bits are responsive to a range of input values, - and (2) topology allows near by bits to represent similar things. + (1) SDR bits are responsive to a range of input values, and + (2) topology allows nearby bits to represent similar things. Many encoders apply thresholds to real valued input data to convert the input into Boolean outputs. In this encoder uses two thresholds to form ranges which @@ -246,9 +246,6 @@ def __init__(self, Argument color: use color vision (requires P-cells > 0), default true. (Grayscale is faster) """ self.output_diameter = output_diameter - # Argument resolution_factor is used to expand the sensor array so that - # the fovea has adequate resolution. After log-polar transform image - # is reduced by this factor back to the output_diameter. self.resolution_factor = 3 self.retina_diameter = int(self.resolution_factor * output_diameter) # Argument fovea_scale ... represents "zoom" aka distance from the object/image. @@ -271,7 +268,11 @@ def __init__(self, self.retina = cv2.bioinspired.Retina_create( inputSize = (self.retina_diameter, self.retina_diameter), colorMode = color, - colorSamplingMethod = cv2.bioinspired.RETINA_COLOR_BAYER,) + colorSamplingMethod = cv2.bioinspired.RETINA_COLOR_BAYER, + useRetinaLogSampling = True, + reductionFactor = self.resolution_factor, # how much is the image under-sampled #TODO tune these params + samplingStrenght = 4.0, # how much are the corners blured/forgotten + ) # Activate Parvo/Magno vision based on whether sparsityXXX is set. self.retina.activateContoursProcessing(sparsityParvo > 0) # Parvo @@ -385,6 +386,7 @@ def _crop_roi(self): """ assert(self.image is not None) + r = int(round(self.scale * self.retina_diameter / 2)) x, y = self.position x = int(round(x)) @@ -443,26 +445,10 @@ def compute(self, position=None, rotation=None, scale=None): self.retina.run(self.roi) if self.parvo_enc is not None: parvo = self.retina.getParvo() + print("RAW", parvo.shape) if self.magno_enc is not None: magno = self.retina.getMagno() - # Log Polar Transform. - center = self.retina_diameter / 2 - M = self.retina_diameter * self.fovea_scale - if self.parvo_enc is not None: - parvo = cv2.logPolar(parvo, - center = (center, center), - M = M, - flags = cv2.WARP_FILL_OUTLIERS) - parvo = cv2.resize(parvo, dsize=(self.output_diameter, self.output_diameter), interpolation = cv2.INTER_CUBIC) - - if self.magno_enc is not None: - magno = cv2.logPolar(magno, - center = (center, center), - M = M, - flags = cv2.WARP_FILL_OUTLIERS) - magno = cv2.resize(magno, dsize=(self.output_diameter, self.output_diameter), interpolation = cv2.INTER_CUBIC) - # Apply rotation by rolling the images around axis 1. rotation = self.output_diameter * self.orientation / (2 * math.pi) rotation = int(round(rotation)) @@ -592,14 +578,17 @@ def _get_images(path): eye = Eye() for img_path in images: eye.reset() + eye.fovea_scale = 0.2 print("Loading image %s"%img_path) eye.new_image(img_path) - eye.scale = 1 - eye.center_view() + #eye.center_view() + #manually set position to look at head: + eye.position = (400, 400) for i in range(10): pos,rot,sc = eye.small_random_movement() + sc = 1 (sdrParvo, sdrMagno) = eye.compute(pos,rot,sc) #TODO derive from Encoder - eye.plot(500) + eye.plot(5000) print("Sparsity parvo: {}".format(len(eye.parvo_sdr.sparse)/np.product(eye.parvo_sdr.dimensions))) print("Sparsity magno: {}".format(len(eye.magno_sdr.sparse)/np.product(eye.magno_sdr.dimensions))) print("All images seen.") From b4fa78e101d6c2d3206e2cfd0a42bd1d3516967a Mon Sep 17 00:00:00 2001 From: Marek Otahal Date: Tue, 22 Oct 2019 14:30:40 +0200 Subject: [PATCH 03/10] Eye: refactoring replace self.output_diameter with self.retina.getOutputSize() replace self.retina_diameter with self.retina.getInputSize() --- py/htm/encoders/eye.py | 53 +++++++++++++++++++++--------------------- 1 file changed, 27 insertions(+), 26 deletions(-) diff --git a/py/htm/encoders/eye.py b/py/htm/encoders/eye.py index 9e47207cfd..0204302fb7 100644 --- a/py/htm/encoders/eye.py +++ b/py/htm/encoders/eye.py @@ -223,7 +223,7 @@ class Eye: def __init__(self, - output_diameter = 200, # output SDR size is diameter^2 + output_diameter = 200, # fovea image size, also approximately output SDR size (= diameter^2) sparsityParvo = 0.2, sparsityMagno = 0.025, color = True,): @@ -245,20 +245,19 @@ def __init__(self, TODO: output of M-cells should be processed on a fast TM. Argument color: use color vision (requires P-cells > 0), default true. (Grayscale is faster) """ - self.output_diameter = output_diameter - self.resolution_factor = 3 - self.retina_diameter = int(self.resolution_factor * output_diameter) + resolution_factor = 3 + retina_diameter = int(resolution_factor * output_diameter) # Argument fovea_scale ... represents "zoom" aka distance from the object/image. self.fovea_scale = 0.177 assert(output_diameter // 2 * 2 == output_diameter) # Diameter must be an even number. - assert(self.retina_diameter // 2 * 2 == self.retina_diameter) # (Resolution Factor X Diameter) must be an even number. + assert(retina_diameter // 2 * 2 == retina_diameter) # (Resolution Factor X Diameter) must be an even number. assert(sparsityParvo >= 0 and sparsityParvo <= 1.0) if sparsityParvo > 0: - assert(sparsityParvo * (self.retina_diameter **2) > 0) + assert(sparsityParvo * (retina_diameter **2) > 0) self.sparsityParvo = sparsityParvo assert(sparsityMagno >= 0 and sparsityMagno <= 1.0) if sparsityMagno > 0: - assert(sparsityMagno * (self.retina_diameter **2) > 0) + assert(sparsityMagno * (retina_diameter **2) > 0) self.sparsityMagno = sparsityMagno if color is True: assert(sparsityParvo > 0) @@ -266,11 +265,11 @@ def __init__(self, self.retina = cv2.bioinspired.Retina_create( - inputSize = (self.retina_diameter, self.retina_diameter), + inputSize = (retina_diameter, retina_diameter), colorMode = color, colorSamplingMethod = cv2.bioinspired.RETINA_COLOR_BAYER, useRetinaLogSampling = True, - reductionFactor = self.resolution_factor, # how much is the image under-sampled #TODO tune these params + reductionFactor = resolution_factor, # how much is the image under-sampled #TODO tune these params samplingStrenght = 4.0, # how much are the corners blured/forgotten ) @@ -282,11 +281,11 @@ def __init__(self, print() if sparsityParvo > 0: - dims = (output_diameter, output_diameter) + dims = self.retina.getOutputSize() sparsityP_ = sparsityParvo - if color is True: - dims = (output_diameter, output_diameter, 3,) #3 for RGB color channels + if color is True: + dims = dims +(3,) #append 3rd dim with value '3' for RGB color channels # The reason the parvo-cellular has `3rd-root of the sparsity` is that there are three color channels (RGB), # each of which is encoded separately and then combined. The color channels are combined with a logical AND, @@ -305,7 +304,7 @@ def __init__(self, if sparsityMagno > 0: self.magno_enc = ChannelEncoder( - input_shape = (output_diameter, output_diameter), + input_shape = self.retina.getOutputSize(), num_samples = 1, sparsity = sparsityMagno, dtype=np.uint8, drange=[0, 255],) @@ -315,10 +314,10 @@ def __init__(self, # output variables: self.image = None # the current input RGB image self.roi = None # self.image cropped to region of interest - self.parvo_img = None # output visualization of parvo/magno cells - self.magno_img = None - self.parvo_sdr = SDR((output_diameter, output_diameter,)) # parvo/magno cellular representation (SDR) - self.magno_sdr = SDR((output_diameter, output_diameter,)) + self.parvo_img = np.zeros(self.retina.getOutputSize()) # output visualization of parvo/magno cells + self.magno_img = np.zeros(self.retina.getOutputSize()) + self.parvo_sdr = SDR(self.retina.getOutputSize()) # parvo/magno cellular representation (SDR) + self.magno_sdr = SDR(self.retina.getOutputSize()) def new_image(self, image): @@ -355,16 +354,16 @@ def center_view(self): """Center the view over the image""" self.orientation = 0 self.position = (self.image.shape[0]/2., self.image.shape[1]/2.) - self.scale = np.min(np.divide(self.image.shape[:2], self.retina_diameter)) + self.scale = np.min(np.divide(self.image.shape[:2], self.retina.getInputSize()[0])) def randomize_view(self, scale_range=None): """Set the eye's view point to a random location""" if scale_range is None: - scale_range = [2, min(self.image.shape[:2]) / self.retina_diameter] + scale_range = [2, min(self.image.shape[:2]) / self.retina.getInputSize()[0]] assert(len(scale_range) == 2) self.orientation = random.uniform(0, 2 * math.pi) self.scale = random.uniform(min(scale_range), max(scale_range)) - roi_radius = self.scale * self.retina_diameter / 2 + roi_radius = self.scale * self.retina.getInputSize()[0] / 2 self.position = [random.uniform(roi_radius, dim - roi_radius) for dim in self.image.shape[:2]] @@ -373,7 +372,7 @@ def _crop_roi(self): Crop to Region Of Interest (ROI) which contains the whole field of view. Adds a black circular boarder to mask out areas which the eye can't see. - Note: size of the ROI is (eye.output_diameter * eye.resolution_factor). + Note: size of the ROI is (eye.retina.getOutputSize()[0] * resolution_factor). Note: the circular boarder is actually a bit too far out, playing with eye.fovea_scale can hide areas which this ROI image will show. @@ -387,7 +386,7 @@ def _crop_roi(self): assert(self.image is not None) - r = int(round(self.scale * self.retina_diameter / 2)) + r = int(round(self.scale * self.retina.getInputSize()[0] / 2)) x, y = self.position x = int(round(x)) y = int(round(y)) @@ -414,7 +413,9 @@ def _crop_roi(self): roi[x_offset:x_offset+x_shape, y_offset:y_offset+y_shape] = image_slice # Rescale the ROI to remove the scaling effect. - roi.resize( (self.retina_diameter, self.retina_diameter, 3)) + inDims_ = self.retina.getInputSize() + inDims_ = inDims_ + (3,) #add 3rd dim '3' for RGB + roi.resize( inDims_ ) # Mask out areas the eye can't see by drawing a circle boarder. center = int(roi.shape[0] / 2) @@ -450,7 +451,7 @@ def compute(self, position=None, rotation=None, scale=None): magno = self.retina.getMagno() # Apply rotation by rolling the images around axis 1. - rotation = self.output_diameter * self.orientation / (2 * math.pi) + rotation = self.retina.getOutputSize()[0] * self.orientation / (2 * math.pi) rotation = int(round(rotation)) if self.parvo_enc is not None: self.parvo_img = np.roll(parvo, rotation, axis=0) @@ -513,9 +514,9 @@ def plot(self, window_name='Eye', delay=1000): else: cv2.imshow('Parvocellular', self.parvo_img) cv2.imshow('Magnocellular', self.magno_img) - idx = self.parvo_sdr.dense.astype(np.uint8).reshape(self.output_diameter, self.output_diameter)*255 + idx = self.parvo_sdr.dense.astype(np.uint8).reshape(self.retina.getOutputSize())*255 cv2.imshow('Parvo SDR', idx) - idx = self.magno_sdr.dense.astype(np.uint8).reshape(self.output_diameter, self.output_diameter)*255 + idx = self.magno_sdr.dense.astype(np.uint8).reshape(self.retina.getOutputSize())*255 cv2.imshow('Magno SDR', idx) cv2.waitKey(delay) From 06ecee105b1090a21e2ec00d67e229ee9b0c4066 Mon Sep 17 00:00:00 2001 From: Marek Otahal Date: Tue, 22 Oct 2019 15:50:10 +0200 Subject: [PATCH 04/10] Eye: compute takes image as argument require same dims for all images make new_image_() a static method, used internally by compute() --- py/htm/encoders/eye.py | 81 ++++++++++++++++++++++++------------------ 1 file changed, 47 insertions(+), 34 deletions(-) diff --git a/py/htm/encoders/eye.py b/py/htm/encoders/eye.py index 0204302fb7..270fa16d14 100644 --- a/py/htm/encoders/eye.py +++ b/py/htm/encoders/eye.py @@ -158,7 +158,7 @@ def __init__(self, input_shape, num_samples, sparsity, def encode(self, img): """Returns a dense boolean np.ndarray.""" - assert(img.shape == self.input_shape) + assert(img.shape == self.input_shape),print("Channel: img must have same dims as input_shape:", img.shape, self.input_shape) assert(img.dtype == self.dtype) if self.wrap: img += self.offsets @@ -223,11 +223,14 @@ class Eye: def __init__(self, + inputShape, output_diameter = 200, # fovea image size, also approximately output SDR size (= diameter^2) sparsityParvo = 0.2, sparsityMagno = 0.025, color = True,): """ + Argument inputShape - shape of the input image(s). + The images, video frames must have the same shape. Argument output_diameter is size of output ... output is a field of view (image) with circular shape. Default 200. `parvo/magno_sdr` size is `output_diameter^2` @@ -245,31 +248,27 @@ def __init__(self, TODO: output of M-cells should be processed on a fast TM. Argument color: use color vision (requires P-cells > 0), default true. (Grayscale is faster) """ - resolution_factor = 3 - retina_diameter = int(resolution_factor * output_diameter) + assert(len(inputShape) == 2) + self.inputShape = inputShape # Argument fovea_scale ... represents "zoom" aka distance from the object/image. self.fovea_scale = 0.177 assert(output_diameter // 2 * 2 == output_diameter) # Diameter must be an even number. - assert(retina_diameter // 2 * 2 == retina_diameter) # (Resolution Factor X Diameter) must be an even number. assert(sparsityParvo >= 0 and sparsityParvo <= 1.0) - if sparsityParvo > 0: - assert(sparsityParvo * (retina_diameter **2) > 0) self.sparsityParvo = sparsityParvo assert(sparsityMagno >= 0 and sparsityMagno <= 1.0) - if sparsityMagno > 0: - assert(sparsityMagno * (retina_diameter **2) > 0) self.sparsityMagno = sparsityMagno if color is True: assert(sparsityParvo > 0) self.color = color - + reductionFactor_ = inputShape[0]/output_diameter #TODO how would this work with non-square images? + assert(reductionFactor_ >= 1.0) self.retina = cv2.bioinspired.Retina_create( - inputSize = (retina_diameter, retina_diameter), + inputSize = inputShape, colorMode = color, colorSamplingMethod = cv2.bioinspired.RETINA_COLOR_BAYER, useRetinaLogSampling = True, - reductionFactor = resolution_factor, # how much is the image under-sampled #TODO tune these params + reductionFactor = reductionFactor_, # how much is the image under-sampled #TODO tune these params samplingStrenght = 4.0, # how much are the corners blured/forgotten ) @@ -312,43 +311,49 @@ def __init__(self, self.magno_enc = None # output variables: - self.image = None # the current input RGB image + self.image = np.zeros(self.retina.getInputSize()) # the current input RGB image self.roi = None # self.image cropped to region of interest self.parvo_img = np.zeros(self.retina.getOutputSize()) # output visualization of parvo/magno cells self.magno_img = np.zeros(self.retina.getOutputSize()) self.parvo_sdr = SDR(self.retina.getOutputSize()) # parvo/magno cellular representation (SDR) self.magno_sdr = SDR(self.retina.getOutputSize()) + + # Motor-control variables (to be set by user): + self.orientation = 0 #in degrees + self.position = (0,0) + self.scale = 1.0 - def new_image(self, image): + def new_image_(image): """ Argument image ... If String, will load image from file path. If numpy.ndarray, will attempt to cast to correct data type and dimensions. + + Return: the new image ndarray (only useful if string is passed in) """ # Load image if needed. if isinstance(image, str): - self.image = cv2.imread(image) - self.image = cv2.cvtColor(self.image, cv2.COLOR_BGR2RGB) + image = cv2.imread(image) + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) else: - self.image = image + image = image # Get the image into the right format. - assert(isinstance(self.image, np.ndarray)) - if self.image.dtype != np.uint8: - raise TypeError('Image "%s" dtype is not unsigned 8 bit integer, image.dtype is %s.'%( - self.image.dtype)) + assert(isinstance(image, np.ndarray)) + assert(image.dtype == np.uint8), print( + 'Image "%s" dtype is not unsigned 8 bit integer, image.dtype is %s.'%(image.dtype)) # Ensure there are three color channels. - if len(self.image.shape) == 2 or self.image.shape[2] == 1: - self.image = np.dstack([self.image] * 3) + if len(image.shape) == 2 or image.shape[2] == 1: + image = np.dstack([image] * 3) # Drop the alpha channel if present. - elif self.image.shape[2] == 4: - self.image = self.image[:,:,:3] + elif image.shape[2] == 4: + image = image[:,:,:3] # Sanity checks. - assert(len(self.image.shape) == 3) - assert(self.image.shape[2] == 3) # Color images only. - self.reset() - self.center_view() + assert(len(image.shape) == 3) + assert(image.shape[2] == 3) # Color images only. + return image + def center_view(self): """Center the view over the image""" @@ -425,8 +430,10 @@ def _crop_roi(self): return roi - def compute(self, position=None, rotation=None, scale=None): + def compute(self, image, position=None, rotation=None, scale=None): """ + Argument image - string (to load) or numpy.ndarray with image data + Images must match retina's inputShape, so be all of the same dimensions. Arguments position, rotation, scale: optional, if not None, the self.xxx is overriden with the provided value. Returns tuple (SDR parvo, SDR magno) @@ -440,9 +447,13 @@ def compute(self, position=None, rotation=None, scale=None): self.scale=scale # apply field of view (FOV) + self.image = Eye.new_image_(image) #TODO remove the FOV, already done in retina's logPolar transform + assert(self.image.shape[:2] == self.inputShape), print("Image must match retina's dims: ",self.image.shape, self.inputShape) + self.roi = self._crop_roi() # Retina image transforms (Parvo & Magnocellular). + print("IMG", self.image.shape) self.retina.run(self.roi) if self.parvo_enc is not None: parvo = self.retina.getParvo() @@ -451,7 +462,7 @@ def compute(self, position=None, rotation=None, scale=None): magno = self.retina.getMagno() # Apply rotation by rolling the images around axis 1. - rotation = self.retina.getOutputSize()[0] * self.orientation / (2 * math.pi) + rotation = self.retina.getOutputSize()[0] * self.orientation / (2 * math.pi) #TODO rotate before retina processes stuff rotation = int(round(rotation)) if self.parvo_enc is not None: self.parvo_img = np.roll(parvo, rotation, axis=0) @@ -576,20 +587,22 @@ def _get_images(path): if not images: print('No images found at file path "%s"!'%args.IMAGE) else: - eye = Eye() + #know the input image dims somehow + inShape = Eye.new_image_(images[0]).shape[:2] + eye = Eye(inShape) + for img_path in images: eye.reset() eye.fovea_scale = 0.2 print("Loading image %s"%img_path) - eye.new_image(img_path) #eye.center_view() #manually set position to look at head: eye.position = (400, 400) for i in range(10): pos,rot,sc = eye.small_random_movement() sc = 1 - (sdrParvo, sdrMagno) = eye.compute(pos,rot,sc) #TODO derive from Encoder - eye.plot(5000) + (sdrParvo, sdrMagno) = eye.compute(img_path, pos,rot,sc) #TODO derive from Encoder + eye.plot(delay=5000) print("Sparsity parvo: {}".format(len(eye.parvo_sdr.sparse)/np.product(eye.parvo_sdr.dimensions))) print("Sparsity magno: {}".format(len(eye.magno_sdr.sparse)/np.product(eye.magno_sdr.dimensions))) print("All images seen.") From 7f4409eaa3bd464d60a3f67fb243f52ad0e03604 Mon Sep 17 00:00:00 2001 From: Marek Otahal Date: Tue, 22 Oct 2019 16:37:07 +0200 Subject: [PATCH 05/10] Eye: transform input image to bigger square TODO avoid that, work with rectangles? --- py/htm/encoders/eye.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/py/htm/encoders/eye.py b/py/htm/encoders/eye.py index 270fa16d14..c56ea97827 100644 --- a/py/htm/encoders/eye.py +++ b/py/htm/encoders/eye.py @@ -261,10 +261,10 @@ def __init__(self, assert(sparsityParvo > 0) self.color = color - reductionFactor_ = inputShape[0]/output_diameter #TODO how would this work with non-square images? + reductionFactor_ = max(inputShape)/output_diameter assert(reductionFactor_ >= 1.0) self.retina = cv2.bioinspired.Retina_create( - inputSize = inputShape, + inputSize = (max(inputShape), max(inputShape)), #FIXME avoid transformation to the bigger square, work with rectangles colorMode = color, colorSamplingMethod = cv2.bioinspired.RETINA_COLOR_BAYER, useRetinaLogSampling = True, @@ -453,16 +453,14 @@ def compute(self, image, position=None, rotation=None, scale=None): self.roi = self._crop_roi() # Retina image transforms (Parvo & Magnocellular). - print("IMG", self.image.shape) self.retina.run(self.roi) if self.parvo_enc is not None: parvo = self.retina.getParvo() - print("RAW", parvo.shape) if self.magno_enc is not None: magno = self.retina.getMagno() # Apply rotation by rolling the images around axis 1. - rotation = self.retina.getOutputSize()[0] * self.orientation / (2 * math.pi) #TODO rotate before retina processes stuff + rotation = max(self.retina.getOutputSize()) * self.orientation / (2 * math.pi) #TODO rotate before retina processes stuff rotation = int(round(rotation)) if self.parvo_enc is not None: self.parvo_img = np.roll(parvo, rotation, axis=0) From 22109f29335c51ea0f87cd34f0bb8793bb279c85 Mon Sep 17 00:00:00 2001 From: Marek Otahal Date: Tue, 22 Oct 2019 17:33:23 +0200 Subject: [PATCH 06/10] Eye: rotation should be applied before retina processing. --- py/htm/encoders/eye.py | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/py/htm/encoders/eye.py b/py/htm/encoders/eye.py index c56ea97827..19b76f5c9f 100644 --- a/py/htm/encoders/eye.py +++ b/py/htm/encoders/eye.py @@ -250,8 +250,6 @@ def __init__(self, """ assert(len(inputShape) == 2) self.inputShape = inputShape - # Argument fovea_scale ... represents "zoom" aka distance from the object/image. - self.fovea_scale = 0.177 assert(output_diameter // 2 * 2 == output_diameter) # Diameter must be an even number. assert(sparsityParvo >= 0 and sparsityParvo <= 1.0) self.sparsityParvo = sparsityParvo @@ -372,14 +370,19 @@ def randomize_view(self, scale_range=None): self.position = [random.uniform(roi_radius, dim - roi_radius) for dim in self.image.shape[:2]] + def rotate_(self, image, angle): + image_center = tuple(np.array(image.shape[1::-1]) / 2) + rot_mat = cv2.getRotationMatrix2D(image_center, angle, 1.0) + result = cv2.warpAffine(image, rot_mat, image.shape[1::-1], flags=cv2.INTER_LINEAR) + return result + + def _crop_roi(self): """ Crop to Region Of Interest (ROI) which contains the whole field of view. Adds a black circular boarder to mask out areas which the eye can't see. Note: size of the ROI is (eye.retina.getOutputSize()[0] * resolution_factor). - Note: the circular boarder is actually a bit too far out, playing with - eye.fovea_scale can hide areas which this ROI image will show. Arguments: eye.scale, eye.position, eye.image @@ -446,9 +449,10 @@ def compute(self, image, position=None, rotation=None, scale=None): if scale is not None: self.scale=scale - # apply field of view (FOV) + # apply field of view (FOV) & rotation self.image = Eye.new_image_(image) #TODO remove the FOV, already done in retina's logPolar transform assert(self.image.shape[:2] == self.inputShape), print("Image must match retina's dims: ",self.image.shape, self.inputShape) + self.rotate_(self.image, rotation) self.roi = self._crop_roi() @@ -456,16 +460,10 @@ def compute(self, image, position=None, rotation=None, scale=None): self.retina.run(self.roi) if self.parvo_enc is not None: parvo = self.retina.getParvo() + self.parvo_img = parvo if self.magno_enc is not None: magno = self.retina.getMagno() - - # Apply rotation by rolling the images around axis 1. - rotation = max(self.retina.getOutputSize()) * self.orientation / (2 * math.pi) #TODO rotate before retina processes stuff - rotation = int(round(rotation)) - if self.parvo_enc is not None: - self.parvo_img = np.roll(parvo, rotation, axis=0) - if self.magno_enc is not None: - self.magno_img = np.roll(magno, rotation, axis=0) + self.magno_img = magno # Encode images into SDRs. p = [] @@ -591,7 +589,6 @@ def _get_images(path): for img_path in images: eye.reset() - eye.fovea_scale = 0.2 print("Loading image %s"%img_path) #eye.center_view() #manually set position to look at head: From b36f7a0332d8382471998d60b6252ae3c340e94e Mon Sep 17 00:00:00 2001 From: Marek Otahal Date: Tue, 22 Oct 2019 17:47:31 +0200 Subject: [PATCH 07/10] Eye: cleanup --- py/htm/encoders/eye.py | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/py/htm/encoders/eye.py b/py/htm/encoders/eye.py index 19b76f5c9f..0d22ceba44 100644 --- a/py/htm/encoders/eye.py +++ b/py/htm/encoders/eye.py @@ -311,8 +311,8 @@ def __init__(self, # output variables: self.image = np.zeros(self.retina.getInputSize()) # the current input RGB image self.roi = None # self.image cropped to region of interest - self.parvo_img = np.zeros(self.retina.getOutputSize()) # output visualization of parvo/magno cells - self.magno_img = np.zeros(self.retina.getOutputSize()) + #self.retina.getParvo() # output visualization of parvo/magno cells + #self.retina.getMagno() self.parvo_sdr = SDR(self.retina.getOutputSize()) # parvo/magno cellular representation (SDR) self.magno_sdr = SDR(self.retina.getOutputSize()) @@ -458,27 +458,19 @@ def compute(self, image, position=None, rotation=None, scale=None): # Retina image transforms (Parvo & Magnocellular). self.retina.run(self.roi) - if self.parvo_enc is not None: - parvo = self.retina.getParvo() - self.parvo_img = parvo - if self.magno_enc is not None: - magno = self.retina.getMagno() - self.magno_img = magno # Encode images into SDRs. - p = [] - m = [] if self.parvo_enc is not None: - p = self.parvo_enc.encode(parvo) + p = self.parvo_enc.encode(self.retina.getParvo()) if self.color: pr, pg, pb = np.dsplit(p, 3) p = np.logical_and(np.logical_and(pr, pg), pb) p = np.expand_dims(np.squeeze(p), axis=2) + self.parvo_sdr.dense = p.flatten() if self.magno_enc is not None: - m = self.magno_enc.encode(magno) + m = self.magno_enc.encode(self.retina.getMagno()) + self.magno_sdr.dense = m.flatten() - self.magno_sdr.dense = m.flatten() - self.parvo_sdr.dense = p.flatten() assert(len(self.magno_sdr.sparse) > 0) assert(len(self.parvo_sdr.sparse) > 0) @@ -517,10 +509,10 @@ def plot(self, window_name='Eye', delay=1000): roi = self.make_roi_pretty() cv2.imshow('Region Of Interest', roi) if self.color: - cv2.imshow('Parvocellular', self.parvo_img[:,:,::-1]) + cv2.imshow('Parvocellular', self.retina.getParvo()[:,:,::-1]) else: - cv2.imshow('Parvocellular', self.parvo_img) - cv2.imshow('Magnocellular', self.magno_img) + cv2.imshow('Parvocellular', self.retina.getParvo()) + cv2.imshow('Magnocellular', self.retina.getMagno()) idx = self.parvo_sdr.dense.astype(np.uint8).reshape(self.retina.getOutputSize())*255 cv2.imshow('Parvo SDR', idx) idx = self.magno_sdr.dense.astype(np.uint8).reshape(self.retina.getOutputSize())*255 From ac37999f32423a24a675cc69ab4fda40d479073f Mon Sep 17 00:00:00 2001 From: Marek Otahal Date: Tue, 22 Oct 2019 19:44:41 +0200 Subject: [PATCH 08/10] Revert "Eye: transform input image to bigger square" This reverts commit 40dd4b30781c96f8b5bae9545f258c73e75592ce. --- py/htm/encoders/eye.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/py/htm/encoders/eye.py b/py/htm/encoders/eye.py index 0d22ceba44..d778374b9f 100644 --- a/py/htm/encoders/eye.py +++ b/py/htm/encoders/eye.py @@ -259,10 +259,10 @@ def __init__(self, assert(sparsityParvo > 0) self.color = color - reductionFactor_ = max(inputShape)/output_diameter + reductionFactor_ = inputShape[0]/output_diameter #TODO how would this work with non-square images? assert(reductionFactor_ >= 1.0) self.retina = cv2.bioinspired.Retina_create( - inputSize = (max(inputShape), max(inputShape)), #FIXME avoid transformation to the bigger square, work with rectangles + inputSize = inputShape, colorMode = color, colorSamplingMethod = cv2.bioinspired.RETINA_COLOR_BAYER, useRetinaLogSampling = True, @@ -457,6 +457,7 @@ def compute(self, image, position=None, rotation=None, scale=None): self.roi = self._crop_roi() # Retina image transforms (Parvo & Magnocellular). + print("IMG", self.image.shape) self.retina.run(self.roi) # Encode images into SDRs. From 8b3e91e04f7f067d837c086eb781dfc479a66f06 Mon Sep 17 00:00:00 2001 From: Marek Otahal Date: Tue, 22 Oct 2019 19:55:14 +0200 Subject: [PATCH 09/10] Revert "Eye: apply log-polar transform from Retion" This reverts commit e46d5f39fba5217126e45bf62d42bbaacd63c9d4. --- py/htm/encoders/eye.py | 55 +++++++++++++++++++++++++++--------------- 1 file changed, 36 insertions(+), 19 deletions(-) diff --git a/py/htm/encoders/eye.py b/py/htm/encoders/eye.py index d778374b9f..cfd43cdca9 100644 --- a/py/htm/encoders/eye.py +++ b/py/htm/encoders/eye.py @@ -48,8 +48,8 @@ class ChannelEncoder: 1. Semantic similarity happens when two inputs which are similar have similar SDR representations. This encoder design does two things to cause semantic similarity: - (1) SDR bits are responsive to a range of input values, and - (2) topology allows nearby bits to represent similar things. + (1) SDR bits are responsive to a range of input values, + and (2) topology allows near by bits to represent similar things. Many encoders apply thresholds to real valued input data to convert the input into Boolean outputs. In this encoder uses two thresholds to form ranges which @@ -223,14 +223,11 @@ class Eye: def __init__(self, - inputShape, output_diameter = 200, # fovea image size, also approximately output SDR size (= diameter^2) sparsityParvo = 0.2, sparsityMagno = 0.025, color = True,): """ - Argument inputShape - shape of the input image(s). - The images, video frames must have the same shape. Argument output_diameter is size of output ... output is a field of view (image) with circular shape. Default 200. `parvo/magno_sdr` size is `output_diameter^2` @@ -248,8 +245,14 @@ def __init__(self, TODO: output of M-cells should be processed on a fast TM. Argument color: use color vision (requires P-cells > 0), default true. (Grayscale is faster) """ - assert(len(inputShape) == 2) - self.inputShape = inputShape + self.output_diameter = output_diameter + # Argument resolution_factor is used to expand the sensor array so that + # the fovea has adequate resolution. After log-polar transform image + # is reduced by this factor back to the output_diameter. + self.resolution_factor = 3 + self.retina_diameter = int(self.resolution_factor * output_diameter) + # Argument fovea_scale ... represents "zoom" aka distance from the object/image. + self.fovea_scale = 0.177 assert(output_diameter // 2 * 2 == output_diameter) # Diameter must be an even number. assert(sparsityParvo >= 0 and sparsityParvo <= 1.0) self.sparsityParvo = sparsityParvo @@ -259,14 +262,12 @@ def __init__(self, assert(sparsityParvo > 0) self.color = color - reductionFactor_ = inputShape[0]/output_diameter #TODO how would this work with non-square images? - assert(reductionFactor_ >= 1.0) self.retina = cv2.bioinspired.Retina_create( - inputSize = inputShape, + inputSize = (3*output_diameter, 3*output_diameter), colorMode = color, colorSamplingMethod = cv2.bioinspired.RETINA_COLOR_BAYER, useRetinaLogSampling = True, - reductionFactor = reductionFactor_, # how much is the image under-sampled #TODO tune these params + reductionFactor = 1.2, #!reductionFactor_, # how much is the image under-sampled #TODO tune these params samplingStrenght = 4.0, # how much are the corners blured/forgotten ) @@ -393,7 +394,6 @@ def _crop_roi(self): """ assert(self.image is not None) - r = int(round(self.scale * self.retina.getInputSize()[0] / 2)) x, y = self.position x = int(round(x)) @@ -436,7 +436,6 @@ def _crop_roi(self): def compute(self, image, position=None, rotation=None, scale=None): """ Argument image - string (to load) or numpy.ndarray with image data - Images must match retina's inputShape, so be all of the same dimensions. Arguments position, rotation, scale: optional, if not None, the self.xxx is overriden with the provided value. Returns tuple (SDR parvo, SDR magno) @@ -451,7 +450,6 @@ def compute(self, image, position=None, rotation=None, scale=None): # apply field of view (FOV) & rotation self.image = Eye.new_image_(image) #TODO remove the FOV, already done in retina's logPolar transform - assert(self.image.shape[:2] == self.inputShape), print("Image must match retina's dims: ",self.image.shape, self.inputShape) self.rotate_(self.image, rotation) self.roi = self._crop_roi() @@ -459,17 +457,38 @@ def compute(self, image, position=None, rotation=None, scale=None): # Retina image transforms (Parvo & Magnocellular). print("IMG", self.image.shape) self.retina.run(self.roi) + + if self.parvo_enc is not None: + parvo = self.retina.getParvo() + if self.magno_enc is not None: + magno = self.retina.getMagno() + + # Log Polar Transform. + center = self.retina_diameter / 2 + M = self.retina_diameter * self.fovea_scale + if self.parvo_enc is not None: + parvo = cv2.logPolar(parvo, + center = (center, center), + M = M, + flags = cv2.WARP_FILL_OUTLIERS) + + if self.magno_enc is not None: + magno = cv2.logPolar(magno, + center = (center, center), + M = M, + flags = cv2.WARP_FILL_OUTLIERS) + # Encode images into SDRs. if self.parvo_enc is not None: - p = self.parvo_enc.encode(self.retina.getParvo()) + p = self.parvo_enc.encode(parvo) if self.color: pr, pg, pb = np.dsplit(p, 3) p = np.logical_and(np.logical_and(pr, pg), pb) p = np.expand_dims(np.squeeze(p), axis=2) self.parvo_sdr.dense = p.flatten() if self.magno_enc is not None: - m = self.magno_enc.encode(self.retina.getMagno()) + m = self.magno_enc.encode(magno) self.magno_sdr.dense = m.flatten() assert(len(self.magno_sdr.sparse) > 0) @@ -576,9 +595,7 @@ def _get_images(path): if not images: print('No images found at file path "%s"!'%args.IMAGE) else: - #know the input image dims somehow - inShape = Eye.new_image_(images[0]).shape[:2] - eye = Eye(inShape) + eye = Eye() for img_path in images: eye.reset() From 5a32b96d007675e0dec1ff87ced6161fe97aabb7 Mon Sep 17 00:00:00 2001 From: Marek Otahal Date: Tue, 22 Oct 2019 23:26:03 +0200 Subject: [PATCH 10/10] Eye cleanup --- py/htm/encoders/eye.py | 41 ++++++++++++++++------------------------- 1 file changed, 16 insertions(+), 25 deletions(-) diff --git a/py/htm/encoders/eye.py b/py/htm/encoders/eye.py index cfd43cdca9..ce9e759aad 100644 --- a/py/htm/encoders/eye.py +++ b/py/htm/encoders/eye.py @@ -249,10 +249,7 @@ def __init__(self, # Argument resolution_factor is used to expand the sensor array so that # the fovea has adequate resolution. After log-polar transform image # is reduced by this factor back to the output_diameter. - self.resolution_factor = 3 - self.retina_diameter = int(self.resolution_factor * output_diameter) - # Argument fovea_scale ... represents "zoom" aka distance from the object/image. - self.fovea_scale = 0.177 + resolution_factor = 2 assert(output_diameter // 2 * 2 == output_diameter) # Diameter must be an even number. assert(sparsityParvo >= 0 and sparsityParvo <= 1.0) self.sparsityParvo = sparsityParvo @@ -263,7 +260,7 @@ def __init__(self, self.color = color self.retina = cv2.bioinspired.Retina_create( - inputSize = (3*output_diameter, 3*output_diameter), + inputSize = (resolution_factor*output_diameter, resolution_factor*output_diameter), colorMode = color, colorSamplingMethod = cv2.bioinspired.RETINA_COLOR_BAYER, useRetinaLogSampling = True, @@ -425,11 +422,6 @@ def _crop_roi(self): inDims_ = inDims_ + (3,) #add 3rd dim '3' for RGB roi.resize( inDims_ ) - # Mask out areas the eye can't see by drawing a circle boarder. - center = int(roi.shape[0] / 2) - circle_mask = np.zeros(roi.shape, dtype=np.uint8) - cv2.circle(circle_mask, (center, center), center, thickness = -1, color=(255,255,255)) - roi = np.minimum(roi, circle_mask) return roi @@ -450,12 +442,10 @@ def compute(self, image, position=None, rotation=None, scale=None): # apply field of view (FOV) & rotation self.image = Eye.new_image_(image) #TODO remove the FOV, already done in retina's logPolar transform - self.rotate_(self.image, rotation) - + self.image = self.rotate_(self.image, rotation) self.roi = self._crop_roi() # Retina image transforms (Parvo & Magnocellular). - print("IMG", self.image.shape) self.retina.run(self.roi) if self.parvo_enc is not None: @@ -464,8 +454,8 @@ def compute(self, image, position=None, rotation=None, scale=None): magno = self.retina.getMagno() # Log Polar Transform. - center = self.retina_diameter / 2 - M = self.retina_diameter * self.fovea_scale + center = self.retina.getInputSize()[0] / 2 + M = self.retina.getInputSize()[0] * self.scale if self.parvo_enc is not None: parvo = cv2.logPolar(parvo, center = (center, center), @@ -491,6 +481,8 @@ def compute(self, image, position=None, rotation=None, scale=None): m = self.magno_enc.encode(magno) self.magno_sdr.dense = m.flatten() + + self.parvo_img = parvo assert(len(self.magno_sdr.sparse) > 0) assert(len(self.parvo_sdr.sparse) > 0) @@ -508,13 +500,6 @@ def make_roi_pretty(self, roi=None): if roi is None: roi = self.roi - # Show the ROI, first rotate it like the eye is rotated. - angle = self.orientation * 360 / (2 * math.pi) - roi = self.roi[:,:,::-1] - rows, cols, color_depth = roi.shape - M = cv2.getRotationMatrix2D((cols / 2, rows / 2), angle, 1) - roi = cv2.warpAffine(roi, M, (cols,rows)) - # Invert 5 pixels in the center to show where the fovea is located. center = int(roi.shape[0] / 2) roi[center, center] = np.full(3, 255) - roi[center, center] @@ -522,6 +507,12 @@ def make_roi_pretty(self, roi=None): roi[center-2, center+2] = np.full(3, 255) - roi[center-2, center+2] roi[center-2, center-2] = np.full(3, 255) - roi[center-2, center-2] roi[center+2, center-2] = np.full(3, 255) - roi[center+2, center-2] + + # Mask out areas the eye can't see by drawing a circle boarder. + center = int(roi.shape[0] / 2) + circle_mask = np.zeros(roi.shape, dtype=np.uint8) + cv2.circle(circle_mask, (center, center), center, thickness = -1, color=(255,255,255)) + roi = np.minimum(roi, circle_mask) return roi @@ -544,7 +535,7 @@ def small_random_movement(self): """returns small difference in position, rotation, scale. This is naive "saccadic" movements. """ - max_change_angle = (2*3.14159) / 500 + max_change_angle = (2*math.pi) / 100 self.position = ( self.position[0] + random.gauss(1, .75), self.position[1] + random.gauss(1, .75),) @@ -605,9 +596,9 @@ def _get_images(path): eye.position = (400, 400) for i in range(10): pos,rot,sc = eye.small_random_movement() - sc = 1 + sc = 1.0 #FIXME scaling with any other than 1.0 breaks plots (sdrParvo, sdrMagno) = eye.compute(img_path, pos,rot,sc) #TODO derive from Encoder - eye.plot(delay=5000) + eye.plot(delay=1500) print("Sparsity parvo: {}".format(len(eye.parvo_sdr.sparse)/np.product(eye.parvo_sdr.dimensions))) print("Sparsity magno: {}".format(len(eye.magno_sdr.sparse)/np.product(eye.magno_sdr.dimensions))) print("All images seen.")