Project Setup

Make sure we're running TensorFlow v1

try:
  %tensorflow_version 1.x
except Exception:
  pass
TensorFlow 1.x selected.

Install the Mask-RCNN package

%%capture
%%bash
pip install -U git+https://github.com/matterport/Mask_RCNN

Download weights of pretrained Mask-RCNN

!curl -L -o mask_rcnn_balloon.h5 https://github.com/matterport/Mask_RCNN/releases/download/v2.1/mask_rcnn_balloon.h5?raw=true
import cv2
import math
import numpy as np
import matplotlib.pyplot as plt
import os
import sys
from mrcnn import utils
from mrcnn import model as modellib
from mrcnn.config import Config
from PIL import Image

plt.rcParams["figure.figsize"] = (10, 10)
np.set_printoptions(precision=3)

Mask-RCNN setup

# Load the pre-trained model data
ROOT_DIR = os.getcwd()
MODEL_DIR = os.path.join(ROOT_DIR, "logs")
COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")
if not os.path.exists(COCO_MODEL_PATH):
    utils.download_trained_weights(COCO_MODEL_PATH)
Downloading pretrained model to /content/mask_rcnn_coco.h5 ...
... done downloading pretrained model!
class InferenceConfig(Config):
    """Configuration for training on MS COCO.
    Derives from the base Config class and overrides values specific
    to the COCO dataset.
    """
    # Give the configuration a recognizable name
    NAME = "coco"

    # Number of images to process on each GPU at a time. A 12GB GPU can
    # typically handle 2 images of 1024x1024px; 1 is enough for single-image inference.
    IMAGES_PER_GPU = 1

    # Number of GPUs to use (1 is enough for inference)
    GPU_COUNT = 1

    # Number of classes (including background)
    NUM_CLASSES = 1 + 80  # COCO has 80 classes
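
If you want to confirm the effective settings (the base Config class also derives values such as BATCH_SIZE = IMAGES_PER_GPU * GPU_COUNT), it provides a display() helper that prints every configuration value:

# Optional: print all values of the inference configuration
config = InferenceConfig()
config.display()
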
%%capture
# Create the model in inference mode and load the pre-trained COCO weights
model = modellib.MaskRCNN(
    mode="inference", model_dir=MODEL_DIR, config=InferenceConfig()
)
model.load_weights(COCO_MODEL_PATH, by_name=True)

# COCO dataset object names
class_names = [
    'BG', 'person', 'bicycle', 'car', 'motorcycle', 'airplane',
    'bus', 'train', 'truck', 'boat', 'traffic light',
    'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird',
    'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear',
    'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',
    'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
    'kite', 'baseball bat', 'baseball glove', 'skateboard',
    'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup',
    'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
    'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
    'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',
    'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
    'keyboard', 'cell phone', 'microwave', 'oven', 'toaster',
    'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors',
    'teddy bear', 'hair drier', 'toothbrush'
]
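
The class ids the model predicts are indices into this list (background is index 0), so a detected id maps straight back to a label, for example:

# Map between class ids and human-readable labels
print(class_names.index('cat'))   # 16
print(class_names[16])            # cat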

The following function applies the mask to the original image: wherever the mask is 0, the pixel is taken from the gray (background) image; otherwise the pixel from the original picture is kept.

# This function is used to replace the background of the image with the pixels
# of mask_image (e.g. a grayscale version of the same photo).
# image[:, :, 0], image[:, :, 1] and image[:, :, 2] are the three color channels.
# mask == 0 means that the pixel does not belong to the detected object.
# np.where takes the pixel from mask_image wherever the pixel belongs to the
# background, and keeps the original pixel otherwise.
# Because the grayscale image is stacked into 3 identical channels, every
# background pixel gets the same value in all channels, i.e. it stays gray.

def apply_mask(image, mask_image, mask):
    """Helper function to apply a mask to an image."""
    image[:, :, 0] = np.where(
        mask == 0,
        mask_image[:, :, 0],
        image[:, :, 0]
    )
    image[:, :, 1] = np.where(
        mask == 0,
        mask_image[:, :, 1],
        image[:, :, 1]
    )
    image[:, :, 2] = np.where(
        mask == 0,
        mask_image[:, :, 2],
        image[:, :, 2]
    )
    return image
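
The three per-channel calls above can also be collapsed into a single broadcasted np.where. This is an equivalent sketch (apply_mask_vectorized is just an illustrative name, not used elsewhere in this notebook):

def apply_mask_vectorized(image, mask_image, mask):
    """Same result as apply_mask, using one broadcasted np.where over all channels."""
    # mask[..., None] has shape (H, W, 1) and broadcasts against the (H, W, 3) images
    return np.where(mask[..., None] == 0, mask_image, image)
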
def process_image(image, mask_image, boxes, masks, ids, names, scores, target_label):
    """Helper function to find the object with the largest bounding box and apply its mask."""
    # max_area keeps track of the largest matching object found so far
    max_area = 0

    # n_instances is the number of detected objects
    n_instances = boxes.shape[0]

    if not n_instances:
        print('NO INSTANCES TO DISPLAY')
    else:
        assert boxes.shape[0] == masks.shape[-1] == ids.shape[0]

    # mask of the selected object; stays None if nothing matches target_label
    mask = None

    for i in range(n_instances):
        if not np.any(boxes[i]):
            continue

        # compute the area of the bounding box
        y1, x1, y2, x2 = boxes[i]
        area = (y2 - y1) * (x2 - x1)

        # only consider objects whose class matches target_label
        # (if target_label is None, every detected object is considered)
        current_label = names[ids[i]]
        if target_label is None or current_label == target_label:
            # the largest matching object is kept as the main character;
            # everything else is regarded as background
            if area > max_area:
                max_area = area
                mask = masks[:, :, i]

    # apply the mask once, after the loop, using the largest matching object
    if mask is not None:
        image = apply_mask(image, mask_image, mask)

    return image

Now the model is ready to use.

!curl -L -o cat_input.jpg "https://unsplash.com/photos/7GX5aICb5i4/download?force=true&w=640"
# Credit for the image: https://unsplash.com/photos/7GX5aICb5i4
image = cv2.imread('./cat_input.jpg')
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
plt.imshow(image)

Application 1: Grayscale the background

Recognize the main character, keep it in color, and grayscale the background of the image.

# Use cvtColor to convert the RGB image to a grayscale image
mask_image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
mask_image = np.stack([mask_image, mask_image, mask_image], axis=2)
plt.imshow(mask_image)
results = model.detect([image], verbose=0)
output_dict = results[0]
rois = output_dict['rois']
class_ids = output_dict['class_ids']
scores = output_dict['scores']
masks = output_dict['masks']
result = process_image(
    image.copy(), mask_image, rois, masks, class_ids, class_names, scores, 'cat'
)
plt.imshow(result)
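
As a quick sanity check, you can also list everything the model detected in this image together with its confidence, using the class_ids and scores unpacked above:

# Print each detection as "label: score"
for class_id, score in zip(class_ids, scores):
    print(f"{class_names[class_id]}: {score:.3f}")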

Let's take this cat to the beach

!curl -L -o beach.jpg "https://unsplash.com/photos/DH_u2aV3nGM/download?force=true&w=640"
image_beach = cv2.imread('./beach.jpg')
image_beach = cv2.cvtColor(image_beach, cv2.COLOR_BGR2RGB)
plt.imshow(image_beach)

Resize the beach image so that it matches the size of the original image and can serve as the new background.

image_beach = cv2.resize(image_beach, dsize=(image.shape[1], image.shape[0]), interpolation=cv2.INTER_AREA)
result = process_image(
    image.copy(), image_beach, rois, masks, class_ids, class_names, scores, 'cat'
)
plt.imshow(result)

Think of the possibilities :)
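
For example, the same detection results can be reused to blur the background instead of replacing it, giving a portrait-mode effect. A minimal sketch, assuming image, rois, masks, class_ids, class_names and scores from above are still in memory:

# Use a heavily blurred copy of the photo as the new "background" image
blurred = cv2.GaussianBlur(image, (51, 51), 0)
result = process_image(
    image.copy(), blurred, rois, masks, class_ids, class_names, scores, 'cat'
)
plt.imshow(result)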