try:
%tensorflow_version 1.x
except Exception:
pass
Install Mask-RCNN model
%%capture
%%bash
# Install (or upgrade, via -U) the Mask_RCNN package directly from its
# GitHub repository. The surrounding %%capture magic suppresses the output.
pip install -U git+https://github.com/matterport/Mask_RCNN
Download weights of pretrained Mask-RCNN
# Download mask_rcnn_balloon.h5 from the Mask_RCNN v2.1 release (-L follows redirects).
# NOTE(review): the rest of this notebook loads mask_rcnn_coco.h5 and never
# references mask_rcnn_balloon.h5 -- confirm whether this download is still needed.
!curl -L -o mask_rcnn_balloon.h5 https://github.com/matterport/Mask_RCNN/releases/download/v2.1/mask_rcnn_balloon.h5?raw=true
import cv2
import math
import numpy as np
import matplotlib.pyplot as plt
import os
import sys
from mrcnn import utils
from mrcnn import model as modellib
from mrcnn.config import Config
from PIL import Image
# Notebook-wide display defaults: print arrays with 3 digits of precision
# and use (10, 10) as the default matplotlib figure size.
np.set_printoptions(precision=3)
plt.rcParams["figure.figsize"] = (10, 10)
# Paths used by the model: everything lives under the current working directory.
ROOT_DIR = os.getcwd()
# Directory handed to MaskRCNN as its model_dir.
MODEL_DIR = os.path.join(ROOT_DIR, "logs")
# Weights file loaded into the model; fetched only when it is not on disk yet.
COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")
if not os.path.exists(COCO_MODEL_PATH):
    utils.download_trained_weights(COCO_MODEL_PATH)
class InferenceConfig(Config):
    """Settings for the Mask R-CNN model used in this notebook.

    Subclasses the base ``Config`` class and overrides only the values
    listed below; this notebook uses it to build a model in "inference" mode.
    """

    # Recognizable name for this configuration.
    NAME = "coco"

    # Total class count: the 80 COCO classes plus one background class.
    NUM_CLASSES = 80 + 1

    # Run on a single GPU, one image per GPU.
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1
%%capture
# COCO dataset object names
model = modellib.MaskRCNN(
mode="inference", model_dir=MODEL_DIR, config=InferenceConfig()
)
model.load_weights(COCO_MODEL_PATH, by_name=True)
class_names = [
'BG', 'person', 'bicycle', 'car', 'motorcycle', 'airplane',
'bus', 'train', 'truck', 'boat', 'traffic light',
'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird',
'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear',
'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',
'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
'kite', 'baseball bat', 'baseball glove', 'skateboard',
'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup',
'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',
'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
'keyboard', 'cell phone', 'microwave', 'oven', 'toaster',
'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors',
'teddy bear', 'hair drier', 'toothbrush'
]
The following function is applied to the original image: wherever the mask is 0 (background), the pixel is taken from the gray image; otherwise the pixel from the original picture is kept.
# apply_mask swaps the background of an image for the pixels of another image.
# A pixel counts as background when its mask value is 0; such pixels are
# copied from mask_image, while all other pixels keep their original values.
# The same rule is applied to each of the three colour channels in turn, so
# the function does not depend on the channel order of the images.
def apply_mask(image, mask_image, mask):
    """Replace the background pixels of ``image`` with those of ``mask_image``.

    Args:
        image: Array indexed as ``[row, col, channel]`` with channels 0-2.
            It is modified in place.
        mask_image: Array indexed the same way, supplying replacement pixels.
        mask: 2-D array; positions equal to 0 are treated as background.

    Returns:
        The same ``image`` object, after modification.
    """
    is_background = mask == 0
    for channel in range(3):
        image[:, :, channel] = np.where(
            is_background,
            mask_image[:, :, channel],
            image[:, :, channel],
        )
    return image
def process_image(image, mask_image, boxes, masks, ids, names, scores, target_label):
    """Keep the largest matching detection and replace everything else.

    Among the detections whose label equals ``target_label`` (or among all
    detections when ``target_label`` is None), the one with the largest
    bounding-box area is selected. Every pixel outside that detection's mask
    is replaced with the corresponding pixel of ``mask_image``.

    Args:
        image: Image array to modify; it is changed in place by apply_mask.
        mask_image: Image array supplying the replacement background pixels.
        boxes: Array with one row ``(y1, x1, y2, x2)`` per detection.
        masks: Array whose last axis indexes detections (``masks[:, :, i]``).
        ids: Per-detection class ids, used as indices into ``names``.
        names: Sequence mapping a class id to its label.
        scores: Not used by this function; kept for interface compatibility.
        target_label: Label to keep, or None to accept any label.

    Returns:
        ``image`` with the background replaced, or ``image`` unchanged when
        there are no detections or none matches ``target_label``.
    """
    n_instances = boxes.shape[0]
    if not n_instances:
        print('NO INSTANCES TO DISPLAY')
        # Nothing was detected, so there is no mask to apply.
        return image
    assert boxes.shape[0] == masks.shape[-1] == ids.shape[0]

    max_area = 0
    best_mask = None
    for i in range(n_instances):
        # An all-zero box row carries no detection.
        if not np.any(boxes[i]):
            continue
        # Skip detections of other classes when a target label is given.
        if target_label is not None and names[ids[i]] != target_label:
            continue
        y1, x1, y2, x2 = boxes[i]
        area = (y2 - y1) * (x2 - x1)
        # The largest matching object is the main subject; any other
        # instance is treated as background.
        if area > max_area:
            max_area = area
            best_mask = masks[:, :, i]

    if best_mask is None:
        # No detection matched, so leave the image untouched.
        return image
    return apply_mask(image, mask_image, best_mask)
Now the model is ready to use.
!curl -L -o cat_input.jpg https://unsplash.com/photos/7GX5aICb5i4/download?force=true&w=640
# Credit for the image: https://unsplash.com/photos/7GX5aICb5i4
image = cv2.imread('./cat_input.jpg')
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
plt.imshow(image)
# Use cvtColor to accomplish image transformation from RGB image to gray image
mask_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
mask_image = np.stack([mask_image, mask_image, mask_image], axis=2)
plt.imshow(mask_image)
results = model.detect([image], verbose=0)
output_dict = results[0]
rois, class_ids, scores, masks = output_dict.values()
result = process_image(
image.copy(), mask_image, rois, masks, class_ids, class_names, scores, 'cat'
)
plt.imshow(result)
Let's take this cat to the beach
!curl -L -o beach.jpg https://unsplash.com/photos/DH_u2aV3nGM/download?force=true&w=640
image_beach = cv2.imread('./beach.jpg')
image_beach = cv2.cvtColor(image_beach, cv2.COLOR_BGR2RGB)
plt.imshow(image_beach)
Resize the new background image so that it matches the size of the original image.
# image.shape[1::-1] is (image.shape[1], image.shape[0]), the order that
# the dsize argument is given in here.
image_beach = cv2.resize(
    image_beach,
    dsize=image.shape[1::-1],
    interpolation=cv2.INTER_AREA,
)
# Reuse the detections computed for the cat image, now with the beach picture
# as the replacement background. A copy is passed because process_image
# modifies its image argument in place.
result = process_image(
    image.copy(),
    image_beach,
    rois,
    masks,
    class_ids,
    class_names,
    scores,
    'cat',
)
plt.imshow(result)
Think of the possibilities :)