import matplotlib
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
from six import BytesIO
from pathlib import Path
import tensorflow as tf
%matplotlib inline

Install Object Detection API

# Shallow clone (--depth 1): fetch only the latest commit of the TensorFlow models repo.
!git clone --depth 1 https://github.com/tensorflow/models

Cloning into 'models'...
remote: Enumerating objects: 2797, done.
remote: Counting objects: 100% (2797/2797), done.
remote: Compressing objects: 100% (2439/2439), done.
remote: Total 2797 (delta 563), reused 1405 (delta 322), pack-reused 0
Receiving objects: 100% (2797/2797), 57.73 MiB | 31.67 MiB/s, done.
Resolving deltas: 100% (563/563), done.

# Install the Object Detection API
%%bash
# Work from the research/ directory of the cloned models repo.
cd models/research/
# Generate Python modules from the Object Detection protobuf definitions.
protoc object_detection/protos/*.proto --python_out=.
# Use the TF2 packaging script as setup.py, then install the package quietly.
cp object_detection/packages/tf2/setup.py .
python -m pip install -q .

object_detection/protos/input_reader.proto: warning: Import object_detection/protos/image_resizer.proto but not used.

from object_detection.utils import colab_utils
from object_detection.utils import visualization_utils as viz_utils

Download data for annotation

Download an image dataset to annotate, for instance the Oxford-IIIT Pet Dataset (https://www.robots.ox.ac.uk/~vgg/data/pets/).

%%bash
# Fetch the image archive; -O saves it under its remote name (images.tar.gz).
curl -O https://www.robots.ox.ac.uk/~vgg/data/pets/data/images.tar.gz
# Unpack it in the current directory; the notebook later reads from images/.
tar xzf images.tar.gz

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  755M  100  755M    0     0  29.8M      0  0:00:25  0:00:25 --:--:-- 31.4M

# Keep only the JPEG files: the archive's images/ directory also ships a few
# non-image files (MATLAB .mat files) that the image loader cannot open.
# Sorting makes the listing order reproducible across runs and filesystems.
paths = sorted(str(p) for p in Path('images').glob('*.jpg'))

Utility method to load an image from path into a uint8 numpy array with shape (height, width, channels), where channels=3 for RGB.

def load_image_into_numpy_array(path):
  """Load an image file into a uint8 numpy array.

  Args:
    path: path of the image file, opened through `tf.io.gfile.GFile`.

  Returns:
    uint8 numpy array with shape (height, width, 3), channels in RGB order.
  """
  # The context manager closes the file handle as soon as the bytes are read.
  with tf.io.gfile.GFile(path, 'rb') as image_file:
    img_data = image_file.read()
  # Force three channels: grayscale, palette and RGBA files would otherwise
  # not fit the (height, width, 3) layout promised to callers.
  image = Image.open(BytesIO(img_data)).convert('RGB')
  # Converting the PIL image directly already yields (height, width, 3),
  # without the slow per-pixel `getdata()` pass and manual reshape.
  return np.array(image, dtype=np.uint8)

For testing, select a random subset of the images (we don't want to load all of them)

sample_size = 10
# Draw `sample_size` distinct images (no repeats). The size is capped at the
# number of available paths so a small dataset does not raise.
sample_paths = np.random.choice(
    paths, size=min(sample_size, len(paths)), replace=False).tolist()

Annotate images

Load the selected random images into numpy arrays

# Decode every sampled file into a numpy array, keeping the sampling order.
images_np = list(map(load_image_into_numpy_array, map(str, sample_paths)))

# `boxes` is handed to the annotation widget as its storage list; the boxes
# drawn for each image are read back from it further down.
boxes = []
colab_utils.annotate(images_np, box_storage_pointer=boxes)

'--boxes array populated--'

'--boxes array populated--'

'--boxes array populated--'

'--boxes array populated--'

'--boxes array populated--'

'--boxes array populated--'

'--boxes array populated--'

'--boxes array populated--'

'--boxes array populated--'

'--boxes array populated--'

Define the indexes for the categories

# Maps each class id to its category dictionary ({'id': ..., 'name': ...}).
# NOTE(review): this maps 0 -> 'dog' and 1 -> 'cat', while the notebook text
# further down says 0 is a cat image and 1 is a dog image - confirm which of
# the two is intended before trusting the labels drawn on the plots.
category_index = {
    class_id: {'id': class_id, 'name': class_name}
    for class_id, class_name in enumerate(('dog', 'cat'))
}

Inspect the annotations

Wrapper function to visualize the original image along with the best detected box. It takes the following arguments:

image_np: uint8 numpy array with shape (img_height, img_width, 3)
boxes: a numpy array of shape [N, 4]
classes: a numpy array of shape [N]. Note that class indices are 1-based, and match the keys in the label map.
scores: a numpy array of shape [N] or None. If scores=None, then this function assumes that the boxes to be plotted are groundtruth boxes and plot all boxes as black with no classes or scores.
category_index: a dict containing category dictionaries (each holding the category index `id` and the category name `name`) keyed by category indices.
figsize: (optional) size for the figure.
image_name: (optional) name for the image file.

def plot_detections(image_np, boxes, classes, scores, category_index, figsize=(12, 16), image_name=None):
  """Draw boxes and labels on a copy of an image, then save or show it.

  Args:
    image_np: uint8 numpy array with shape (img_height, img_width, 3).
    boxes: a numpy array of shape [N, 4]; passed on with
      `use_normalized_coordinates=True`.
    classes: a numpy array of shape [N] with the class index of each box.
    scores: a numpy array of shape [N], or None for groundtruth boxes.
    category_index: a dict of category dictionaries (with `id` and `name`)
      keyed by category index.
    figsize: accepted for interface compatibility; not used by this function.
    image_name: if truthy, the annotated image is written to this file with
      `plt.imsave`; otherwise it is drawn on the current axes with
      `plt.imshow`.
  """
  # Annotations are drawn on a copy, so the caller's array is not the one
  # handed to the drawing utility.
  annotated = image_np.copy()
  viz_utils.visualize_boxes_and_labels_on_image_array(
      annotated, boxes, classes, scores, category_index,
      use_normalized_coordinates=True,
      min_score_thresh=0.8)

  if not image_name:
    plt.imshow(annotated)
    return
  plt.imsave(image_name, annotated)

I manually inspected the images to get the class of each one (which is why the boxes are given a score of 100% below). Note that:

0 is for a cat image
1 is for a dog image

# One single-element int32 label array per inspected image, in image order.
classes = [np.array([label], dtype=np.int32) for label in (1, 1, 0, 1, 0)]
# give boxes a score of 100%
scores = np.ones(1, dtype=np.float32)

Visualize the images with their bounding boxes

# Plot every image that has both an annotated box and a class label, instead
# of assuming there are exactly five of each.
num_plots = min(len(images_np), len(boxes), len(classes))
num_cols = 3
# Ceiling division: enough rows to hold all plots (2 rows for 5 images).
num_rows = max(1, (num_plots + num_cols - 1) // num_cols)

plt.figure(figsize=(30, 15))
for idx in range(num_plots):
  plt.subplot(num_rows, num_cols, idx + 1)
  plot_detections(images_np[idx], boxes[idx], classes[idx], scores, category_index)
plt.show()