import matplotlib
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
from six import BytesIO
from pathlib import Path
import tensorflow as tf
%matplotlib inline
!git clone --depth 1 https://github.com/tensorflow/models
# Install the Object Detection API
%%bash
# Build and install the Object Detection API from the models/ checkout.
cd models/research/
# Generate the Python modules for the API's protobuf definitions in place.
protoc object_detection/protos/*.proto --python_out=.
# Copy the TF2 packaging script next to the sources so `pip install .` can use it.
cp object_detection/packages/tf2/setup.py .
# Install the package from the current directory (-q keeps pip output quiet).
python -m pip install -q .
from object_detection.utils import colab_utils
from object_detection.utils import visualization_utils as viz_utils
Download an image dataset to annotate, for instance the Oxford-IIIT Pet Dataset (https://www.robots.ox.ac.uk/~vgg/data/pets/).
%%bash
# Download the image archive into the working directory, keeping its remote file name.
curl -O https://www.robots.ox.ac.uk/~vgg/data/pets/data/images.tar.gz
# Unpack it; the notebook later reads the files from the images/ folder.
tar xzf images.tar.gz
# Keep only files with an image extension: anything else found in the folder
# would make the image loader fail if it were sampled. Sorting makes the
# listing order reproducible across runs and file systems.
image_suffixes = {'.jpg', '.jpeg', '.png'}
paths = sorted(
    str(p) for p in Path('images').glob('*')
    if p.suffix.lower() in image_suffixes)
Utility method to load an image from path into a uint8 numpy array with shape (height, width, channels), where channels=3 for RGB.
def load_image_into_numpy_array(path):
  """Load an image file into a uint8 numpy array.

  Args:
    path: location of the image file, opened through tf.io.gfile.

  Returns:
    A uint8 numpy array with shape (height, width, 3) holding RGB pixels.
  """
  # The context manager releases the file handle even if reading fails.
  with tf.io.gfile.GFile(path, 'rb') as image_file:
    img_data = image_file.read()
  # Force three RGB channels: grayscale, palette or RGBA files would otherwise
  # not fit the (height, width, 3) layout promised above.
  image = Image.open(BytesIO(img_data)).convert('RGB')
  # Converting the PIL image directly already yields (height, width, channels).
  return np.array(image, dtype=np.uint8)
For testing, select a random subset of the images (we don't want to load all of them).
sample_size = 10
# Slice a random permutation of the indices so that no image is picked twice;
# this also copes with fewer than sample_size available paths.
sample_paths = [paths[i] for i in np.random.permutation(len(paths))[:sample_size]]
Load the selected random images into numpy arrays
# Storage list handed to the annotation tool as box_storage_pointer
# (presumably filled with the boxes drawn for each image — confirm against
# colab_utils.annotate).
boxes = []
# Load every sampled file into a numpy array.
images_np = list(map(load_image_into_numpy_array, map(str, sample_paths)))
colab_utils.annotate(images_np, box_storage_pointer=boxes)
Define the indexes for the categories
# Lookup table passed to the visualization helper: class id -> {'id', 'name'}.
# NOTE(review): the notes next to the manual labels further down describe 0 as
# cat and 1 as dog, the opposite of this table — confirm which is intended.
category_index = {
    class_id: {'id': class_id, 'name': label}
    for class_id, label in enumerate(('dog', 'cat'))
}
Wrapper function to visualize the original image along with the best detected box. It takes as arguments:
- image_np: uint8 numpy array with shape (img_height, img_width, 3)
- boxes: a numpy array of shape [N, 4]
- classes: a numpy array of shape [N]. Note that class indices are 1-based, and match the keys in the label map.
- scores: a numpy array of shape [N] or None. If scores=None, then this function assumes that the boxes to be plotted are groundtruth boxes and plot all boxes as black with no classes or scores.
- category_index: a dict containing category dictionaries (each holding a category index `id` and a category name `name`) keyed by category indices.
- figsize: (optional) size for the figure.
- image_name: (optional) name for the image file.
def plot_detections(image_np, boxes, classes, scores, category_index,
                    figsize=(12, 16), image_name=None, min_score_thresh=0.8):
  """Draw boxes and labels on a copy of an image, then show or save it.

  Args:
    image_np: uint8 numpy array with shape (img_height, img_width, 3). It is
      not modified; the annotations are drawn on a copy.
    boxes: numpy array of shape [N, 4], in normalized coordinates.
    classes: numpy array of shape [N] with class ids that are keys of
      category_index.
    scores: numpy array of shape [N], or None. With None the visualization
      utility is expected to treat the boxes as groundtruth and plot them
      without classes or scores.
    category_index: dict of category dictionaries (each holding 'id' and
      'name') keyed by category id.
    figsize: currently unused; kept so existing callers keep working. The
      caller controls the figure size (e.g. through plt.figure).
    image_name: optional file name. When given, the annotated image is saved
      there instead of being displayed.
    min_score_thresh: minimum score a box needs in order to be drawn.
  """
  image_np_with_annotations = image_np.copy()
  viz_utils.visualize_boxes_and_labels_on_image_array(
      image_np_with_annotations,
      boxes,
      classes,
      scores,
      category_index,
      use_normalized_coordinates=True,
      min_score_thresh=min_score_thresh)
  if image_name:
    plt.imsave(image_name, image_np_with_annotations)
  else:
    # Draws on the current axes, so the function can be used inside subplots.
    plt.imshow(image_np_with_annotations)
I manually inspected the images to get the class for each one (which is why the boxes below are given 100% scores). Note that:
- `0` is for a cat image
- `1` is for a dog image
# One single-element int32 label array per annotated image, in display order.
classes = [
  np.full(shape=(1), fill_value=label, dtype=np.int32)
  for label in (1, 1, 0, 1, 0)
]
# Every box gets a score of 100%; the same one-element array is shared by all images.
scores = np.ones(shape=(1), dtype=np.float32)
Visualize the images with their bounding boxes
plt.figure(figsize=(30, 15))
# Pair each image with its boxes and its label. zip stops at the shortest
# list, so only images that have both annotations and a manual label are
# drawn, instead of failing on a hard-coded count.
labelled = zip(images_np, boxes, classes)
for position, (image, image_boxes, image_classes) in enumerate(labelled, start=1):
  # The 2x3 grid has room for up to six annotated images.
  plt.subplot(2, 3, position)
  plot_detections(image, image_boxes, image_classes, scores, category_index)
plt.show()