Others
/
DocumentAIKit


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172
							''' Document Localization using Recursive CNN
 Maintainer : Khurram Javed
 Email : kjaved@ualberta.ca '''

import numpy as np
import torch
from PIL import Image
from torchvision import transforms

import model


class GetCorners:
    def __init__(self, checkpoint_dir):
        self.model = model.ModelFactory.get_model("resnet", 'document')
        self.model.load_state_dict(torch.load(checkpoint_dir, map_location='cpu'))
        if torch.cuda.is_available():
            self.model.cuda()
        self.model.eval()

    def get(self, pil_image):
        with torch.no_grad():
            image_array = np.copy(pil_image)
            pil_image = Image.fromarray(pil_image)
            test_transform = transforms.Compose([transforms.Resize([32, 32]),
                                                 transforms.ToTensor()])
            img_temp = test_transform(pil_image)

            img_temp = img_temp.unsqueeze(0)
            if torch.cuda.is_available():
                img_temp = img_temp.cuda()

            model_prediction = self.model(img_temp).cpu().data.numpy()[0]

            model_prediction = np.array(model_prediction)

            x_cords = model_prediction[[0, 2, 4, 6]]
            y_cords = model_prediction[[1, 3, 5, 7]]

            x_cords = x_cords * image_array.shape[1]
            y_cords = y_cords * image_array.shape[0]

            # Extract the four corners of the image. Read "Region Extractor" in Section III of the paper for an explanation.

            top_left = image_array[
                       max(0, int(2 * y_cords[0] - (y_cords[3] + y_cords[0]) / 2)):int((y_cords[3] + y_cords[0]) / 2),
                       max(0, int(2 * x_cords[0] - (x_cords[1] + x_cords[0]) / 2)):int((x_cords[1] + x_cords[0]) / 2)]

            top_right = image_array[
                        max(0, int(2 * y_cords[1] - (y_cords[1] + y_cords[2]) / 2)):int((y_cords[1] + y_cords[2]) / 2),
                        int((x_cords[1] + x_cords[0]) / 2):min(image_array.shape[1] - 1,
                                                               int(x_cords[1] + (x_cords[1] - x_cords[0]) / 2))]

            bottom_right = image_array[int((y_cords[1] + y_cords[2]) / 2):min(image_array.shape[0] - 1, int(
                y_cords[2] + (y_cords[2] - y_cords[1]) / 2)),
                           int((x_cords[2] + x_cords[3]) / 2):min(image_array.shape[1] - 1,
                                                                  int(x_cords[2] + (x_cords[2] - x_cords[3]) / 2))]

            bottom_left = image_array[int((y_cords[0] + y_cords[3]) / 2):min(image_array.shape[0] - 1, int(
                y_cords[3] + (y_cords[3] - y_cords[0]) / 2)),
                          max(0, int(2 * x_cords[3] - (x_cords[2] + x_cords[3]) / 2)):int(
                              (x_cords[3] + x_cords[2]) / 2)]

            top_left = (top_left, max(0, int(2 * x_cords[0] - (x_cords[1] + x_cords[0]) / 2)),
                        max(0, int(2 * y_cords[0] - (y_cords[3] + y_cords[0]) / 2)))
            top_right = (
            top_right, int((x_cords[1] + x_cords[0]) / 2), max(0, int(2 * y_cords[1] - (y_cords[1] + y_cords[2]) / 2)))
            bottom_right = (bottom_right, int((x_cords[2] + x_cords[3]) / 2), int((y_cords[1] + y_cords[2]) / 2))
            bottom_left = (bottom_left, max(0, int(2 * x_cords[3] - (x_cords[2] + x_cords[3]) / 2)),
                           int((y_cords[0] + y_cords[3]) / 2))

            return top_left, top_right, bottom_right, bottom_left