123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384 |
- ''' Document Localization using Recursive CNN
- Maintainer : Khurram Javed
- Email : kjaved@ualberta.ca '''
- import numpy as np
- import torch
- from PIL import Image
- from torchvision import transforms
- import model
class corner_finder():
    """Refine one corner estimate by repeatedly predicting and cropping.

    The wrapped network is queried on the current crop, the crop is shrunk
    to a window around the predicted position, and the process repeats until
    the crop is at most 10 pixels on a side.
    """

    def __init__(self, CHECKPOINT_DIR, model_type = "resnet"):
        """Build the corner model and load its trained weights.

        Args:
            CHECKPOINT_DIR: Checkpoint location, handed unchanged to
                ``torch.load`` (so it must be something ``torch.load``
                accepts, e.g. a path to a saved state dict).
            model_type: Architecture key forwarded to
                ``model.ModelFactory.get_model``.
        """
        self.model = model.ModelFactory.get_model(model_type, "corner")
        # Deserialize onto the CPU first; the model is moved to the GPU below
        # only when one is actually available.
        self.model.load_state_dict(torch.load(CHECKPOINT_DIR, map_location='cpu'))
        if torch.cuda.is_available():
            self.model.cuda()
        self.model.eval()

    def get_location(self, img, retainFactor=0.85):
        """Return the refined corner position in *img* as integer ``(x, y)``.

        Each pass resizes the current crop to 32x32, runs the model on it,
        scales the response by the crop's (width, height) and then keeps a
        window of ``retainFactor`` times each side around that position.
        Iteration stops once either side of the crop is 10 pixels or less.

        Args:
            img: Image array indexed ``[row, column, ...]``. It is passed to
                ``PIL.Image.fromarray``, so its dtype/layout must be one that
                function accepts.
            retainFactor: Fraction of each side kept per pass. Must lie
                strictly between 0 and 1: a value >= 1 would never shrink
                the crop (endless loop), a value <= 0 leaves nothing to
                crop.

        Returns:
            Tuple ``(x, y)`` of ints in the coordinate frame of the input
            image. ``(0, 0)`` when the input is already 10 pixels or less on
            a side, because no prediction is made in that case.

        Raises:
            ValueError: If ``retainFactor`` is not strictly inside (0, 1).
        """
        if not 0 < retainFactor < 1:
            raise ValueError(
                "retainFactor must be strictly between 0 and 1, got {!r}".format(retainFactor))

        # Nothing to refine: the loop below would not execute at all.
        if img.shape[0] <= 10 or img.shape[1] <= 10:
            return (0, 0)

        # One private copy is enough; later crops are plain views of it.
        crop = np.copy(img)
        to_model_input = transforms.Compose([transforms.Resize([32, 32]),
                                             transforms.ToTensor()])
        use_cuda = torch.cuda.is_available()

        # Top-left corner of the current crop, in input-image coordinates.
        origin_x = 0
        origin_y = 0
        # Latest prediction, in input-image coordinates.
        corner_x = 0.0
        corner_y = 0.0

        with torch.no_grad():
            while crop.shape[0] > 10 and crop.shape[1] > 10:
                height, width = crop.shape[0], crop.shape[1]

                batch = to_model_input(Image.fromarray(crop)).unsqueeze(0)
                if use_cuda:
                    batch = batch.cuda()

                # Assumes the model yields one (x, y) pair of fractions of
                # the crop size per batch item -- TODO confirm against model.
                response = self.model(batch).detach().cpu().numpy()[0]
                prediction = response * (width, height)
                x_loc = int(prediction[0])
                y_loc = int(prediction[1])

                # The prediction lives in the frame of the crop it was made
                # on, so pair it with *this* crop's origin before the origin
                # is advanced to the next, smaller crop.
                corner_x = origin_x + prediction[0]
                corner_y = origin_y + prediction[1]

                keep_w = int(width * retainFactor)
                keep_h = int(height * retainFactor)

                # Place the retained window around the prediction, clamped so
                # it stays inside the crop.
                # NOTE: the x branch rounds where the y branch truncates; this
                # asymmetry is kept so the windows match earlier behaviour.
                if x_loc > width / 2:
                    start_x = (min(x_loc + int(round(width * retainFactor / 2)), width)
                               - int(round(width * retainFactor)))
                else:
                    start_x = max(x_loc - int(width * retainFactor / 2), 0)

                if y_loc > height / 2:
                    start_y = min(y_loc + int(height * retainFactor / 2), height) - keep_h
                else:
                    start_y = max(y_loc - int(height * retainFactor / 2), 0)

                origin_x += start_x
                origin_y += start_y
                crop = crop[start_y:start_y + keep_h, start_x:start_x + keep_w]

        return (int(round(corner_x)), int(round(corner_y)))
# Running this file directly does nothing; the module is meant to be imported
# for its corner_finder class.
if __name__ == "__main__":
    pass
|