# ocrd/helpers.py
import cv2
import numpy as np
import json
from PIL import Image, ImageDraw, ImageFont
from transformers import pipeline
from huggingface_hub import from_pretrained_keras
import imageio
def resize_image(img_in, input_height, input_width):
    return cv2.resize(img_in, (input_width, input_height), interpolation=cv2.INTER_NEAREST)
def write_dict_to_json(dictionary, save_path, indent=4):
with open(save_path, "w") as outfile:
json.dump(dictionary, outfile, indent=indent)
def load_json_to_dict(load_path):
with open(load_path) as json_file:
return json.load(json_file)
class OCRD:
"""
Optical Character Recognition and Document processing class that provides functionalities
to preprocess images, detect text lines, perform OCR, and visualize the results.
The class utilizes deep learning models for various tasks such as binarization and text
line segmentation. It provides comprehensive methods to handle image scaling, prediction,
text extraction, and overlaying recognized text on images.
Attributes:
image (ndarray): The image loaded into memory from the specified path. This image
is used across various methods within the class.
Methods:
__init__(img_path: str):
Initializes the OCRD class by loading an image from the specified file path.
scale_image(img: ndarray) -> ndarray:
Scales an image while maintaining its aspect ratio based on predefined width thresholds.
predict(model, img: ndarray) -> ndarray:
Uses a specified model to make predictions on the image. This function handles
image resizing and segmenting for model input.
binarize_image(img: ndarray, binarize_mode: str) -> ndarray:
Applies binarization to the image based on the specified mode ('detailed', 'fast', or 'no').
segment_textlines(img: ndarray) -> ndarray:
Segments text lines from the binarized image using a pretrained model.
extract_filter_and_deskew_textlines(img: ndarray, textline_mask: ndarray, min_pixel_sum: int, median_bounds: tuple) -> (dict, ndarray):
Processes an image to extract and correct orientation of text lines based on the provided mask.
ocr_on_textlines(textline_images: dict) -> dict:
Performs OCR on the extracted text lines and returns the recognized text.
create_text_overlay_image(textline_images: dict, textline_preds: dict, img_shape: tuple, font_size: int) -> Image:
Creates an image overlay with the recognized text annotations.
visualize_model_output(prediction: ndarray, img: ndarray) -> ndarray:
Visualizes the model's prediction by overlaying it onto the original image with distinct colors.
"""
def __init__(self, img_path):
self.image = np.array(Image.open(img_path))
def scale_image(self, img):
"""
        Scales an image to dimensions suitable for neural network inference. Scaling is based on the
        width of the input image; the new width and height are calculated to maintain the aspect
        ratio of the original image.
Parameters:
- img (ndarray): The image to be scaled, expected to be in the form of a numpy array where
img.shape[0] is the height and img.shape[1] is the width.
Behavior:
- If image width is less than 1100, the new width is set to 2000 pixels. The height is adjusted
to maintain the aspect ratio.
- If image width is between 1100 (inclusive) and 2500 (exclusive), the width remains unchanged
and the height is adjusted to maintain the aspect ratio.
- If image width is 2500 or more, the width is set to 2000 pixels and the height is similarly
adjusted to maintain the aspect ratio.
Returns:
- img_new (ndarray): A new image array that has been resized according to the specified rules.
The aspect ratio of the original image is preserved.
Note:
- This function assumes that a function `resize_image(img, height, width)` is available and is
used to resize the image where `img` is the original image array, `height` is the new height,
and `width` is the new width.
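        Example (illustrative):
        - An image of shape (1200, 900) has width 900 < 1100, so it is resized to a width of 2000 and
          a height of int(1200 / 900 * 2000) = 2666, preserving the aspect ratio.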
"""
width_early = img.shape[1]
if width_early < 1100:
img_w_new = 2000
img_h_new = int(img.shape[0] / float(img.shape[1]) * 2000)
elif width_early >= 1100 and width_early < 2500:
img_w_new = width_early
img_h_new = int(img.shape[0] / float(img.shape[1]) * width_early)
else:
img_w_new = 2000
img_h_new = int(img.shape[0] / float(img.shape[1]) * 2000)
img_new = resize_image(img, img_h_new, img_w_new)
return img_new
def predict(self, model, img):
"""
Processes an image to predict segmentation outputs using a given model. The function handles image resizing
to match the model's input dimensions and ensures that the entire image is processed by segmenting it into patches
that the model can handle. The prediction from these patches is then reassembled into a single output image.
Parameters:
- model (keras.Model): The neural network model used for predicting the image segmentation. The model should have
predefined input dimensions (height and width).
- img (ndarray): The image to be processed, represented as a numpy array.
Returns:
- prediction_true (ndarray): An image of the same size as the input image, containing the segmentation prediction
with each pixel labeled according to the model's output.
Details:
        - The function first scales the input image with `scale_image`. If the scaled image is still smaller
          than the model's input height or width, it is resized to match exactly.
- The function processes the image in overlapping patches to ensure smooth transitions between the segments. These
patches are then processed individually through the model.
- Predictions from these patches are then stitched together to form a complete output image, ensuring that edge
artifacts are minimized by carefully blending the overlapping areas.
        - This method relies on the module-level `resize_image` function for scaling and resizing operations.
- The output is converted to an 8-bit image before returning, suitable for display or further processing.
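        Example (illustrative, assuming a square model input of 448x448 pixels):
        - The margin is int(0.1 * 448) = 44 pixels, so each patch contributes a central region of
          448 - 2 * 44 = 360 pixels per side. A 2000-pixel-wide image therefore needs 6 patch columns
          (2000 / 360 = 5.6, rounded up), with the right-most patch shifted left so it ends exactly at
          the image border.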
"""
# bitmap output
        img_height_model = model.layers[-1].output_shape[1]
        img_width_model = model.layers[-1].output_shape[2]
img = self.scale_image(img)
if img.shape[0] < img_height_model:
img = resize_image(img, img_height_model, img.shape[1])
if img.shape[1] < img_width_model:
img = resize_image(img, img.shape[0], img_width_model)
marginal_of_patch_percent = 0.1
margin = int(marginal_of_patch_percent * img_height_model)
width_mid = img_width_model - 2 * margin
height_mid = img_height_model - 2 * margin
img = img / float(255.0)
img = img.astype(np.float16)
img_h = img.shape[0]
img_w = img.shape[1]
prediction_true = np.zeros((img_h, img_w, 3))
nxf = img_w / float(width_mid)
nyf = img_h / float(height_mid)
nxf = int(nxf) + 1 if nxf > int(nxf) else int(nxf)
nyf = int(nyf) + 1 if nyf > int(nyf) else int(nyf)
for i in range(nxf):
for j in range(nyf):
                # patch window along x and y (top/left origin and bottom/right end)
                index_x_d = i * width_mid
                index_x_u = index_x_d + img_width_model
                index_y_d = j * height_mid
                index_y_u = index_y_d + img_height_model
if index_x_u > img_w:
index_x_u = img_w
index_x_d = img_w - img_width_model
if index_y_u > img_h:
index_y_u = img_h
index_y_d = img_h - img_height_model
img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :]
label_p_pred = model.predict(img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2]),
verbose=0)
seg = np.argmax(label_p_pred, axis=3)[0]
seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2)
if i == 0 and j == 0:
seg_color = seg_color[0 : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :]
prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + 0 : index_x_u - margin, :] = seg_color
elif i == nxf - 1 and j == nyf - 1:
seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - 0, :]
prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - 0, :] = seg_color
elif i == 0 and j == nyf - 1:
seg_color = seg_color[margin : seg_color.shape[0] - 0, 0 : seg_color.shape[1] - margin, :]
prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + 0 : index_x_u - margin, :] = seg_color
elif i == nxf - 1 and j == 0:
seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :]
prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - 0, :] = seg_color
elif i == 0 and j != 0 and j != nyf - 1:
seg_color = seg_color[margin : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :]
prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + 0 : index_x_u - margin, :] = seg_color
elif i == nxf - 1 and j != 0 and j != nyf - 1:
seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :]
prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - 0, :] = seg_color
elif i != 0 and i != nxf - 1 and j == 0:
seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :]
prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - margin, :] = seg_color
elif i != 0 and i != nxf - 1 and j == nyf - 1:
seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - margin, :]
prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - margin, :] = seg_color
else:
seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :]
prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - margin, :] = seg_color
prediction_true = prediction_true.astype(np.uint8)
return prediction_true
def binarize_image(self, img, binarize_mode='detailed'):
"""
Binarizes an image according to the specified mode.
Parameters:
- img (ndarray): The input image to be binarized.
- binarize_mode (str): The mode of binarization. Can be 'detailed', 'fast', or 'no'.
- 'detailed': Uses a pre-trained deep learning model for binarization.
- 'fast': Uses OpenCV for a quicker, threshold-based binarization.
- 'no': Returns a copy of the original image.
Returns:
- ndarray: The binarized image.
Raises:
- ValueError: If an invalid binarize_mode is provided.
Description:
Depending on the 'binarize_mode', the function processes the image differently:
- For 'detailed' mode, it loads a specific model and performs prediction to binarize the image.
- For 'fast' mode, it quickly converts the image to grayscale and applies a threshold.
- For 'no' mode, it simply returns the original image unchanged.
If an unsupported mode is provided, the function raises a ValueError.
Note:
- The 'detailed' mode requires a pre-trained model from huggingface_hub.
- This function depends on OpenCV (cv2) for image processing in 'fast' mode.
"""
if binarize_mode == 'detailed':
model_name = "SBB/eynollah-binarization"
model = from_pretrained_keras(model_name)
binarized = self.predict(model, img)
# Convert from mask to image (letters black)
binarized = binarized.astype(np.int8)
binarized = -binarized + 1
binarized = (binarized * 255).astype(np.uint8)
elif binarize_mode == 'fast':
            binarized = self.scale_image(img)
            binarized = cv2.cvtColor(binarized, cv2.COLOR_RGB2GRAY)  # image was loaded via PIL, so channels are RGB
_, binarized = cv2.threshold(binarized, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
binarized = np.repeat(binarized[:, :, np.newaxis], 3, axis=2)
elif binarize_mode == 'no':
binarized = img.copy()
else:
accepted_values = ['detailed', 'fast', 'no']
raise ValueError(f"Invalid value provided: {binarize_mode}. Accepted values are: {accepted_values}")
binarized = binarized.astype(np.uint8)
return binarized
def segment_textlines(self, img):
        '''
        Segments text lines in the image using the pretrained "SBB/eynollah-textline" model
        from the Hugging Face Hub.

        Returns the per-pixel text line segmentation produced by `predict` as an ndarray.
        '''
model_name = "SBB/eynollah-textline"
model = from_pretrained_keras(model_name)
textline_segments = self.predict(model, img)
return textline_segments
def extract_filter_and_deskew_textlines(self, img, textline_mask, min_pixel_sum=20, median_bounds=(.5, 20)):
"""
Extracts and deskews text lines from an image based on a provided textline mask. This function identifies
text lines, filters out those that do not meet size criteria, calculates their minimum area rectangles,
performs perspective transformations to deskew each text line, and handles potential rotations to ensure
text lines are presented horizontally.
Parameters:
- img (numpy.ndarray): The original image from which to extract and deskew text lines. It should be a 3D array.
- textline_mask (numpy.ndarray): A binary mask where text lines have been segmented. It should be a 2D array.
- min_pixel_sum (int, optional): The minimum number of pixels (area) a connected component must have to be considered
a valid text line. If None, no filtering is applied.
- median_bounds (tuple, optional): A tuple representing the lower and upper bounds as multipliers for filtering
text lines based on the median size of identified text lines. If None, no filtering is applied.
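          For example (illustrative): with the default median_bounds=(.5, 20) and a median component size
          of 400 pixels, components smaller than 200 or larger than 8000 pixels are discarded.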
Returns:
- tuple:
- dict: A dictionary containing lists of the extracted and deskewed text line images along with their
metadata (center, left side, height, width, and rotation angle of the bounding box).
- numpy.ndarray: An image visualization of the filtered text line mask for debugging or analysis.
Description:
The function first uses connected components to identify potential text lines from the mask. It filters these
based on absolute size (min_pixel_sum) and relative size (median_bounds). For each valid text line, it computes
a minimum area rectangle, extracts and deskews the bounded region. This includes rotating the text line if it
is detected as vertical (taller than wide). Finally, it aggregates the results and provides an image for
visualization of the text lines retained after filtering.
Notes:
- This function assumes the textline_mask is properly segmented and binary (0s for background, 255 for text lines).
- Errors in perspective transformation due to incorrect contour extraction or bounding box calculations are handled
gracefully, reporting the error but continuing with other text lines.
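        Example (illustrative):
            textlines, filtered_mask = self.extract_filter_and_deskew_textlines(img, textline_mask)
            first_crop = textlines['array'][0]            # deskewed image crop of the first text line
            first_angle = textlines['rotation_angle'][0]  # rotation angle recorded for that line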
"""
num_labels, labels_im = cv2.connectedComponents(textline_mask)
# Thresholds for filtering
MIN_PIXEL_SUM = min_pixel_sum # absolute filtering
MEDIAN_LOWER_BOUND = median_bounds[0] # relative filtering
MEDIAN_UPPER_BOUND = median_bounds[1] # relative filtering
# Gather masks and their sizes
cc_sizes = []
masks = []
labels_im_filtered = labels_im > 0 # for visualizing filtering result
for label in range(1, num_labels): # ignore background class
mask = np.where(labels_im == label, True, False)
if MIN_PIXEL_SUM is None:
is_above_min_pixel_sum = True
else:
is_above_min_pixel_sum = mask.sum() > MIN_PIXEL_SUM
if is_above_min_pixel_sum: # dismiss mini segmentations to avoid skewing of median
cc_sizes.append(mask.sum())
masks.append(mask)
# filter masks by size in relation to median; then calculate contours and min area bounding box for remaining ones
rectangles = []
median = np.median(cc_sizes)
for mask in masks:
mask_sum = mask.sum()
            if MEDIAN_LOWER_BOUND is None:
                is_above_lower_median_bound = True
            else:
                is_above_lower_median_bound = mask_sum > median*MEDIAN_LOWER_BOUND
            if MEDIAN_UPPER_BOUND is None:
                is_below_upper_median_bound = True
            else:
                is_below_upper_median_bound = mask_sum < median*MEDIAN_UPPER_BOUND
            if is_above_lower_median_bound and is_below_upper_median_bound:
labels_im_filtered[mask > 0] = False
mask = (mask*255).astype(np.uint8)
contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
rect = cv2.minAreaRect(contours[0])
if np.prod(rect[1]) > 0: # filter out if height or width = 0
rectangles.append(rect)
# Transform (rotated) bounding boxes to horizontal; store together with rotation angle for downstream process re-transform
if rectangles:
# Filter rectangles and de-skew images
textline_images = []
for rect in rectangles:
width, height = rect[1]
rotation_angle = rect[2] # clarify how to interpret and use rotation angle!
# Convert dimensions to integer and ensure they are > 0
width = int(width)
height = int(height)
# get source and destination points for image transform
box = cv2.boxPoints(rect)
box = np.intp(box)
src_pts = box.astype("float32")
dst_pts = np.array([[0, height-1],
[0, 0],
[width-1, 0],
[width-1, height-1]], dtype="float32")
try:
M = cv2.getPerspectiveTransform(src_pts, dst_pts)
warped = cv2.warpPerspective(img, M, (width, height))
# Check and rotate if the text line is taller than wide
if height > width:
warped = cv2.rotate(warped, cv2.ROTATE_90_CLOCKWISE)
                        height, width = width, height
                        rotation_angle = 90 - rotation_angle
center = rect[0]
left = center[0] - width//2
textline_images.append((warped, center, left, height, width, rotation_angle))
except cv2.error as e:
print(f"Error with warpPerspective: {e}")
# cast to dict
keys = ['array', 'center', 'left', 'height', 'width', 'rotation_angle']
textline_images = {key: [tup[i] for tup in textline_images] for i, key in enumerate(keys)}
num_labels_filtered = len(textline_images['array'])
labels_im_filtered = np.repeat(labels_im_filtered[:, :, np.newaxis], 3, axis=2).astype(np.uint8) # 3 color channels for plotting
            print(f'Kept {num_labels_filtered} of {num_labels - 1} text segments after filtering.')  # label 0 is the background
            if MIN_PIXEL_SUM is not None:
                print(f'Deleted all segments smaller than {MIN_PIXEL_SUM} pixels (absolute minimum size).')
            if MEDIAN_LOWER_BOUND is not None:
                print(f'Deleted all segments smaller than {median*MEDIAN_LOWER_BOUND} pixels (lower median bound).')
            if MEDIAN_UPPER_BOUND is not None:
                print(f'Deleted all segments larger than {median*MEDIAN_UPPER_BOUND} pixels (upper median bound).')
if MEDIAN_LOWER_BOUND is not None or MEDIAN_UPPER_BOUND is not None:
print(f'Median segment size (pixel sum) used for filtering: {int(median)}.')
return textline_images, labels_im_filtered
def ocr_on_textlines(self, textline_images, model_name="microsoft/trocr-base-handwritten"):
"""
Processes a list of image arrays using a pre-trained OCR model to extract text.
Parameters:
- textline_images (dict): A dictionary with a key 'array' that contains a list of image arrays.
Each image array represents a line of text that will be processed by the OCR model.
- model_name (str): A huggingface model trained for OCR on single text lines
Returns:
- dict: A dictionary containing a list of extracted text under the key 'preds'.
Description:
        The function initializes the OCR model given by `model_name` (by default
        'microsoft/trocr-base-handwritten') using Hugging Face's
`pipeline` API for image-to-text conversion. Each image in the input list is converted from an
array format to a PIL Image, processed by the model, and the text prediction is collected.
The progress of image processing is printed every 10 images. The final result is a dictionary
with the key 'preds' that holds all text predictions as a list.
Note:
- This function requires the `transformers` library from Hugging Face and PIL library to run.
- Ensure that the model 'microsoft/trocr-base-handwritten' is correctly loaded and the
`transformers` library is updated to use the pipeline.
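        Example (illustrative):
            preds = self.ocr_on_textlines(textline_images)
            print(preds['preds'][0])  # recognized text of the first extracted line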
"""
pipe = pipeline("image-to-text", model=model_name)
# Model inference
textline_preds = []
len_array = len(textline_images['array'])
for i, textline in enumerate(textline_images['array'][:]):
if i % 10 == 1:
print(f'Processing textline no. {i} of {len_array}')
textline = Image.fromarray(textline)
textline_preds.append(pipe(textline))
# Convert to dict
preds = [pred[0]['generated_text'] for pred in textline_preds]
textline_preds_dict = {'preds': preds}
return textline_preds_dict
def adjust_font_size(self, draw, text, box_width):
"""
Adjusts the font size to ensure the text fits within a specified width.
Parameters:
- draw (ImageDraw.Draw): An instance of ImageDraw.Draw used to render the text.
- text (str): The text string to be rendered.
- box_width (int): The maximum width in pixels that the text should occupy.
Returns:
- ImageFont: A font object with a size adjusted to fit the text within the specified width.
"""
for font_size in range(1, 200): # Adjust the range as needed
font = ImageFont.load_default(font_size)
text_width = draw.textlength(text, font=font)
if text_width > box_width:
font_size = max(5, int(font_size - 10)) # min font size of 5
                return ImageFont.load_default(font_size)  # step back to a size that (approximately) still fits
return font # Return max size if none exceeded the box
def create_text_overlay_image(self, textline_images, textline_preds, img_shape, font_size=-1):
"""
Creates an image overlay with text annotations based on provided bounding box information and predictions.
Parameters:
- textline_images (dict): A dictionary containing the bounding box data for each text segment.
It should have keys 'left', 'center', 'width', and optionally 'height'. Each key should have
a list of values corresponding to each text segment's properties.
- textline_preds (dict): A dictionary containing the predicted text segments. It should have
a key 'preds' which holds a list of text predictions corresponding to the bounding boxes in
textline_images.
- img_shape (tuple): A tuple representing the shape of the image where the text is to be drawn.
The format should be (height, width).
- font_size (int, optional): Specifies the font size for the text. If set to -1 (default), the font size
is dynamically adjusted to fit the text within its bounding box width using the `adjust_font_size`
function. If a specific integer is provided, it uses that size for all text segments.
Returns:
- Image: An image object with text drawn over a blank white background.
Raises:
- AssertionError: If the lengths of the lists in `textline_images` and `textline_preds['preds']`
do not correspond, indicating a mismatch in the number of bounding boxes and text predictions.
"""
for key in textline_images.keys():
            assert len(textline_images[key]) == len(textline_preds['preds']), f"Lengths of '{key}' and 'preds' do not match"
# Create a blank white image
img_gen = Image.new('RGB', (img_shape[1], img_shape[0]), color=(255, 255, 255))
draw = ImageDraw.Draw(img_gen)
# Draw each text segment within its bounding box
for i in range(len(textline_preds['preds'])):
left_x = textline_images['left'][i]
center_y = textline_images['center'][i][1]
#height = textline_images['height'][i]
width = textline_images['width'][i]
text = textline_preds['preds'][i]
# dynamic or static text size
if font_size==-1:
font = self.adjust_font_size(draw, text, width)
else:
font = ImageFont.load_default(font_size)
draw.text((left_x, center_y), text, fill=(0, 0, 0), font=font, align='left')
return img_gen
def visualize_model_output(self, prediction, img):
"""
Visualizes the output of a model prediction by overlaying predicted classes with distinct colors onto the original image.
Parameters:
- prediction (ndarray): A 3D array where the first channel holds the class predictions.
- img (ndarray): The original image to overlay predictions onto. This should be in the same dimensions or resized accordingly.
Returns:
- ndarray: An image where the model's predictions are overlaid on the original image using a predefined color map.
Description:
The function first identifies unique classes present in the prediction's first channel. Each class is assigned a specific color from a predefined dictionary `rgb_colors`. The function then creates an output image where each pixel's color corresponds to the class predicted at that location.
The function resizes the original image to match the dimensions of the prediction if necessary. It then blends the original image and the colored prediction output using OpenCV's `addWeighted` method to produce a final image that highlights the model's predictions with transparency.
Note:
- This function relies on `numpy` for array manipulations and `cv2` for image processing.
- Ensure the `rgb_colors` dictionary contains enough colors for all classes your model can predict.
- The function assumes `prediction` array's shape is compatible with `img`.
"""
unique_classes = np.unique(prediction[:,:,0])
rgb_colors = {'0' : [255, 255, 255],
'1' : [255, 0, 0],
'2' : [255, 125, 0],
'3' : [255, 0, 125],
'4' : [125, 125, 125],
'5' : [125, 125, 0],
'6' : [0, 125, 255],
'7' : [0, 125, 0],
'8' : [125, 125, 125],
'9' : [0, 125, 255],
'10' : [125, 0, 125],
'11' : [0, 255, 0],
'12' : [0, 0, 255],
'13' : [0, 255, 255],
'14' : [255, 125, 125],
'15' : [255, 0, 255]}
output = np.zeros(prediction.shape)
for unq_class in unique_classes:
rgb_class_unique = rgb_colors[str(int(unq_class))]
output[:,:,0][prediction[:,:,0]==unq_class] = rgb_class_unique[0]
output[:,:,1][prediction[:,:,0]==unq_class] = rgb_class_unique[1]
output[:,:,2][prediction[:,:,0]==unq_class] = rgb_class_unique[2]
img = resize_image(img, output.shape[0], output.shape[1])
output = output.astype(np.int32)
img = img.astype(np.int32)
#added_image = cv2.addWeighted(img,0.5,output,0.1,0) # orig by eynollah (gives dark image output)
added_image = cv2.addWeighted(img,0.8,output,0.2,10)
return added_image
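

if __name__ == "__main__":
    # Minimal end-to-end sketch of the intended pipeline (illustrative only). The input path
    # "page.jpg", the output path "ocr_overlay.png", and reading the text-line mask from channel 0
    # of the segmentation output are assumptions for this example, not part of the class API above.
    ocrd = OCRD("page.jpg")

    # Binarize; 'detailed' mode downloads the SBB/eynollah-binarization model from the Hub.
    binarized = ocrd.binarize_image(ocrd.image, binarize_mode='detailed')

    # Segment text lines and build a single-channel binary mask for connected-component analysis.
    textline_segments = ocrd.segment_textlines(binarized)
    textline_mask = (textline_segments[:, :, 0] > 0).astype(np.uint8) * 255

    # Extract, filter, and deskew the individual text lines.
    textlines, filtered_mask = ocrd.extract_filter_and_deskew_textlines(binarized, textline_mask)

    # Run OCR on each line and render the recognized text as an overlay image.
    preds = ocrd.ocr_on_textlines(textlines)
    overlay = ocrd.create_text_overlay_image(textlines, preds, binarized.shape[:2])
    overlay.save("ocr_overlay.png")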