Diffstat (limited to 'scripts/r_masking/subcore.py')
-rw-r--r-- | scripts/r_masking/subcore.py | 117
1 file changed, 117 insertions, 0 deletions
diff --git a/scripts/r_masking/subcore.py b/scripts/r_masking/subcore.py
new file mode 100644
index 0000000..cf7bf7d
--- /dev/null
+++ b/scripts/r_masking/subcore.py
@@ -0,0 +1,117 @@
+import numpy as np
+import cv2
+from PIL import Image
+
+import scripts.r_masking.core as core
+from reactor_utils import tensor_to_pil
+
+try:
+    from ultralytics import YOLO
+except Exception as e:
+    # Only report the failure here; any later use of YOLO will raise a NameError
+    print(f"Unable to import YOLO from 'ultralytics': {e}")
+
+
+def load_yolo(model_path: str):
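+    # Load a YOLO model from model_path. On ModuleNotFoundError, loading the stock
+    # "yolov8n.pt" weights once first works around the packaging issue linked below.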
+    try:
+        return YOLO(model_path)
+    except ModuleNotFoundError:
+        # https://github.com/ultralytics/ultralytics/issues/3856
+        YOLO("yolov8n.pt")
+        return YOLO(model_path)
+
+def inference_bbox(
+    model,
+    image: Image.Image,
+    confidence: float = 0.3,
+    device: str = "",
+):
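+    # Runs bbox detection and returns four parallel lists:
+    # [class labels, xyxy bboxes, boolean rectangle masks, per-box confidences].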
+    pred = model(image, conf=confidence, device=device)
+
+    bboxes = pred[0].boxes.xyxy.cpu().numpy()
+    cv2_image = np.array(image)
+    if len(cv2_image.shape) == 3:
+        cv2_image = cv2_image[:, :, ::-1].copy()  # Convert RGB to BGR for cv2 processing
+    else:
+        # Grayscale input: expand to 3 channels so the BGR->GRAY conversion below stays uniform
+        cv2_image = cv2.cvtColor(cv2_image, cv2.COLOR_GRAY2BGR)
+    cv2_gray = cv2.cvtColor(cv2_image, cv2.COLOR_BGR2GRAY)
+
+    segms = []
+    for x0, y0, x1, y1 in bboxes:
+        cv2_mask = np.zeros(cv2_gray.shape, np.uint8)
+        cv2.rectangle(cv2_mask, (int(x0), int(y0)), (int(x1), int(y1)), 255, -1)
+        cv2_mask_bool = cv2_mask.astype(bool)
+        segms.append(cv2_mask_bool)
+
+    n = bboxes.shape[0]  # number of detections
+    if n == 0:
+        return [[], [], [], []]
+
+    results = [[], [], [], []]
+    for i in range(len(bboxes)):
+        results[0].append(pred[0].names[int(pred[0].boxes[i].cls.item())])
+        results[1].append(bboxes[i])
+        results[2].append(segms[i])
+        results[3].append(pred[0].boxes[i].conf.cpu().numpy())
+
+    return results
+
+
+class UltraBBoxDetector:
+    bbox_model = None
+
+    def __init__(self, bbox_model):
+        self.bbox_model = bbox_model
+
+    def detect(self, image, threshold, dilation, crop_factor, drop_size=1, detailer_hook=None):
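+        # Returns Impact-Pack-style SEGS: ((height, width), [core.SEG, ...]);
+        # the image tensor is assumed to be (batch, height, width, channels).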
+        drop_size = max(drop_size, 1)
+        detected_results = inference_bbox(self.bbox_model, tensor_to_pil(image), threshold)
+        segmasks = core.create_segmasks(detected_results)
+
+        if dilation > 0:
+            segmasks = core.dilate_masks(segmasks, dilation)
+
+        items = []
+        h = image.shape[1]
+        w = image.shape[2]
+
+        for x, label in zip(segmasks, detected_results[0]):
+            item_bbox = x[0]
+            item_mask = x[1]
+
+            y1, x1, y2, x2 = item_bbox
+
+            if x2 - x1 > drop_size and y2 - y1 > drop_size:  # minimum dimension must be (2,2) to avoid squeeze issue
+                crop_region = core.make_crop_region(w, h, item_bbox, crop_factor)
+
+                if detailer_hook is not None:
+                    crop_region = detailer_hook.post_crop_region(w, h, item_bbox, crop_region)
+
+                cropped_image = core.crop_image(image, crop_region)
+                cropped_mask = core.crop_ndarray2(item_mask, crop_region)
+                confidence = x[2]
+                # bbox_size = (item_bbox[2] - item_bbox[0], item_bbox[3] - item_bbox[1])  # (w,h)
+
+                item = core.SEG(cropped_image, cropped_mask, confidence, crop_region, item_bbox, label, None)
+
+                items.append(item)
+
+        shape = image.shape[1], image.shape[2]
+        segs = shape, items
+
+        if detailer_hook is not None and hasattr(detailer_hook, "post_detection"):
+            segs = detailer_hook.post_detection(segs)
+
+        return segs
+
+    def detect_combined(self, image, threshold, dilation):
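+        # Runs the same bbox inference but merges all region masks into one combined mask.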
+        detected_results = inference_bbox(self.bbox_model, core.tensor2pil(image), threshold)
+        segmasks = core.create_segmasks(detected_results)
+        if dilation > 0:
+            segmasks = core.dilate_masks(segmasks, dilation)
+
+        return core.combine_masks(segmasks)
+
+    def setAux(self, x):
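+        # Stub: this detector uses no auxiliary model; presumably kept for
+        # interface compatibility with detectors that do.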
+        pass
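+
+
+# Minimal usage sketch (the model path and call values below are illustrative
+# assumptions, not part of this module):
+#   model = load_yolo("models/ultralytics/bbox/face_yolov8m.pt")
+#   detector = UltraBBoxDetector(model)
+#   segs = detector.detect(image_tensor, threshold=0.5, dilation=10, crop_factor=3.0)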