MDQE Video Instance Segmentation

Categories: AI, PyTorch, CVPR 2023, video instance segmentation, CV
Source: https://modelscope.cn/models/marquezx/cv_mdqe_video-instance-segmentation
License: Apache License 2.0


Model Description

MDQE (Mining Discriminative Query Embeddings), published at CVPR 2023, is a video instance segmentation algorithm that is particularly effective at segmenting heavily occluded objects in videos.

For details, please refer to the paper: MDQE: Mining Discriminative Query Embeddings To Segment Occluded Instances on Challenging Videos (CVPR 2023).
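
Before the full demo script in the code example section below, here is a minimal sketch of how the model is invoked through the ModelScope pipeline API. The model id and revision are taken from this page; the dummy input shape is an assumption for illustration only, and the real frame-loading logic is shown in the full example.

from modelscope.pipelines import pipeline
import numpy as np

# load the MDQE pipeline (model id and revision as listed on this page)
seg = pipeline('marquezx/cv_mdqe_video-instance-segmentation',
               model='marquezx/cv_mdqe_video-instance-segmentation',
               model_revision='v1.0.0')

# the full demo below feeds the pipeline a (T, H, W, 3) uint8 array of BGR
# frames; a tiny black dummy clip is used here only to show the call shape
frames = np.zeros((4, 360, 640, 3), dtype=np.uint8)
predictions, visualized_output = seg(frames)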

(Figures: model pipeline diagram and example mask output.)

Operating Environment

Dependencies and Installation

# git clone the original repository
git clone https://github.com/MinghanLi/MDQE_CVPR2023.git
cd MDQE_CVPR2023
# set up detectron2 and the other dependencies following the repository's
# installation guide, then install modelscope
pip install modelscope
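
After installation, a quick sanity check can help before running the demo. This is a minimal sketch: it only verifies that the core packages import and that a GPU is visible.

import torch
import detectron2
import modelscope

print('torch:', torch.__version__)
print('detectron2:', detectron2.__version__)
print('modelscope:', modelscope.__version__)
print('CUDA available:', torch.cuda.is_available())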

Code Example

import argparse
import glob
import os

import cv2
import numpy as np
import tqdm

from modelscope.pipelines import pipeline

from detectron2.config import get_cfg
from detectron2.data.detection_utils import read_image
from detectron2.utils.file_io import PathManager

# add_mdqe_config is provided by the cloned MDQE repository
from mdqe import add_mdqe_config

def setup_cfg(args):
    # load config from file and command-line arguments
    cfg = get_cfg()
    add_mdqe_config(cfg)
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.MODEL.WEIGHTS = args.checkpoint
    cfg.freeze()
    return cfg


def get_parser():
    parser = argparse.ArgumentParser(description="MDQE video instance segmentation demo")
    parser.add_argument(
        "--config-file",
        default="configs/R50_ovis_360.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument(
        "--input",
        nargs="+",
        default=["test_imgs_sub"],
        help="A list of space-separated input directories, one per video, each containing that video's frames",
    )
    parser.add_argument(
        "--output",
        default="test_out",
        help="A directory to save output visualizations.",
    )

    parser.add_argument(
        "--checkpoint",
        default="mdqe_r50_ovis_bs16_360p_f4.pth",
        help="Path to the checkpoint pth",
    )
    parser.add_argument(
        "--save-frames",
        default=True,
        help="Save frame level image outputs.",
    )
    parser.add_argument(
        "--opts",
        help="Modify config options using the command-line 'KEY VALUE' pairs",
        default=[],
        nargs=argparse.REMAINDER,
    )
    return parser


args = get_parser().parse_args()

# build the detectron2 config for the MDQE model
cfg = setup_cfg(args)

# load the MDQE video instance segmentation pipeline from ModelScope
inference = pipeline('marquezx/cv_mdqe_video-instance-segmentation',
                     model='marquezx/cv_mdqe_video-instance-segmentation',
                     model_revision='v1.0.0')

# expand a glob pattern if a single input path was given
if len(args.input) == 1:
    args.input = glob.glob(os.path.expanduser(args.input[0]))
    assert args.input, "The input path(s) was not found"

if not os.path.isdir(args.output):
    PathManager.mkdirs(args.output)

for vid_path in tqdm.tqdm(args.input, disable=not args.output):
    vid_file = os.path.basename(vid_path)
    out_vid_path = os.path.join(args.output, vid_file)
    if args.save_frames and not os.path.isdir(out_vid_path):
        PathManager.mkdirs(out_vid_path)

    vid_frame_paths = sorted(PathManager.ls(vid_path))
    vid_frames = []
    for img_file in vid_frame_paths:
        img_path = os.path.join(vid_path, img_file)
        # read with PIL in BGR format, to be consistent with evaluation
        img = read_image(img_path, format="BGR")
        vid_frames.append(img)
    # stack the frames into a (T, H, W, 3) array and run inference on the clip
    vid_frames = np.array(vid_frames)
    output = inference(vid_frames)

    predictions, visualized_output = output
    if args.save_frames:
        for img_file, _vis_output in zip(vid_frame_paths, visualized_output):
            out_filename = os.path.join(out_vid_path, img_file)
            _vis_output.save(out_filename)

    H, W = visualized_output[0].height, visualized_output[0].width

    # encode the visualized frames into an mp4 video at 10 FPS
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    out = cv2.VideoWriter(out_vid_path + ".mp4", fourcc, 10.0, (W, H), True)
    for _vis_output in visualized_output:
        # the visualizer returns RGB; flip channels back to BGR for OpenCV
        frame = _vis_output.get_image()[:, :, ::-1]
        out.write(frame)
    out.release()
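
The script above is run from the cloned repository root. Assuming it is saved as demo.py (the file name is an assumption; any name works), a typical invocation mirroring the argument defaults is:

python demo.py --config-file configs/R50_ovis_360.yaml \
    --checkpoint mdqe_r50_ovis_bs16_360p_f4.pth \
    --input test_imgs_sub \
    --output test_out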

Citation

If you find our work helpful for your research, please consider citing the following BibTeX entry.

@InProceedings{Li_2023_CVPR,
    author    = {Li, Minghan and Li, Shuai and Xiang, Wangmeng and Zhang, Lei},
    title     = {MDQE: Mining Discriminative Query Embeddings To Segment Occluded Instances on Challenging Videos},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2023},
    pages     = {10524-10533}
}