แยกงานศิลปะจากอิมเมจการ์ดเกมบนโต๊ะด้วย OpenCV

10

ฉันเขียนสคริปต์ขนาดเล็กด้วย Python เพื่อพยายามแยกหรือครอบตัดเฉพาะส่วนของไพ่ที่เป็นงานศิลปะ โดยลบส่วนที่เหลือทั้งหมดออก ฉันได้ลองใช้วิธี thresholding (การกำหนดค่าขีดแบ่ง) แบบต่างๆ แล้ว แต่ยังไม่ได้ผลลัพธ์ที่ต้องการ นอกจากนี้โปรดทราบว่าฉันไม่สามารถบันทึกตำแหน่งของงานศิลปะด้วยตนเองได้ เพราะมันไม่ได้อยู่ในตำแหน่งหรือมีขนาดเดียวกันเสมอไป แต่จะอยู่ในรูปสี่เหลี่ยมผืนผ้าเสมอ โดยส่วนอื่นทั้งหมดเป็นเพียงข้อความและเส้นขอบ

from matplotlib import pyplot as plt
import cv2
import numpy as np  # required for the structuring element built below

# NOTE: `filename` is not defined in this snippet; it must hold the path of
# the card image before this line runs.
img = cv2.imread(filename)
if img is None:
    # cv2.imread reports an unreadable/missing file by returning None.
    raise FileNotFoundError("could not read image: {}".format(filename))
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# With THRESH_OTSU the threshold value passed (0) is ignored and the one
# chosen by Otsu's method is returned in `ret`.
ret, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY)

# Invert the mask so that the dark regions of the card become white.
binary = cv2.bitwise_not(binary)
kernel = np.ones((15, 15), np.uint8)

# NOTE: despite the variable name, the operation applied is a morphological
# *opening* (MORPH_OPEN) with the 15x15 kernel.
closing = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel)

plt.imshow(closing)
plt.show()

ผลลัพธ์ปัจจุบันเป็นสิ่งที่ใกล้เคียงที่สุดที่ฉันทำได้ ฉันอาจจะมาถูกทางแล้วและลองปรับแต่งเพิ่มเติมเพื่อวาดรูปสี่เหลี่ยมผืนผ้ารอบส่วนสีขาว แต่ฉันไม่คิดว่ามันเป็นวิธีที่ยั่งยืน:

โปรดทราบว่าดูการ์ดด้านล่างไม่ใช่เฟรมทั้งหมดที่มีขนาดหรือตำแหน่งเท่ากัน แต่มีงานศิลปะที่มีข้อความและเส้นขอบอยู่รอบ ๆ เท่านั้น ไม่จำเป็นต้องถูกตัดอย่างเด็ดขาด แต่ชัดเจนว่าศิลปะคือ "ภูมิภาค" ของการ์ดล้อมรอบด้วยภูมิภาคอื่น ๆ ที่มีข้อความบางส่วน เป้าหมายของฉันคือพยายามจับภาพพื้นที่ของงานศิลปะเท่าที่จะทำได้

— Waroulolz
แหล่งที่มา

เอาต์พุตแบบใดที่คุณรอจากการ์ด "Narcomoeba" มันไม่ได้มีขอบเขตรูปทรงปกติ นอกจากนี้ฉันไม่คิดว่าจะมีวิธีแก้ปัญหาหากไม่ได้รับความช่วยเหลือจากผู้ใช้

— Burak

วิธีที่ดีที่สุดที่คุณสามารถทำได้คือการคลิกที่จุดเชื่อมโยง, ปรับปรุงจุดเหล่านั้นโดยจับคู่กับมุมที่ตรวจพบที่ใกล้ที่สุดจากนั้นหารูปร่างตามขอบระหว่างจุด ฉันยังคงสงสัยว่าการใช้อัลกอริทึมที่ดีจะทำให้สำเร็จได้บ่อยครั้งที่สุด การปรับเกณฑ์การตรวจจับขอบและให้คำแนะนำเกี่ยวกับความโค้งของเส้นระหว่างจุด (คลิกซ้าย: ตรง, คลิกขวา: โค้งหรืออาจ?) ตามเวลาจริงสามารถเพิ่มโอกาสในการประสบความสำเร็จ

— Burak

1

ฉันเพิ่มตัวอย่างที่ดีกว่าลงในการ์ด Narcomoeba อย่างที่คุณเห็นฉันสนใจในขอบเขตงานศิลปะของการ์ดมันไม่จำเป็นต้องแม่นยำ 100% ฉันคิดว่าต้องมีการเปลี่ยนแปลงบางอย่างที่ทำให้ฉันสามารถแบ่งไพ่ใน 'ภูมิภาค' ที่แตกต่างกันเพื่อที่จะพูด

— Waroulolz

ฉันคิดว่าคุณสามารถครอบตัดรูปภาพเป็น 2 ประเภท (อาจเป็น 4 ประเภทตามข้อมูลที่ให้ไว้รูปภาพจะแสดงที่ด้านบนหรือด้านขวา) และใช้ opencv เพื่อตรวจสอบว่ามีข้อความในภาพหรือไม่ ดังนั้นครอบตัด -> ตัวกรอง -> ผล -> คมตัดหากต้องการง่ายกว่าสำหรับ opencv เพื่อให้ได้ผลลัพธ์ที่ดีขึ้น

— elprup

3

ฉันใช้การแปลงเส้น Hough เพื่อตรวจจับส่วนที่เป็นเส้นตรงของภาพ จุดตัดของเส้นทั้งหมดถูกใช้เพื่อสร้างสี่เหลี่ยมที่เป็นไปได้ทั้งหมดซึ่งไม่มีจุดตัดอื่นอยู่ภายใน เนื่องจากส่วนของการ์ดที่คุณกำลังค้นหาเป็นสี่เหลี่ยมที่ใหญ่ที่สุดในบรรดาสี่เหลี่ยมเหล่านั้นเสมอ (อย่างน้อยในตัวอย่างที่คุณให้ไว้) ฉันจึงเลือกสี่เหลี่ยมที่ใหญ่ที่สุดเป็นผู้ชนะ สคริปต์ทำงานโดยไม่ต้องมีการโต้ตอบกับผู้ใช้

import cv2
import numpy as np
from collections import defaultdict

def segment_by_angle_kmeans(lines, k=2, **kwargs):
    """Cluster Hough lines into `k` groups according to their angle.

    Each entry of `lines` is read as ``entry[0] == (rho, theta)``. The angle
    is doubled and mapped onto the unit circle before clustering, so that
    theta and theta + pi land on the same point.

    Optional keyword arguments forwarded to ``cv2.kmeans``:
        criteria: termination criteria tuple (type, max_iter, epsilon);
            defaults to (EPS + MAX_ITER, 10, 1.0).
        flags: initialisation flag; defaults to cv2.KMEANS_RANDOM_CENTERS.
        attempts: number of attempts; defaults to 10.

    Returns a list of groups, each group being a list of the original line
    entries that received the same k-means label.
    """
    fallback_type = cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER
    criteria = kwargs.get('criteria', (fallback_type, 10, 1.0))
    flags = kwargs.get('flags', cv2.KMEANS_RANDOM_CENTERS)
    attempts = kwargs.get('attempts', 10)

    # Angle (theta) of every line, in radians.
    thetas = np.array([entry[0][1] for entry in lines])

    # Coordinates of the doubled angle on the unit circle (float32 samples
    # for cv2.kmeans).
    unit_pts = np.array(
        [[np.cos(2*theta), np.sin(2*theta)] for theta in thetas],
        dtype=np.float32,
    )

    # cv2.kmeans returns (retval, labels, centers); only the labels are used.
    _, labels, _centers = cv2.kmeans(unit_pts, k, None, criteria, attempts, flags)
    labels = labels.reshape(-1)  # flatten to one label per line

    # Bucket the lines by the label k-means assigned to them.
    buckets = defaultdict(list)
    for label, entry in zip(labels, lines):
        buckets[label].append(entry)
    return list(buckets.values())

def intersection(line1, line2):
    """Find the intersection of two lines given in Hesse normal form.

    Each line is read as ``line[0] == (rho, theta)``, describing the line
    ``x*cos(theta) + y*sin(theta) = rho``.

    Returns the closest integer pixel location as ``[[x, y]]``.

    Raises numpy.linalg.LinAlgError when the two lines are parallel (the
    linear system is singular).

    See https://stackoverflow.com/a/383527/5087436
    """
    rho1, theta1 = line1[0]
    rho2, theta2 = line2[0]

    coeffs = np.array([
        [np.cos(theta1), np.sin(theta1)],
        [np.cos(theta2), np.sin(theta2)],
    ])
    # Use a 1-D right-hand side so that solve() returns a shape-(2,) vector
    # and unpacking yields true scalars. A column vector would yield
    # one-element arrays, whose conversion with int() is deprecated in
    # recent NumPy releases.
    rhs = np.array([rho1, rho2])
    x0, y0 = np.linalg.solve(coeffs, rhs)
    return [[int(np.round(x0)), int(np.round(y0))]]


def segmented_intersections(lines):
    """Collect the intersections between lines of different groups.

    `lines` is a list of groups (each a list of lines). Every line of a
    group is intersected with every line of each later group; lines within
    the same group are never intersected with each other.

    Returns the list of results of `intersection`, in group order.
    """
    return [
        intersection(first, second)
        for position, group in enumerate(lines[:-1])
        for later_group in lines[position + 1:]
        for first in group
        for second in later_group
    ]

def rect_from_crossings(crossings):
    """Find all axis-aligned rectangles spanned by two crossings that
    contain no other crossing.

    Each crossing is read as ``crossing[0] == (x, y)``; coordinates are
    converted with int(). A pair (i, j) is considered only when point i is
    the top-left corner (x1 <= x2 and y1 <= y2), so every rectangle is
    reported once. Degenerate pairs (i == j, or zero width/height) are kept,
    exactly as before; they have zero area.

    A third crossing blocks the rectangle when it lies inside it or on an
    edge, excluding the four corner positions.

    Returns a list of ``[[x1, y1], [x2, y2]]`` entries.
    """
    # Parse every coordinate once instead of inside the triple loop.
    points = [(int(c[0][0]), int(c[0][1])) for c in crossings]

    rectangles = []
    for i, (x1, y1) in enumerate(points):
        for j, (x2, y2) in enumerate(points):
            # Orientation does not depend on the third point, so test it
            # before scanning: skips reversed (duplicate) rectangles.
            if x1 > x2 or y1 > y2:
                continue

            # any() stops at the first blocking point.
            blocked = any(
                k != i and k != j
                and ((x1 <= x3 <= x2 and y1 < y3 < y2)
                     or (x1 < x3 < x2 and y1 <= y3 <= y2))
                for k, (x3, y3) in enumerate(points)
            )
            if not blocked:
                rectangles.append([[x1, y1], [x2, y2]])

    return rectangles

if __name__ == '__main__':
    # Other sample inputs used with the same settings: 'TAJFp.jpg', 'Bj2uu.jpg'
    image_path = 'yi8db.png'
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread returns None instead of raising when the file is
        # missing or cannot be decoded.
        raise SystemExit('Could not read image: {}'.format(image_path))

    height, width = img.shape[:2]

    # Normalise every card to a width of 380 px so the Canny/Hough
    # parameters below apply to all inputs.
    scale = 380/width
    dim = (int(width*scale), int(height*scale))
    img = cv2.resize(img, dim, interpolation=cv2.INTER_AREA)

    img2 = img.copy()
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # The third positional argument of GaussianBlur is sigmaX. The original
    # call passed cv2.BORDER_DEFAULT there, whose value is 4, so the
    # effective sigma was 4; it is spelled out here to keep the tuned result.
    gray = cv2.GaussianBlur(gray, (5, 5), 4)

    # Parameters of Canny and Hough may have to be tweaked to work for as many cards as possible
    edges = cv2.Canny(gray, 10, 45, apertureSize=7)
    lines = cv2.HoughLines(edges, 1, np.pi/90, 160)
    if lines is None or len(lines) < 2:
        # HoughLines yields None when nothing is found, and clustering into
        # two angle groups needs at least two lines.
        raise SystemExit('Not enough lines detected; adjust the Canny/Hough parameters.')

    segmented = segment_by_angle_kmeans(lines)
    crossings = segmented_intersections(segmented)
    rectangles = rect_from_crossings(crossings)

    def rect_area(rect):
        # Area of a [[x1, y1], [x2, y2]] rectangle.
        (ax, ay), (bx, by) = rect
        return abs(ax - bx) * abs(ay - by)

    # Find biggest remaining rectangle (first one wins on ties).
    best = max(rectangles, key=rect_area, default=None)
    if best is None or rect_area(best) == 0:
        raise SystemExit('No artwork rectangle found; adjust the Canny/Hough parameters.')
    (x1_rect, y1_rect), (x2_rect, y2_rect) = best

    # img2 receives the outline for debugging; it is not displayed or saved.
    cv2.rectangle(img2, (x1_rect, y1_rect), (x2_rect, y2_rect), (0, 0, 255), 2)
    roi = img[y1_rect:y2_rect, x1_rect:x2_rect]

    cv2.imshow("Output", roi)
    cv2.imwrite("Output.png", roi)
    cv2.waitKey()

นี่คือผลลัพธ์ที่มีตัวอย่างที่คุณให้ไว้:

รหัสสำหรับการหาจุดตัดของเส้นสามารถพบได้ที่นี่: ค้นหาจุดตัดของเส้นสองเส้นที่วาดโดยใช้ houghlines opencv

คุณสามารถอ่านเพิ่มเติมเกี่ยวกับเส้น Hough ได้ที่นี่

— M. Martin
แหล่งที่มา

2

ขอบคุณสำหรับการทำงานอย่างหนัก คำตอบของคุณคือสิ่งที่ฉันกำลังมองหา ฉันรู้ว่า Hough Lines จะมีบทบาทสำคัญที่นี่ ฉันลองตัวเองสองสามครั้งเพื่อใช้ แต่ไม่สามารถแก้ปัญหาของคุณได้ ในขณะที่คุณแสดงความคิดเห็นปรับแต่งไม่กี่จะต้องทำกับพารามิเตอร์เพื่อสรุปแนวทาง แต่ตรรกะที่ดีและมีประสิทธิภาพ

— Waroulolz

1

ฉันคิดว่ามันเป็นทางออกที่ดีสำหรับปัญหาประเภทนี้ไม่จำเป็นต้องป้อนข้อมูลผู้ใช้ ไชโย !!

— Meto

@Meto - ฉันซาบซึ้งกับงานที่ทำที่นี่ แต่ฉันไม่เห็นด้วยไม่มีส่วนที่ผู้ใช้ป้อน เป็นเพียงนามแฝงไม่ว่าคุณจะป้อนข้อมูลที่รันไทม์หรือเปลี่ยนขีด จำกัด หลังจากค้นหาผลลัพธ์

— Burak

1

@Burak - ฉันสามารถรันตัวอย่างทั้งหมดที่ให้มาด้วยการตั้งค่าเดียวกันได้ ดังนั้นฉันจึงสันนิษฐานว่าการ์ดอื่น ๆ ส่วนใหญ่ก็จะใช้ได้เช่นกัน ดังนั้นการตั้งค่า threshold จึงต้องทำเพียงครั้งเดียว

— M. Martin

0

เรารู้ว่าไพ่มีขอบเขตตรงตามแนวแกน x และ y เราสามารถใช้สิ่งนี้เพื่อแยกส่วนของภาพ รหัสต่อไปนี้ใช้ตรวจจับเส้นแนวนอนและแนวตั้งในภาพ

import cv2
import numpy as np

def mouse_callback(event, x, y, flags, params):
    """Record the cell of detected lines surrounding a left click.

    For each of the first two left-button clicks, the nearest detected
    horizontal lines above/below `y` (from the global `hor`) and vertical
    lines left/right of `x` (from the global `ver`) are looked up and
    appended, moved one pixel inward, to the global bound lists. When no
    line exists on a side, the image border (global `height` / `width`) is
    used instead of raising ValueError.
    """
    global num_click
    if num_click >= 2 or event != cv2.EVENT_LBUTTONDOWN:
        return

    # default=... stands in for a line just outside the image, so that after
    # the +/-1 adjustment the bound falls on the first/last pixel.
    top = max((i for i in hor if i < y), default=-1) + 1
    bottom = min((i for i in hor if i > y), default=height) - 1
    left = max((i for i in ver if i < x), default=-1) + 1
    right = min((i for i in ver if i > x), default=width) - 1

    upper_bound.append(top)
    lower_bound.append(bottom)
    left_bound.append(left)
    right_bound.append(right)

    # Count the click only once its bounds have been stored.
    num_click = num_click + 1
    print(num_click)

filename = 'image.png'
thr = 100  # edge detection threshold
lined = 50  # number of consecutive edge pixels required for a row/column to count as a line
num_click = 0  # select only twice
upper_bound, lower_bound, left_bound, right_bound = [], [], [], []
winname = 'img'


def _find_run_indices(mask, min_run):
    """Return the indices of the rows of `mask` that contain at least
    `min_run` consecutive truthy pixels."""
    found = []
    for idx, row in enumerate(mask):
        run = 0
        for pixel in row:
            run = run + 1 if pixel else 0
            if run >= min_run:
                found.append(idx)
                break
    return found


img = cv2.imread(filename, 1)
if img is None:
    # cv2.imread returns None instead of raising when the file is missing
    # or cannot be decoded.
    raise SystemExit('Could not read image: {}'.format(filename))

cv2.namedWindow(winname)
cv2.setMouseCallback(winname, mouse_callback)

gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
bw = cv2.Canny(gray, thr, 3*thr)

height, width, _ = img.shape

# The scan deliberately stops one short of the last row and column, matching
# the range(0, height-1) / range(0, width-1) bounds this script has always used.
scan_area = bw[:height-1, :width-1]

# find horizontal lines (rows with a long enough run of edge pixels)
hor = _find_run_indices(scan_area, lined)

# find vertical lines (same test applied to the columns)
ver = _find_run_indices(scan_area.T, lined)

# draw lines
disp_img = np.copy(img)
for i in hor:
    cv2.line(disp_img, (0, i), (width-1, i), (0,0,255), 1)
for i in ver:
    cv2.line(disp_img, (i, 0), (i, height-1), (0,0,255), 1)

# Keep showing the annotated image until mouse_callback has recorded two
# clicks; waitKey is what lets the window process mouse events.
while num_click < 2:
    cv2.imshow(winname, disp_img)
    cv2.waitKey(10)

# Crop the union of the two clicked cells.
disp_img = img[min(upper_bound):max(lower_bound), min(left_bound):max(right_bound)]
cv2.imshow(winname, disp_img)
cv2.waitKey()   # Press any key to exit
cv2.destroyAllWindows()

คุณเพียงแค่ต้องคลิกสองพื้นที่เพื่อรวม พื้นที่คลิกตัวอย่างและผลลัพธ์ที่สอดคล้องกันมีดังนี้:

ผลลัพธ์จากภาพอื่น ๆ :

— Burak
แหล่งที่มา

0

ฉันไม่คิดว่าจะเป็นไปได้ที่จะครอบตัด ROI ของงานศิลปะโดยอัตโนมัติด้วยเทคนิคการประมวลผลภาพแบบดั้งเดิม เนื่องจากสี ขนาด ตำแหน่ง และพื้นผิวของการ์ดแต่ละใบเปลี่ยนแปลงไปไม่เหมือนกัน คุณจะต้องศึกษา machine learning / deep learning และฝึกตัวจำแนก (classifier) ของคุณเองหากต้องการทำโดยอัตโนมัติ นี่คือแนวทางแบบแมนนวลในการเลือกและครอบตัด ROI แบบคงที่จากภาพ

แนวคิดคือการใช้cv2.setMouseCallback()และตัวจัดการเหตุการณ์เพื่อตรวจสอบว่ามีการคลิกหรือปล่อยเมาส์ สำหรับการใช้งานนี้คุณสามารถแยก ROI ของงานศิลปะได้โดยกดปุ่มซ้ายของเมาส์ค้างไว้แล้วลากเพื่อเลือก ROI ที่ต้องการ เมื่อคุณเลือก ROI ที่ต้องการแล้วให้กดcเพื่อครอบตัดและบันทึก ROI คุณสามารถรีเซ็ต ROI โดยใช้ปุ่มเมาส์ขวา

งานศิลปะที่บันทึกไว้ ROIs

รหัส

import cv2

class ExtractArtworkROI(object):
    """Interactive ROI picker for the image '1.png'.

    Drag with the left mouse button in the 'image' window to select a box,
    right-click to clear it, and call crop_ROI() to show and save the
    selection as 'ROI.png'.
    """

    def __init__(self):
        # Load image
        self.original_image = cv2.imread('1.png')
        if self.original_image is None:
            # cv2.imread returns None instead of raising on failure.
            raise IOError("Could not read image '1.png'")
        self.clone = self.original_image.copy()

        self.selected_ROI = False

        # ROI bounding box reference points: [(x1, y1), (x2, y2)]
        self.image_coordinates = []

        cv2.namedWindow('image')
        cv2.setMouseCallback('image', self.extractROI)

    @staticmethod
    def _normalized_box(start, end):
        """Return (x1, y1, x2, y2) with (x1, y1) the top-left corner,
        whichever direction the box was dragged in."""
        (xa, ya), (xb, yb) = start, end
        return min(xa, xb), min(ya, yb), max(xa, xb), max(ya, yb)

    def extractROI(self, event, x, y, flags, parameters):
        """Mouse callback: track the drag and draw the selected box."""
        # Record starting (x,y) coordinates on left mouse button click
        if event == cv2.EVENT_LBUTTONDOWN:
            self.image_coordinates = [(x, y)]

        # Record ending (x,y) coordinates on left mouse button release
        elif event == cv2.EVENT_LBUTTONUP:
            # A release without a recorded press has no start point.
            if not self.image_coordinates:
                return

            # Remove old bounding box
            if self.selected_ROI:
                self.clone = self.original_image.copy()

            # Store the corners in top-left / bottom-right order so the
            # printed labels and the later crop are right for any drag
            # direction.
            x1, y1, x2, y2 = self._normalized_box(self.image_coordinates[0], (x, y))
            self.image_coordinates = [(x1, y1), (x2, y2)]

            # Draw rectangle
            self.selected_ROI = True
            cv2.rectangle(self.clone, self.image_coordinates[0], self.image_coordinates[1], (36,255,12), 2)

            print('top left: {}, bottom right: {}'.format(self.image_coordinates[0], self.image_coordinates[1]))
            print('x,y,w,h : ({}, {}, {}, {})'.format(x1, y1, x2 - x1, y2 - y1))

        # Clear drawing boxes on right mouse button click
        elif event == cv2.EVENT_RBUTTONDOWN:
            self.selected_ROI = False
            self.image_coordinates = []
            self.clone = self.original_image.copy()

    def show_image(self):
        """Return the image to display (original plus the current box)."""
        return self.clone

    def crop_ROI(self):
        """Show the selected region and save it to 'ROI.png'.

        Prints a message and does nothing when no ROI is selected or the
        selection has no area inside the image.
        """
        if not self.selected_ROI:
            print('Select ROI before cropping!')
            return

        x1, y1, x2, y2 = self._normalized_box(*self.image_coordinates[:2])

        # Clamp to the image so coordinates outside it cannot wrap around
        # as negative slice indices.
        height, width = self.original_image.shape[:2]
        x1, x2 = max(x1, 0), min(x2, width)
        y1, y2 = max(y1, 0), min(y2, height)
        if x2 <= x1 or y2 <= y1:
            print('Selected ROI is empty; drag out a larger box.')
            return

        # Extract ROI
        self.cropped_image = self.original_image[y1:y2, x1:x2].copy()

        # Display and save image
        cv2.imshow('Cropped Image', self.cropped_image)
        cv2.imwrite('ROI.png', self.cropped_image)

if __name__ == '__main__':
    extractArtworkROI = ExtractArtworkROI()
    while True:
        cv2.imshow('image', extractArtworkROI.show_image())
        # Mask to the low byte so the comparison with ord() also works when
        # the backend sets higher bits in the key code.
        key = cv2.waitKey(1) & 0xFF

        # Close program with keyboard 'q'
        if key == ord('q'):
            break

        # Crop ROI
        if key == ord('c'):
            extractArtworkROI.crop_ROI()

    # Quitting with 'q' is a normal exit: close the windows and let the
    # script finish with status 0 instead of calling exit(1).
    cv2.destroyAllWindows()

— nathancy
แหล่งที่มา