diff --git a/sfx/collect.mp3 b/sfx/collect.mp3
new file mode 100644
index 0000000..19d6153
Binary files /dev/null and b/sfx/collect.mp3 differ
diff --git a/sfx/start.mp3 b/sfx/start.mp3
new file mode 100644
index 0000000..98d875b
Binary files /dev/null and b/sfx/start.mp3 differ
diff --git a/track_hand.py b/track_hand.py
index b4d4bd9..8ba5566 100755
--- a/track_hand.py
+++ b/track_hand.py
@@ -1,108 +1,202 @@
 #!/usr/bin/env python3
 import sys
-# import time
-from typing import NoReturn
+import random
+from enum import Enum
+from typing import NoReturn, Generator
+from types import ModuleType
+from subprocess import Popen
 
-import cv2
 import numpy as np
 import mediapipe as mp
+import cv2
+from cv2 import VideoCapture
 
-def main() -> NoReturn:
-    cap = cv2.VideoCapture(0)
-    mpHands = mp.solutions.hands
-    hands = mpHands.Hands(
-        static_image_mode=False,
-        max_num_hands=1,
-        min_detection_confidence=.5,
-        min_tracking_confidence=.5,
-    )
-    mpDraw = mp.solutions.drawing_utils
-    # pTime = 0
-    # cTime = 0
+class FingerType(Enum):
+    BASE = 0
+    BASE_RIGHT = 1
+    THUMB_BASE = 2
+    THUMB_KNUCKLE_1 = 3
+    THUMB_TIP = 4
+    INDEX_BASE = 5
+    INDEX_KNUCKLE_1 = 6
+    INDEX_KNUCKLE_2 = 7
+    INDEX_TIP = 8
+    MIDDLE_BASE = 9
+    MIDDLE_KNUCKLE_1 = 10
+    MIDDLE_KNUCKLE_2 = 11
+    MIDDLE_TIP = 12
+    RING_BASE = 13
+    RING_KNUCKLE_1 = 14
+    RING_KNUCKLE_2 = 15
+    RING_TIP = 16
+    PINKY_BASE = 17
+    PINKY_KNUCKLE_1 = 18
+    PINKY_KNUCKLE_2 = 19
+    PINKY_TIP = 20
 
-    img = cv2.imread("42.png", 0)
-    if len(img.shape) == 2 or img.shape[2] == 1:
+def get_42_img(
+    img_path: str,
+    margin_top: int,
+    margin_bottom: int,
+    margin_left: int,
+    margin_right: int,
+) -> np.ndarray:
+    global img42_side_len
+
+    img: np.ndarray = cv2.imread(img_path, 0)
+
+    if len(img.shape) in [1, 2]:
         img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
+    img = cv2.flip(img, 1)
 
-    height, width, _ = img.shape
-    margin = 100
-    top_margin = margin + 20
-    bottom_margin = margin + 20
-    left_margin = margin
-    right_margin = margin
-    img = img[top_margin:height-bottom_margin, left_margin:width-right_margin]
+    img_height, img_width = img.shape[:2]
+    img = img[
+        margin_top:img_height-margin_bottom,
+        margin_left:img_width-margin_right,
+    ]
 
-    side_length = min(640, 480) // 3
-    overlay_resized = cv2.resize(img, (side_length, side_length))
-    print(repr(overlay_resized))
-    input()
-    sys.exit()
+    b_top, b_bottom, b_left, b_right = [10]*4
+    img = cv2.copyMakeBorder(img, b_top, b_bottom, b_left, b_right, cv2.BORDER_CONSTANT, value=(0, 0, 0))
+    img = cv2.resize(img, (img42_side_len, img42_side_len))
+
+    return img
+
+mp_hands = mp.solutions.hands
+mp_draw: ModuleType = mp.solutions.drawing_utils
+
+img42_side_len = 70
+img42: np.ndarray = get_42_img(
+    "./42.png",
+    margin_top = 100 + 20,
+    margin_bottom = 100 + 20,
+    margin_left = 100,
+    margin_right = 100,
+)
+
+def touches_42(x: int, y: int, img42_x: int, img42_y: int) -> bool:
+    global collected_42
+
+    return (
+        img42_x <= x <= img42_x + img42_side_len
+        and img42_y <= y <= img42_y + img42_side_len
+    )
+
+def add_directional_triangle(
+    frame: np.ndarray,
+    x1: int,
+    y1: int,
+    x2: int,
+    y2: int,
+    rgb: tuple[int, int, int],
+    side_len: int,
+    stretch: float,
+) -> tuple[int, int]:
+    dir_vector = np.array([
+        x1 - x2, y1 - y2
+    ]).astype(np.float64)
+
+    # normalize
+    dir_vector /= np.linalg.norm(dir_vector)
+
+    triangle_height = side_len * (3**0.5) / 2
+    half_base = side_len / 2
+
+    perp_vector = np.array([-dir_vector[1], dir_vector[0]])
+
+    apex_vertex = (int(x1 + dir_vector[0] * triangle_height * 2/3 * stretch), int(y1 + dir_vector[1] * triangle_height * 2/3 * stretch))
+    left_vertex = (int(x1 - perp_vector[0] * half_base - dir_vector[0] * triangle_height/3),
+                   int(y1 - perp_vector[1] * half_base - dir_vector[1] * triangle_height/3))
+    right_vertex = (int(x1 + perp_vector[0] * half_base - dir_vector[0] * triangle_height/3),
+                    int(y1 + perp_vector[1] * half_base - dir_vector[1] * triangle_height/3))
+
+    triangle = np.array([apex_vertex, left_vertex, right_vertex])
+    cv2.drawContours(frame, [triangle], 0, rgb, -1)
+
+    return apex_vertex
+
+def get_finger_positions(
+    frame: np.ndarray,
+    hands: mp.solutions.hands.Hands,
+    add_landmarks: bool,
+) -> Generator[list[tuple[int, int, int]], None, None]:
+    height, width = frame.shape[:2]
+
+    img_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+    results = hands.process(img_rgb)
+
+    if results.multi_hand_landmarks:
+        for hand_landmarks in results.multi_hand_landmarks:
+            positions = []
+            for id, lm in enumerate(hand_landmarks.landmark):
+                x = int(lm.x * width)
+                y = int(lm.y * height)
+                positions.append((FingerType(id), x, y))
+            yield positions
+            if add_landmarks:
+                mp_draw.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)
+
+def show_frame(frame: np.ndarray, to_stdout: bool=False) -> None:
+    if to_stdout:
+        sys.stdout.buffer.write(frame.tobytes())
+    else:
+        cv2.imshow("Image", frame)
+        cv2.waitKey(1)
+
+def collect_sfx() -> None:
+    Popen(['paplay', './sfx/collect.mp3'])
+
+def main() -> NoReturn:
+    Popen(['paplay', './sfx/start.mp3'])
+
+    capture: VideoCapture = cv2.VideoCapture(0)
+    hands = mp_hands.Hands(max_num_hands=2)
+    collected_42 = True
+    img42_x = -img42_side_len - 1
+    img42_y = -img42_side_len - 1
+
+    i = 0
     while True:
-        success, frame = cap.read()
+        success: bool
+        frame: np.ndarray
+        success, frame = capture.read()
         if not success:
             continue
 
-        imgRGB = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-        results = hands.process(imgRGB)
-
-        h, w, c = frame.shape
-
-        img_x, img_y = 10, 10
-        frame[img_y:img_y+side_length, img_x:img_x+side_length] = overlay_resized
-
-        if results.multi_hand_landmarks:
-            for handLms in results.multi_hand_landmarks:
-                visited_7 = False
-                for id, lm in enumerate(handLms.landmark):
-                    cx = int(lm.x * w)
-                    cy = int(lm.y * h)
-                    # 7 one-before-index, 8 index, 4 thumb, 0 base
-                    if id == 7:
-                        one_before_index_x = cx
-                        one_before_index_y = cy
-                        visited_7 = True
-                    if id == 8:
-                        pass
-                        # open('/dev/pts/1', 'w').write(
-                        #     f'{id}, {cx}, {cy}\n'
-                        # )
-
-                    if visited_7:
-                        dir_vector = np.array([cx - one_before_index_x, cy - one_before_index_y])
-                        dir_vector = dir_vector.astype(np.float64)
-                        dir_vector /= np.linalg.norm(dir_vector)
-
-                        s = 100
-                        h = s * (3**0.5) / 2
-                        half_base = s / 2
-
-                        perp_vector = np.array([-dir_vector[1], dir_vector[0]])
-
-                        pt1 = (int(cx + dir_vector[0] * h * 2/3), int(cy + dir_vector[1] * h * 2/3))
-                        pt2 = (int(cx - perp_vector[0] * half_base - dir_vector[0] * h/3),
-                               int(cy - perp_vector[1] * half_base - dir_vector[1] * h/3))
-                        pt3 = (int(cx + perp_vector[0] * half_base - dir_vector[0] * h/3),
-                               int(cy + perp_vector[1] * half_base - dir_vector[1] * h/3))
-
-                        triangle_cnt = np.array([pt1, pt2, pt3])
-                        cv2.drawContours(frame, [triangle_cnt], 0, (0,0,0), -1)
-                    else:
-                        cv2.circle(frame, (cx, cy), 40, (0, 0, 0), cv2.FILLED)
-                mpDraw.draw_landmarks(frame, handLms, mpHands.HAND_CONNECTIONS)
-
-        # cTime = time.time()
-        # fps = 1 / (cTime - pTime)
-        # pTime = cTime
-
-        # cv2.putText(frame, str(int(fps)), (10,70), cv2.FONT_HERSHEY_PLAIN, 3, (255, 0, 255), 3)
-
-        sys.stdout.buffer.write(frame.tobytes())
-        # cv2.imshow("Image", frame)
-        # cv2.waitKey(1)
+        if i > 30:
+            if collected_42:
+                collected_42 = False
+                frame_height, frame_width = frame.shape[:2]
+                img42_x = random.randint(0, frame_width - img42_side_len - 1)
+                img42_y = random.randint(0, frame_height - img42_side_len - 1)
+            frame[
+                img42_y : img42_y+img42_side_len,
+                img42_x : img42_x+img42_side_len,
+            ] = img42
+        for positions in get_finger_positions(frame, hands, add_landmarks=True):
+            index_knuckle_1_pos: tuple[int, int] = (-1, -1)
+            for finger_id, finger_x, finger_y in positions:
+                if finger_id == FingerType.INDEX_KNUCKLE_2:
+                    index_knuckle_1_pos = (finger_x, finger_y)
+                elif finger_id == FingerType.INDEX_TIP and index_knuckle_1_pos != (-1, -1):
+                    apex_x, apex_y = add_directional_triangle(
+                        frame,
+                        finger_x,
+                        finger_y,
+                        *index_knuckle_1_pos,
+                        (0, 0, 0,),
+                        side_len=70,
+                        stretch=2.0,
+                    )
+                    if not collected_42 and touches_42(apex_x, apex_y, img42_x, img42_y):
+                        collected_42 = True
+                        i = 0
+                        collect_sfx()
+        show_frame(frame, to_stdout=True)
+        i += 1
 
 if __name__ == '__main__':
     main()
diff --git a/working_command.sh b/working_command.sh
index 95d286d..06833c5 100755
--- a/working_command.sh
+++ b/working_command.sh
@@ -1,5 +1,36 @@
 #!/bin/sh
-# xterm -bg black -fg white -s -fullscreen -fa 'SauceCodePro Nerd Font Mono' -fs 8 -e '{ ./track_hand.py | 2>/dev/null ffmpeg -y -f rawvideo -s 640x480 -pix_fmt bgr24 -i - -map 0:V:0 -filter:v "format=gray,hflip" -c:v libx264 -preset ultrafast -tune zerolatency -crf 30 -f nut - | TERM=xterm-mono CACA_DRIVER=ncurses DISPLAY= mpv --really-quiet --no-cache --no-config --vo=caca --untimed --profile=low-latency - || { echo Error 1>&2; read X; }; }'
 # st -f 'SauceCodePro Nerd Font Mono:size=10' -e sh -c '{ ./track_hand.py | 2>/dev/null ffmpeg -y -f rawvideo -s 640x480 -pix_fmt bgr24 -i - -map 0:V:0 -filter:v "format=gray,hflip" -c:v libx265 -preset ultrafast -tune zerolatency -crf 30 -f nut - | TERM=xterm-mono CACA_DRIVER=ncurses DISPLAY= mpv --really-quiet --no-cache --no-config --vo=caca --untimed --profile=low-latency - || { echo Error 1>&2; read X; }; } | ./game.py'
-st -f 'SauceCodePro Nerd Font Mono:size=10' -e sh -c '{ ./track_hand.py | 2>/dev/null ffmpeg -y -f rawvideo -s 640x480 -pix_fmt bgr24 -i - -map 0:V:0 -filter:v "format=gray,hflip" -c:v libx265 -preset ultrafast -tune zerolatency -crf 30 -f nut - | TERM=xterm-mono CACA_DRIVER=ncurses DISPLAY= mpv --really-quiet --no-cache --no-config --vo=caca --untimed --profile=low-latency - || { echo Error 1>&2; read X; }; }'
+# st -f 'SauceCodePro Nerd Font Mono:size=10' -e sh -c '{ ./track_hand.py | 2>/dev/null ffmpeg -y -f rawvideo -s 640x480 -pix_fmt bgr24 -i - -map 0:V:0 -filter:v "format=gray,hflip" -c:v libx265 -preset ultrafast -tune zerolatency -crf 30 -f nut - | TERM=xterm-mono CACA_DRIVER=ncurses DISPLAY= mpv --really-quiet --no-cache --no-config --vo=caca --untimed --profile=low-latency - || { echo Error 1>&2; read X; }; }'
 # st -f 'SauceCodePro Nerd Font Mono:size=10' -e sh -c '{ ./track_hand.py | 2>/dev/null ffmpeg -y -f rawvideo -s 640x480 -pix_fmt bgr24 -i - -map 0:V:0 -filter:v "format=gray,hflip" -c:v libx264 -preset ultrafast -tune zerolatency -crf 30 -f nut - | TERM=xterm-mono CACA_DRIVER=ncurses DISPLAY= mpv --really-quiet --no-cache --no-config --vo=tct --untimed --profile=low-latency - || { echo Error 1>&2; read X; }; }'
+
+xterm \
+    -bg black \
+    -fg white \
+    -s -fullscreen \
+    -fa 'SauceCodePro Nerd Font Mono' \
+    -fs 8 \
+    -e '{
+        ./track_hand.py |
+        2>/dev/null ffmpeg -y \
+            -f rawvideo \
+            -s 640x480 \
+            -pix_fmt bgr24 \
+            -i - \
+            -map 0:V:0 \
+            -filter:v "format=gray,hflip" \
+            -c:v libx265 \
+            -preset ultrafast \
+            -tune zerolatency \
+            -crf 30 \
+            -f nut \
+            - |
+        TERM=xterm-mono CACA_DRIVER=ncurses DISPLAY= mpv \
+            --really-quiet \
+            --no-cache \
+            --no-config \
+            --vo=caca \
+            --untimed \
+            --profile=low-latency \
+            - \
+            || { echo Error 1>&2; read X; };
+    }'