diff --git a/.gitignore b/.gitignore index 741ef92..cce6855 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,9 @@ __pycache__/ model*/* env/* .DS_Store +Images/tmp.png +Images/screenshot.png +test.py +Images/1680x1050_grid.png +Images/screenshot.png +Images/tmp.png diff --git a/Images/1920x1080_grid.png b/Images/1920x1080_grid.png new file mode 100644 index 0000000..566c66b Binary files /dev/null and b/Images/1920x1080_grid.png differ diff --git a/Images/image1.png b/Images/image1.png new file mode 100644 index 0000000..cd80d5b Binary files /dev/null and b/Images/image1.png differ diff --git a/Images/screenshot.png b/Images/screenshot.png new file mode 100644 index 0000000..dada5f4 Binary files /dev/null and b/Images/screenshot.png differ diff --git a/Images/tmp.png b/Images/tmp.png new file mode 100644 index 0000000..8416898 Binary files /dev/null and b/Images/tmp.png differ diff --git a/PyAudio-0.2.11-cp38-cp38-win_amd64.whl b/PyAudio-0.2.11-cp38-cp38-win_amd64.whl new file mode 100644 index 0000000..4842e13 Binary files /dev/null and b/PyAudio-0.2.11-cp38-cp38-win_amd64.whl differ diff --git a/docs/userguide.md b/docs/userguide.md index ca1b6f9..6d55c2f 100644 --- a/docs/userguide.md +++ b/docs/userguide.md @@ -7,6 +7,7 @@ Sidekick takes voice commands and converts them to actions on the computer. It u - `text` - this mode transcribes spoken speech to text - `alpha` - this mode provides the ability to write individual letters, numbers, and punctuation - `pause` - in this mode no commands are processed (convenient if afk). +- `volume` - this mode allows the user to control the mouse with the volume of their voice In each mode certain keywords are linked to certain actions. To switch between modes, simply say `command`, `mouse`, `text`, or `alpha`. Say `pause` once to pause. Say `time to work` to restart back in the `command` state. 
@@ -56,6 +57,11 @@ Some commands are stateless, in that they function no matter what state/mode you - `hold` - holds down left mouse button until you say another word - useful for drag and drop or on Mac when need to hold and release to switch windows - `hot` - hot key press (ex: `hot control alt delete go` presses `ctrl alt delete`) - using the word `apple` for the command key (ex: `hot apple f go` presses `command f`) +#### New Stateful commands +- `screenshot` opens a window that shows a real time screenshot with a red grid overlay. This overlay corresponds to the grid-based mouse control. To turn off screenshot, say the `screenshot` keyword again +- `overlay` overlays a red grid over the entire screen. To turn off the overlay, close the window manually + + #### Examples - `scroll up 1 2 1` - will scroll up 1, then 2, then 1 again - number can be repeated without repeating entire command @@ -115,4 +121,16 @@ The alpha mode enables punctuation as well as single alphanumeric characters. #### Examples -- `cap hello alpha comma text how are you alpha question` - will produce the text 'Hello, how are you?' \ No newline at end of file +- `cap hello alpha comma text how are you alpha question` - will produce the text 'Hello, how are you?' 
+ +## Volume +Volume mode allows the mouse to be controlled using the volume of your voice +- `up` switch to vertical movement (default) +- `left` switch to horizontal movement +- `slow` mouse moves at a slow speed +- `medium` mouse moves at a medium speed (default) +- `fast` mouse moves at a fast speed +- `stop` exits to command mode + + + diff --git a/overlay.py b/overlay.py new file mode 100644 index 0000000..82bd03c --- /dev/null +++ b/overlay.py @@ -0,0 +1,51 @@ +import sys +import time +import threading +from PyQt5.QtWidgets import QApplication, QWidget, QLabel +from PyQt5.QtGui import QIcon, QPixmap +from PyQt5 import QtCore + +# References: https://pythonspot.com/pyqt5-image/ +# https://stackoverflow.com/questions/1925015/pyqt-always-on-top +# https://stackoverflow.com/questions/37941039/pyqt-transparent-background-image-partially-black + +class Grid_Overlay(QWidget): + + def __init__(self): + super().__init__() + self.title = 'Screenshot' + self.left = 10 + self.top = 10 + self.width = 640 + self.height = 480 + self.grid = "Images/1920x1080_grid.png" + self.initUI() + + def initUI(self): + self.setWindowTitle(self.title) + self.setGeometry(self.left, self.top, self.width, self.height) + self.setWindowFlag(QtCore.Qt.FramelessWindowHint) + self.setWindowFlag(QtCore.Qt.WindowStaysOnTopHint) + self.setAttribute(QtCore.Qt.WA_TranslucentBackground, True) + # self.setAttribute(QtCore.Qt.WindowStaysOnTopHint, True) + + # Create widget + label = QLabel(self) + pixmap = QPixmap(self.grid) + label.setPixmap(pixmap) + self.resize(pixmap.width(),pixmap.height()) + self.show() + def set_grid(self, filename): + self.grid = filename + +def overlay(filename): + app = QApplication([]) + ex = Grid_Overlay() + ex.set_grid(filename) + app.exec_() + app.quit() + +def main(): + overlay() +if __name__ == '__main__': + main() diff --git a/parsepackage/command_parser.py b/parsepackage/command_parser.py index 199646c..f0b0fb2 100644 --- a/parsepackage/command_parser.py +++ 
b/parsepackage/command_parser.py @@ -16,15 +16,20 @@ along with this program. If not, see . ''' from actions import * +from screenshot import * +from overlay import overlay import string - +import threading +from os.path import exists class CommandParser: def __init__(self, system, steps): self.os = system self.steps = steps self.tempvar = "" - + self.stop_screenshot = [False] + self.screenshot_started = False + self.screen_size = (1920, 1080) self.keys = ['a', 'b', 'c', 'd', 'e','f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z','alt','delete','control','shift','tab','apple'] @@ -125,6 +130,8 @@ def __init__(self, system, steps): "west", "save", "scroll", + "screenshot", + "overlay", ] self.commandlist = ( @@ -381,6 +388,35 @@ def evaluate_command(self, command_buffer): else: hotKeyPress(["ctrl", "s"]) command_buffer = [] + elif command_buffer[0] == "line": + hotKeyPress(["end"]) + hotKeyPress(["shift", "home"]) + command_buffer = [] + elif command_buffer[0] == "copy line": + hotKeyPress(["end"]) + hotKeyPress(["shift", "home"]) + if self.os == "Darwin": + hotKeyPress(["command", "c"]) + else: + hotKeyPress(["ctrl", "c"]) + command_buffer = [] + elif command_buffer[0] == "cut line": + hotKeyPress(["end"]) + hotKeyPress(["shift", "home"]) + if self.os == "Darwin": + hotKeyPress(["command", "x"]) + else: + hotKeyPress(["ctrl", "x"]) + command_buffer = [] + elif command_buffer[0] == "loop": + pyautogui.write("for (int i = 0; i < N; i++) {") + hotKeyPress(["enter"]) + pyautogui.write("continue;") + hotKeyPress(["enter"]) + pyautogui.write("}") + hotKeyPress(["enter"]) + hotKeyPress(["up", "up", "end"]) + command_buffer = [] elif command_buffer[0] == "switch": if self.os == "Darwin": @@ -548,6 +584,36 @@ def evaluate_command(self, command_buffer): return self.handle_invalid_command( command_buffer[1], command_buffer ) + elif command_buffer[0] == "screenshot": + if self.screenshot_started == False and 
self.stop_screenshot[0] == True: + self.stop_screenshot[0] = False + + if self.screenshot_started == False: + print(command_buffer) + w = self.screen_size[0] + h = self.screen_size[1] + grid = "Images/{}x{}_grid.png".format(w,h) + if not exists(grid): + create_gridlines(w, h) + + p = threading.Thread(target=take_screenshot, args=(w, h, grid, self.stop_screenshot)) + p.start() + self.screenshot_started = True + else: + self.stop_screenshot[0] = True + self.screenshot_started = False + command_buffer=[] + + elif command_buffer[0] == "overlay": + print("Showing grid overlay. Close the window manually to continue using Sidekick.") + w = self.screen_size[0] + h = self.screen_size[1] + grid = "Images/{}x{}_grid.png".format(w,h) + if not exists(grid): + create_gridlines(w, h) + overlay(grid) + command_buffer=[] + else: command_buffer = [] diff --git a/parsepackage/horizontal_parser.py b/parsepackage/horizontal_parser.py new file mode 100644 index 0000000..73a83b7 --- /dev/null +++ b/parsepackage/horizontal_parser.py @@ -0,0 +1,112 @@ +''' +Sidekick +Copyright (C) 2021 UT-Battelle - Created by Sean Oesch + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see . 
+''' +from actions import * +import threading +import math +import audioop + + + +class HorizontalParser: + def __init__(self, system, steps): + self.volumeStarted = False + self.os = system + self.steps = steps + self.stopVolume = True + + self.commands = [ + "stop", + "snail", + "slow", + "fast", + "medium", + "up", + "down", + "counter", + "clock", + "north", + "south", + "east", + "west", + "one", + "two", + "three", + "four", + "northeast", + "northwest", + "southeast", + "southwest", + ] + def set_threshold(self, threshold): + self.threshold = threshold + + self.midpoint = (self.threshold + 55) /2 + + def set_audio_stream(self, stream): + self.stream = stream + + def evaluate_volume(self, command_buffer): + if not self.volumeStarted: + + self.stopVolume = False + self.magnitude = 5 # in pixels + self.sleep = 0.2 + self.setVolumeCoord(90) + + data = self.stream.read(4000,exception_on_overflow = False) + # calculate decibels + dB = 20 * math.log10(audioop.rms(data,2)+1) + + # if len(command_buffer) > 0: + """print("Volume " + str(dB)) + if dB < 45: + print("MOM") + self.setVolumeCoord(self.currentangle + 15) + elif dB >= 45: + print("WOW") + self.setVolumeCoord(self.currentangle - 15)""" + command_buffer = [] + + if not self.volumeStarted: + self.startVolume() + + return [command_buffer, "volume"] + + def startVolume(self): + thread = threading.Thread(target=self.volume_thread) + thread.daemon = True + thread.start() + self.volumeStarted = True + + def setVolumeCoord(self, degrees): + print("start") + self.currentangle = degrees + self.x = self.magnitude * math.cos(math.radians(degrees)) + print(self.x) + self.y = -1 * self.magnitude * math.sin(math.radians(degrees)) + print(self.y) + return + + def volume_thread(self): + while True: + if self.stopVolume: + self.volumeStarted = False + break + else: + moveMouse(self.x, self.y) + time.sleep(self.sleep) diff --git a/parsepackage/parser.py b/parsepackage/parser.py index 02a9ede..a882ebc 100644 --- 
a/parsepackage/parser.py +++ b/parsepackage/parser.py @@ -17,11 +17,14 @@ ''' from actions import * import platform + +from parsepackage.program_parser import ProgramParser from .mouse_parser import MouseParser from .text_parser import TextParser from .command_parser import CommandParser from .alpha_parser import AlphaParser - +from .volume_parser import VolumeParser +from .horizontal_parser import HorizontalParser class Parser: def __init__(self): @@ -29,6 +32,7 @@ def __init__(self): self.state = "command" self.command_buffer = [] self.pause = False + self.dB = 0 self.stepmapping = { "one": 10, @@ -47,13 +51,16 @@ def __init__(self): "at": 1500, } - self.states = ["text", "command", "mouse", "pause", "alpha"] + self.states = ["text", "command", "pause", "alpha", "volume", "horizontal", "mouse"] #mouse self.steps = ["one", "two", "three", "four", "five", "six", "seven", "eight"] self.mouseParser = MouseParser(self.os, self.stepmapping) self.textParser = TextParser(self.os, self.stepmapping) self.commandParser = CommandParser(self.os, self.stepmapping) + self.programParser = ProgramParser(self.os) self.alphaParser = AlphaParser(self.os) + self.volumeParser = VolumeParser(self.os, self.stepmapping) + self.horizontalParser = HorizontalParser(self.os, self.stepmapping) # nontextcommands can be fed to a speech to text model to make it work more effectively for commands self.nontextcommands = list( @@ -69,6 +76,18 @@ def __init__(self): ) # ingest string that may contain multiple space delimited words, where each word is a sent to parser individually + def set_threshold(self, threshold): + self.volumeParser.set_threshold(threshold) + self.horizontalParser.set_threshold(threshold) + + def set_audio_stream(self, stream): + self.volumeParser.set_audio_stream(stream) + self.horizontalParser.set_audio_stream(stream) + + def set_screen_size(self, screen_size): + self.commandParser.screen_size = screen_size + + def ingest(self, words): # print(word.lower()) for word in 
words.split(" "): @@ -85,7 +104,7 @@ def ingest(self, words): self.evaluate() def evaluate(self): - + print("evaluate") if self.pause: if self.command_buffer[0] == "time": @@ -113,15 +132,32 @@ def evaluate(self): elif self.command_buffer[-1] == "text": self.state = "text" self.command_buffer = [] + elif self.command_buffer[-1] == "code": + self.state = "program" + self.command_buffer = [] elif self.command_buffer[-1] == "alpha": self.state = "alpha" self.command_buffer = [] - elif self.command_buffer[-1] == "mouse": + elif self.command_buffer[-1] == "mouse": self.state = "mouse" self.command_buffer = [] self.command_buffer, self.state = self.mouseParser.evaluate_mouse( self.command_buffer ) + elif self.command_buffer[-1] == "volume": + print("This was executed") + self.state = "volume" + self.command_buffer = [] + self.command_buffer, self.state = self.volumeParser.evaluate_volume( + self.command_buffer, + self.dB + ) + elif self.command_buffer[-1] == "horizontal": + self.state = "horizontal" + self.command_buffer = [] + self.command_buffer, self.state = self.horizontalParser.evaluate_volume( + self.command_buffer + ) else: # send command to appropriate parsing function if len(self.command_buffer) > 0: ( @@ -137,6 +173,10 @@ def evaluate(self): self.command_buffer = self.textParser.evaluate_text( self.command_buffer ) + elif self.state == "program": + self.command_buffer = self.programParser.evaluate_text( + self.command_buffer + ) elif self.state == "alpha": self.command_buffer = self.alphaParser.evaluate_text( self.command_buffer @@ -146,7 +186,20 @@ def evaluate(self): self.command_buffer, self.state, ) = self.mouseParser.evaluate_mouse(self.command_buffer) - + elif self.state == "volume": + ( + self.command_buffer, + self.state, + ) = self.volumeParser.evaluate_volume(self.command_buffer, self.dB) + elif self.state == "horizontal": + ( + self.command_buffer, + self.state, + ) = self.horizontalParser.evaluate_volume(self.command_buffer) # stop mouse if state is 
switched before stopping if not self.mouseParser.stopMouse and self.state != "mouse": self.mouseParser.stopMouse = True + if not self.volumeParser.stopVolume and self.state != "volume": + self.volumeParser.stopVolume = True + if not self.horizontalParser.stopVolume and self.state != "horizontal": + self.horizontalParser.stopVolume = True diff --git a/parsepackage/program_parser.py b/parsepackage/program_parser.py new file mode 100644 index 0000000..a36b679 --- /dev/null +++ b/parsepackage/program_parser.py @@ -0,0 +1,156 @@ +''' +Sidekick +Copyright (C) 2021 UT-Battelle - Created by Sean Oesch + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see . 
+''' +from click import command +from actions import * +import string + + +class ProgramParser: + def __init__(self, system): + self.os = system + self.numbers = [ + "zero", + "one", + "two", + "three", + "four", + "five", + "six", + "seven", + "eight", + "nine", + ] + self.punctuation = [ + "ren", + "len", + "rack", + "equals", + "lack", + "period", + "colon", + "dash", + "comma", + "underscore", + "question", + "dot", + "hash", + "semicolon", + "bang", + "cap", + "exclamation", + "quote", + "single", + "if", + "while", + "for", + "and", + "or", + "mod" + ] + self.keywords = list(string.ascii_lowercase) + self.punctuation + self.numbers + + def word_to_int(self, word): + mapping = { + "zero": "0", + "one": "1", + "two": "2", + "three": "3", + "four": "4", + "five": "5", + "six": "6", + "seven": "7", + "eight": "8", + "nine": "9", + } + return mapping[word] + + def insert_punctuation(self, text): + if text == "period": + text = text.replace("period", ".") + elif text == "equals": + text = text.replace("equals", "==") + elif text == "ren": + text = text.replace("ren", ")") + elif text == "len": + text = text.replace("len", "(") + elif text == "lack": + text = text.replace("lack", "[") + elif text == "rack": + text = text.replace("rack", "]") + elif text == "colon": + text = text.replace("colon", ":") + elif text == "dash": + text = text.replace("dash", "-") + elif text == "comma": + text = text.replace("comma", ",") + elif text == "question": + text = text.replace("question", "?") + elif text == "dot": + text = text.replace("dot", ".") + elif text == "quote": + text = text.replace("quote", '"') + elif text == "hash": + text = text.replace("hash", "#") + elif text == "single": + text = text.replace("single", "'") + elif text == "underscore": + text = text.replace("underscore", "_") + elif text == "semicolon": + text = text.replace("semicolon", ";") + elif text == "bang" or text == "exclamation": + text = text.replace("bang", "!").replace("exclamation", "!") + elif 
text == "if": + text = text.replace("if", "if ():\n") + elif text == "while": + text = text.replace("while", "while ():\n") + elif text == "for": + text = text.replace("for", "for ():\n") + elif text == "and": + text = text.replace("and", "&&") + elif text == "or": + text = text.replace("or", "||") + elif text == "mod": + text = text.replace("mod", "%") + elif text == "assign": + text = text.replace("assign", "=") + elif text == "same": + text = text.replace("same", "===") + return text + + def evaluate_text(self, command_buffer): + if command_buffer[0] == "cap": # capitalize next word spoken + if len(command_buffer) >= 2: + writeToScreen(command_buffer[1].capitalize()) + if len(command_buffer) > 2: + command_buffer = command_buffer[2:] + else: + command_buffer = [] + else: + for i in range(0, len(command_buffer)): + # some punctuation includes backspace and space after - other does not + + if command_buffer[i] in self.punctuation: + writeToScreen(self.insert_punctuation(command_buffer[i])) + elif command_buffer[i] in self.numbers: + writeToScreen(self.word_to_int(command_buffer[i])) + else: + writeToScreen(command_buffer[i]) + + command_buffer = [] + + return command_buffer \ No newline at end of file diff --git a/parsepackage/volume_parser.py b/parsepackage/volume_parser.py new file mode 100644 index 0000000..dcd0e0f --- /dev/null +++ b/parsepackage/volume_parser.py @@ -0,0 +1,131 @@ +''' +Sidekick +Copyright (C) 2021 UT-Battelle - Created by Sean Oesch + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. 
+ +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see . +''' +from actions import * +import threading +import math +import audioop + + + +class VolumeParser: + def __init__(self, system, steps): + self.volumeStarted = False + self.os = system + self.steps = steps + self.stopVolume = True + self.dB = 0.0 + self.thresh = [0, 0, 0] + self.vert = True + self.horiz = False + + self.commands = [ + "stop", + "left", #left and right + "up", #up and down + "slow", + "medium", + "fast", + ] + def set_threshold(self, threshold): + self.threshold = threshold + + self.midpoint = (self.threshold + 55) /2 + + def set_audio_stream(self, stream): + self.stream = stream + + def evaluate_volume(self, command_buffer, dB): + + + if not self.volumeStarted: + self.stopVolume = False + self.magnitude = 10 # in pixels + self.sleep = 0.2 + self.setVolumeCoord(90) + print("Starting...") + + if len(command_buffer) != 0: + if command_buffer[0] == 'stop': + self.stopVolume = True + self.volumeStarted = False + + if command_buffer[0] == 'up': + self.vert = True + self.horiz = False + + if command_buffer[0] == 'left': + self.vert = False + self.horiz = True + + if command_buffer[0] == "slow": + self.magnitude = 5 + if command_buffer[0] == "medium": + self.magnitude = 10 + if command_buffer[0] == "fast": + self.magnitude = 20 + + # calculate decibels + data = self.stream.read(4000,exception_on_overflow = False) + self.dB = 20 * math.log10(audioop.rms(data,2)+1) + + if self.vert: + if self.dB < 35 and self.dB > self.thresh[0]: + self.setVolumeCoord(270) + elif self.dB >= self.thresh[2]: + self.setVolumeCoord(90) + command_buffer = [] + + if self.horiz: + if self.dB < 35 and self.dB > self.thresh[0]: + self.setVolumeCoord(0) + elif self.dB >= self.thresh[2]: + self.setVolumeCoord(180) + command_buffer = [] + + + + + command_buffer = [] + + if not self.volumeStarted: + self.startVolume() + + + return [command_buffer, "volume"] + + def 
startVolume(self): + thread = threading.Thread(target=self.volume_thread) + thread.daemon = True + thread.start() + self.volumeStarted = True + + def setVolumeCoord(self, degrees): + if self.stopVolume == False: + self.currentangle = degrees + self.x = self.magnitude * math.cos(math.radians(degrees)) + self.y = -1 * self.magnitude * math.sin(math.radians(degrees)) + return + + def volume_thread(self): + while True: + if self.stopVolume: + self.volumeStarted = False + break + else: + moveMouse(self.x, self.y) + time.sleep(self.sleep) diff --git a/screenshot.py b/screenshot.py new file mode 100644 index 0000000..6275066 --- /dev/null +++ b/screenshot.py @@ -0,0 +1,77 @@ +import re +import numpy as np +import cv2 +import pyautogui +from PIL import Image, ImageDraw +import time + + +def get_pos(width): + #There are 11 gridlines for x and y + line_space = np.round_(width/11, 0) + x_pos = [] + count = 0 + for i in range(1, 11): + count += line_space + x_pos.append(count) + return x_pos + + + +#Set the resolution, probably want this to be changable + + +#Create the gridlines +def create_gridlines(w, h): + x_pos = get_pos(w) + y_pos = get_pos(h) + new_image = Image.new(mode='RGBA', size=(w, h), color=(255,255,255,0)) + for x in x_pos: + draw = ImageDraw.Draw(new_image) + #x = new_image.width / 2 + y0 = 0 + y1 = new_image.height + line = ((x, y0), (x, y1)) + draw.line(line, fill="red", width=2) + del draw + + for y in y_pos: + draw = ImageDraw.Draw(new_image) + #x = new_image.width / 2 + x0 = 0 + x1 = new_image.width + line = ((x0, y), (x1, y)) + draw.line(line, fill="red", width=3) + del draw + new_image.save("Images/{}x{}_grid.png".format(w,h)) + return "Images/{}x{}_grid.png".format(w,h) + + + +#https://www.geeksforgeeks.org/how-to-take-screenshots-using-python/ +#https://www.geeksforgeeks.org/overlay-an-image-on-another-image-in-python/ +#https://www.codegrepper.com/code-examples/python/display+image+python+small+screen 
+#https://www.etutorialspoint.com/index.php/319-python-opencv-overlaying-or-blending-two-images +#https://stackoverflow.com/questions/57736832/how-can-you-read-rgba-using-opencv + +def take_screenshot(w, h, grid_file, stop): + while not stop[0]: + cv2.namedWindow("output", cv2.WINDOW_NORMAL) + image = pyautogui.screenshot() + image = cv2.cvtColor(np.array(image), + cv2.COLOR_RGB2BGRA) + cv2.imwrite("Images/screenshot.png", image) + + + im1 = Image.open("Images/screenshot.png").convert("RGBA") + im2 = Image.open(grid_file).convert("RGBA") + im1.paste(im2, (0,0), mask = im2) + # Displaying the image + im1.save("Images/tmp.png") + im = cv2.imread("Images/tmp.png") + + imS = cv2.resize(im, (w, h)) + cv2.imshow("output", imS) + k = cv2.waitKey(17) + cv2.destroyAllWindows() + exit() \ No newline at end of file diff --git a/sidekick.cfg b/sidekick.cfg new file mode 100644 index 0000000..fbb0302 --- /dev/null +++ b/sidekick.cfg @@ -0,0 +1,3 @@ +{ +"resolution": "1920x1080" +} \ No newline at end of file diff --git a/sidekick.py b/sidekick.py index 5f5da7e..7db6fd0 100644 --- a/sidekick.py +++ b/sidekick.py @@ -15,19 +15,25 @@ You should have received a copy of the GNU Affero General Public License along with this program. If not, see . 
''' +from errno import EHOSTDOWN +from black import wrap_stream_for_windows from vosk import Model, KaldiRecognizer +import sys import os import json import audioop import string import math from parsepackage import * +from parsepackage.volume_parser import VolumeParser if not os.path.exists("model"): print ("Please download the model from https://alphacephei.com/vosk/models and unpack as 'model' in the current folder.") exit (1) import pyaudio +import json +import json parser = parser.Parser() @@ -38,21 +44,24 @@ def listToList(words): wordlist = wordlist.strip(",") + "]" return wordlist -def setRec(state,crec,trec,arec): +def setRec(state,crec,trec,arec, prec): if state == "text": return trec - elif state == "command" or state == "mouse": + elif state == "program": + return prec + elif state == "command" or state == "mouse" or state == "volume": return crec else: return arec -def clearRec(crec,trec,arec): +def clearRec(crec,trec,arec,prec): crec.Result() trec.Result() arec.Result() + prec.Result() -def stateSwap(nextstate,crec,trec,arec): - rec = setRec(nextstate,crec,trec,arec) +def stateSwap(nextstate,crec,trec,arec, prec): + rec = setRec(nextstate,crec,trec,arec, prec) res = json.loads(rec.Result()) swap = False if res["text"] != "": @@ -62,28 +71,53 @@ def stateSwap(nextstate,crec,trec,arec): if res["text"] == nextstate: swap = True - clearRec(crec,trec,arec) + clearRec(crec,trec,arec,prec) -def ingest(currentstate,crec,trec,arec): - rec = setRec(currentstate,crec,trec,arec) +def ingest(currentstate,crec,trec,arec, prec): + rec = setRec(currentstate,crec,trec,arec, prec) res = json.loads(rec.Result()) # this not only returns the most accurate result, but also flushes the list of words stored internally if res["text"] != "": for text in res["text"].split(" "): - if text in ["text","alpha","command"] and text != currentstate: + if text in ["text","alpha","command", "program"] and text != currentstate: parser.ingest(text) - stateSwap(text,crec,trec,arec) + 
stateSwap(text,crec,trec,arec, prec) else: parser.ingest(text) - - clearRec(crec,trec,arec) + + clearRec(crec,trec,arec, prec) + +def load_config(): + f =open("sidekick.cfg") + config = json.load(f) + f.close() + return config + +config = load_config() + +# Set the screen resolution for screenshots from config file +resolution = config['resolution'].split("x") +screen_size = (int(resolution[0]), int(resolution[1])) +print(screen_size) +parser.set_screen_size(screen_size) # create wordlist for our command model so that commands will be more accurately detected +lower_buffer = 0 +upper_buffer = 0 +if len(sys.argv) >= 2: + lower_buffer = sys.argv[1] + +if len(sys.argv) >= 3: + upper_buffer = sys.argv[2] + + + commandwords = listToList(parser.nontextcommands) alphavals = listToList(parser.alphavalues) model = Model("model") # the text recommender uses the standard model for transcription textrec = KaldiRecognizer(model, 16000) +programrec = KaldiRecognizer(model, 16000) # use wordlist in our command recommender commandrec = KaldiRecognizer(model, 16000, commandwords) alpharec = KaldiRecognizer(model, 16000, alphavals) @@ -91,7 +125,7 @@ def ingest(currentstate,crec,trec,arec): p = pyaudio.PyAudio() stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000, input=True, frames_per_buffer=8000) stream.start_stream() - +parser.set_audio_stream(stream) print("\nSidekick at your service. 
Please wait silently for the threshold to be set based on ambient noise before use.") threshold_buffer = 1 # how many dB above ambient noise threshold will be set @@ -105,7 +139,7 @@ def ingest(currentstate,crec,trec,arec): data = stream.read(4000,exception_on_overflow = False) # calculate decibels - dB = 20 * math.log10(audioop.rms(data,2)) + dB = 20 * math.log10(audioop.rms(data,2)+1) # we want to set threshold based on ambient noise prior to processing audio data if not thresholdset: @@ -116,7 +150,9 @@ def ingest(currentstate,crec,trec,arec): print("Your sidekick now awaits your command.") threshold = sum(ambientvals) / len(ambientvals) + threshold_buffer print("Threshold is now set at " + str(round(threshold,2)) + " dB.") - + parser.set_threshold(threshold) + + # send audio data to model for processing when threshold breached and shortly afterward elif dB > threshold or wait == True: @@ -129,21 +165,58 @@ def ingest(currentstate,crec,trec,arec): wait = False trec = textrec.AcceptWaveform(data) + prec = programrec.AcceptWaveform(data) crec = commandrec.AcceptWaveform(data) arec = alpharec.AcceptWaveform(data) - if len(data) == 0: break + print(parser.state) if parser.state == "text": if trec: # if this returns true model has determined best word candidate - ingest(parser.state,commandrec,textrec,alpharec) + ingest(parser.state,commandrec,textrec,alpharec, programrec) else: # if false only a partial result returned - not useful for this application pass #print(rec.PartialResult()) - partial result is faster, but not accurate enough for use - + + elif parser.state == "program": + if prec: # if this returns true model has determined best word candidate + ingest(parser.state,commandrec,textrec,alpharec,programrec) + else: # if false only a partial result returned - not useful for this application + pass + elif parser.state == "alpha": if arec: # if this returns true model has determined best word candidate - ingest(parser.state,commandrec,textrec,alpharec) + 
ingest(parser.state,commandrec,textrec,alpharec, programrec) + + elif parser.state == "volume": + #ingest(parser.state,commandrec,textrec,alpharec,programrec) + if parser.volumeParser.volumeStarted == True: + parser.dB = dB + + lower_threshold = threshold + float(lower_buffer) + upper_threshold = 50 + float(upper_buffer) + + parser.volumeParser.thresh.append(lower_threshold) + parser.volumeParser.thresh.append((upper_threshold-lower_threshold) + lower_threshold) + parser.volumeParser.thresh.append(upper_threshold) + + parser.volumeParser.evaluate_volume(parser.command_buffer, parser.dB) + ingest(parser.state,commandrec,textrec,alpharec, programrec) + + if parser.volumeParser.stopVolume == True: + parser.state = "command" + + elif parser.state == "horizontal": + if parser.horizontalParser.volumeStarted == True: + if dB < 35: + y = parser.horizontalParser.setVolumeCoord(180) + elif dB >= 35: + parser.horizontalParser.setVolumeCoord(0) + command_buffer = [] + + else: if crec: # if this returns true model has determined best word candidate - ingest(parser.state,commandrec,textrec,alpharec) \ No newline at end of file + ingest(parser.state,commandrec,textrec,alpharec, programrec) + + diff --git a/vosk-model-small-en-us-0.15.zip b/vosk-model-small-en-us-0.15.zip new file mode 100644 index 0000000..0c94ec8 Binary files /dev/null and b/vosk-model-small-en-us-0.15.zip differ