diff --git a/.gitignore b/.gitignore
index 741ef92..cce6855 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,7 @@ __pycache__/
model*/*
env/*
.DS_Store
+Images/tmp.png
+Images/screenshot.png
+test.py
+Images/1680x1050_grid.png
diff --git a/Images/1920x1080_grid.png b/Images/1920x1080_grid.png
new file mode 100644
index 0000000..566c66b
Binary files /dev/null and b/Images/1920x1080_grid.png differ
diff --git a/Images/image1.png b/Images/image1.png
new file mode 100644
index 0000000..cd80d5b
Binary files /dev/null and b/Images/image1.png differ
diff --git a/Images/screenshot.png b/Images/screenshot.png
new file mode 100644
index 0000000..dada5f4
Binary files /dev/null and b/Images/screenshot.png differ
diff --git a/Images/tmp.png b/Images/tmp.png
new file mode 100644
index 0000000..8416898
Binary files /dev/null and b/Images/tmp.png differ
diff --git a/PyAudio-0.2.11-cp38-cp38-win_amd64.whl b/PyAudio-0.2.11-cp38-cp38-win_amd64.whl
new file mode 100644
index 0000000..4842e13
Binary files /dev/null and b/PyAudio-0.2.11-cp38-cp38-win_amd64.whl differ
diff --git a/docs/userguide.md b/docs/userguide.md
index ca1b6f9..6d55c2f 100644
--- a/docs/userguide.md
+++ b/docs/userguide.md
@@ -7,6 +7,7 @@ Sidekick takes voice commands and converts them to actions on the computer. It u
- `text` - this mode transcribes spoken speech to text
- `alpha` - this mode provides the ability to write individual letters, numbers, and punctuation
- `pause` - in this mode no commands are processed (convenient if afk).
+- `volume` - this mode allows the user to control the mouse with the volume of their voice
In each mode certain keywords are linked to certain actions. To switch between modes, simply say `command`, `mouse`, `text`, `alpha`, or `volume`. Say `pause` once to pause. Say `time to work` to restart back in the `command` state.
@@ -56,6 +57,11 @@ Some commands are stateless, in that they function no matter what state/mode you
- `hold` - holds down left mouse button until you say another word - useful for drag and drop or on Mac when need to hold and release to switch windows
- `hot` - hot key press (ex: `hot control alt delete go` presses `ctrl alt delete`) - using the word `apple` for the command key (ex: `hot apple f go` presses `command f`)
+#### New Stateful commands
+- `screenshot` opens a window that shows a real-time screenshot with a red grid overlay. This overlay corresponds to the grid-based mouse control. To turn off the screenshot, say the `screenshot` keyword again
+- `overlay` overlays a red grid over the entire screen. To turn off the overlay, close the window manually
+
+
#### Examples
- `scroll up 1 2 1` - will scroll up 1, then 2, then 1 again - number can be repeated without repeating entire command
@@ -115,4 +121,16 @@ The alpha mode enables punctuation as well as single alphanumeric characters.
#### Examples
-- `cap hello alpha comma text how are you alpha question` - will produce the text 'Hello, how are you?'
\ No newline at end of file
+- `cap hello alpha comma text how are you alpha question` - will produce the text 'Hello, how are you?'
+
+## Volume
+Volume mode allows the mouse to be controlled using the volume of your voice
+- `up` switch to vertical movement (default)
+- `left` switch to horizontal movement
+- `slow` mouse moves at a slow speed
+- `medium` mouse moves at a medium speed (default)
+- `fast` mouse moves at a fast speed
+- `stop` exits to command mode
+
+
+
diff --git a/overlay.py b/overlay.py
new file mode 100644
index 0000000..82bd03c
--- /dev/null
+++ b/overlay.py
@@ -0,0 +1,51 @@
+import sys
+import time
+import threading
+from PyQt5.QtWidgets import QApplication, QWidget, QLabel
+from PyQt5.QtGui import QIcon, QPixmap
+from PyQt5 import QtCore
+
+# References: https://pythonspot.com/pyqt5-image/
+# https://stackoverflow.com/questions/1925015/pyqt-always-on-top
+# https://stackoverflow.com/questions/37941039/pyqt-transparent-background-image-partially-black
+
+class Grid_Overlay(QWidget):
+
+    def __init__(self, grid="Images/1920x1080_grid.png"):
+        super().__init__()
+        self.title = 'Screenshot'
+        self.left = 10
+        self.top = 10
+        self.width = 640
+        self.height = 480
+        self.grid = grid
+        self.initUI()
+
+    def initUI(self):
+        self.setWindowTitle(self.title)
+        self.setGeometry(self.left, self.top, self.width, self.height)
+        self.setWindowFlag(QtCore.Qt.FramelessWindowHint)
+        self.setWindowFlag(QtCore.Qt.WindowStaysOnTopHint)
+        self.setAttribute(QtCore.Qt.WA_TranslucentBackground, True)
+
+        # Create widget
+        label = QLabel(self)
+        pixmap = QPixmap(self.grid)
+        label.setPixmap(pixmap)
+        self.resize(pixmap.width(), pixmap.height())
+        self.show()
+
+    def set_grid(self, filename):
+        self.grid = filename
+
+def overlay(filename):
+    app = QApplication([])
+    ex = Grid_Overlay(filename)
+    app.exec_()
+    app.quit()
+
+def main():
+    overlay("Images/1920x1080_grid.png")
+
+if __name__ == '__main__':
+    main()
diff --git a/parsepackage/command_parser.py b/parsepackage/command_parser.py
index 199646c..f0b0fb2 100644
--- a/parsepackage/command_parser.py
+++ b/parsepackage/command_parser.py
@@ -16,15 +16,20 @@
along with this program. If not, see .
'''
from actions import *
+from screenshot import *
+from overlay import overlay
import string
-
+import threading
+from os.path import exists
class CommandParser:
def __init__(self, system, steps):
self.os = system
self.steps = steps
self.tempvar = ""
-
+ self.stop_screenshot = [False]
+ self.screenshot_started = False
+ self.screen_size = (1920, 1080)
self.keys = ['a', 'b', 'c', 'd', 'e','f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z','alt','delete','control','shift','tab','apple']
@@ -125,6 +130,8 @@ def __init__(self, system, steps):
"west",
"save",
"scroll",
+ "screenshot",
+ "overlay",
]
self.commandlist = (
@@ -381,6 +388,35 @@ def evaluate_command(self, command_buffer):
else:
hotKeyPress(["ctrl", "s"])
command_buffer = []
+ elif command_buffer[0] == "line":
+ hotKeyPress(["end"])
+ hotKeyPress(["shift", "home"])
+ command_buffer = []
+ elif command_buffer[0] == "copy line":
+ hotKeyPress(["end"])
+ hotKeyPress(["shift", "home"])
+ if self.os == "Darwin":
+ hotKeyPress(["command", "c"])
+ else:
+ hotKeyPress(["ctrl", "c"])
+ command_buffer = []
+ elif command_buffer[0] == "cut line":
+ hotKeyPress(["end"])
+ hotKeyPress(["shift", "home"])
+ if self.os == "Darwin":
+ hotKeyPress(["command", "x"])
+ else:
+ hotKeyPress(["ctrl", "x"])
+ command_buffer = []
+ elif command_buffer[0] == "loop":
+ pyautogui.write("for (int i = 0; i < N; i++) {")
+ hotKeyPress(["enter"])
+ pyautogui.write("continue;")
+ hotKeyPress(["enter"])
+ pyautogui.write("}")
+ hotKeyPress(["enter"])
+ hotKeyPress(["up", "up", "end"])
+ command_buffer = []
elif command_buffer[0] == "switch":
if self.os == "Darwin":
@@ -548,6 +584,36 @@ def evaluate_command(self, command_buffer):
return self.handle_invalid_command(
command_buffer[1], command_buffer
)
+ elif command_buffer[0] == "screenshot":
+ if self.screenshot_started == False and self.stop_screenshot[0] == True:
+ self.stop_screenshot[0] = False
+
+ if self.screenshot_started == False:
+ print(command_buffer)
+ w = self.screen_size[0]
+ h = self.screen_size[1]
+ grid = "Images/{}x{}_grid.png".format(w,h)
+ if not exists(grid):
+ create_gridlines(w, h)
+
+ p = threading.Thread(target=take_screenshot, args=(w, h, grid, self.stop_screenshot))
+ p.start()
+ self.screenshot_started = True
+ else:
+ self.stop_screenshot[0] = True
+ self.screenshot_started = False
+ command_buffer=[]
+
+ elif command_buffer[0] == "overlay":
+ print("Showing grid overlay. Close the window manually to continue using Sidekick.")
+ w = self.screen_size[0]
+ h = self.screen_size[1]
+ grid = "Images/{}x{}_grid.png".format(w,h)
+ if not exists(grid):
+ create_gridlines(w, h)
+ overlay(grid)
+ command_buffer=[]
+
else:
command_buffer = []
diff --git a/parsepackage/horizontal_parser.py b/parsepackage/horizontal_parser.py
new file mode 100644
index 0000000..73a83b7
--- /dev/null
+++ b/parsepackage/horizontal_parser.py
@@ -0,0 +1,112 @@
+'''
+Sidekick
+Copyright (C) 2021 UT-Battelle - Created by Sean Oesch
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see .
+'''
+from actions import *
+import threading
+import math
+import audioop
+
+
+
+class HorizontalParser:
+ def __init__(self, system, steps):
+ self.volumeStarted = False
+ self.os = system
+ self.steps = steps
+ self.stopVolume = True
+
+ self.commands = [
+ "stop",
+ "snail",
+ "slow",
+ "fast",
+ "medium",
+ "up",
+ "down",
+ "counter",
+ "clock",
+ "north",
+ "south",
+ "east",
+ "west",
+ "one",
+ "two",
+ "three",
+ "four",
+ "northeast",
+ "northwest",
+ "southeast",
+ "southwest",
+ ]
+ def set_threshold(self, threshold):
+ self.threshold = threshold
+
+ self.midpoint = (self.threshold + 55) /2
+
+ def set_audio_stream(self, stream):
+ self.stream = stream
+
+ def evaluate_volume(self, command_buffer):
+ if not self.volumeStarted:
+
+ self.stopVolume = False
+ self.magnitude = 5 # in pixels
+ self.sleep = 0.2
+ self.setVolumeCoord(90)
+
+ data = self.stream.read(4000,exception_on_overflow = False)
+ # calculate decibels
+ dB = 20 * math.log10(audioop.rms(data,2)+1)
+
+ # if len(command_buffer) > 0:
+ """print("Volume " + str(dB))
+ if dB < 45:
+ print("MOM")
+ self.setVolumeCoord(self.currentangle + 15)
+ elif dB >= 45:
+ print("WOW")
+ self.setVolumeCoord(self.currentangle - 15)"""
+ command_buffer = []
+
+ if not self.volumeStarted:
+ self.startVolume()
+
+ return [command_buffer, "volume"]
+
+ def startVolume(self):
+ thread = threading.Thread(target=self.volume_thread)
+ thread.daemon = True
+ thread.start()
+ self.volumeStarted = True
+
+ def setVolumeCoord(self, degrees):
+ print("start")
+ self.currentangle = degrees
+ self.x = self.magnitude * math.cos(math.radians(degrees))
+ print(self.x)
+ self.y = -1 * self.magnitude * math.sin(math.radians(degrees))
+ print(self.y)
+ return
+
+ def volume_thread(self):
+ while True:
+ if self.stopVolume:
+ self.volumeStarted = False
+ break
+ else:
+ moveMouse(self.x, self.y)
+ time.sleep(self.sleep)
diff --git a/parsepackage/parser.py b/parsepackage/parser.py
index 02a9ede..a882ebc 100644
--- a/parsepackage/parser.py
+++ b/parsepackage/parser.py
@@ -17,11 +17,14 @@
'''
from actions import *
import platform
+
+from parsepackage.program_parser import ProgramParser
from .mouse_parser import MouseParser
from .text_parser import TextParser
from .command_parser import CommandParser
from .alpha_parser import AlphaParser
-
+from .volume_parser import VolumeParser
+from .horizontal_parser import HorizontalParser
class Parser:
def __init__(self):
@@ -29,6 +32,7 @@ def __init__(self):
self.state = "command"
self.command_buffer = []
self.pause = False
+ self.dB = 0
self.stepmapping = {
"one": 10,
@@ -47,13 +51,16 @@ def __init__(self):
"at": 1500,
}
- self.states = ["text", "command", "mouse", "pause", "alpha"]
+ self.states = ["text", "command", "pause", "alpha", "volume", "horizontal", "mouse"] #mouse
self.steps = ["one", "two", "three", "four", "five", "six", "seven", "eight"]
self.mouseParser = MouseParser(self.os, self.stepmapping)
self.textParser = TextParser(self.os, self.stepmapping)
self.commandParser = CommandParser(self.os, self.stepmapping)
+ self.programParser = ProgramParser(self.os)
self.alphaParser = AlphaParser(self.os)
+ self.volumeParser = VolumeParser(self.os, self.stepmapping)
+ self.horizontalParser = HorizontalParser(self.os, self.stepmapping)
# nontextcommands can be fed to a speech to text model to make it work more effectively for commands
self.nontextcommands = list(
@@ -69,6 +76,18 @@ def __init__(self):
)
# ingest string that may contain multiple space delimited words, where each word is a sent to parser individually
+ def set_threshold(self, threshold):
+ self.volumeParser.set_threshold(threshold)
+ self.horizontalParser.set_threshold(threshold)
+
+ def set_audio_stream(self, stream):
+ self.volumeParser.set_audio_stream(stream)
+ self.horizontalParser.set_audio_stream(stream)
+
+ def set_screen_size(self, screen_size):
+ self.commandParser.screen_size = screen_size
+
+
def ingest(self, words):
# print(word.lower())
for word in words.split(" "):
@@ -85,7 +104,7 @@ def ingest(self, words):
self.evaluate()
def evaluate(self):
-
+
if self.pause:
if self.command_buffer[0] == "time":
@@ -113,15 +132,32 @@ def evaluate(self):
elif self.command_buffer[-1] == "text":
self.state = "text"
self.command_buffer = []
+ elif self.command_buffer[-1] == "code":
+ self.state = "program"
+ self.command_buffer = []
elif self.command_buffer[-1] == "alpha":
self.state = "alpha"
self.command_buffer = []
- elif self.command_buffer[-1] == "mouse":
+ elif self.command_buffer[-1] == "mouse":
self.state = "mouse"
self.command_buffer = []
self.command_buffer, self.state = self.mouseParser.evaluate_mouse(
self.command_buffer
)
+ elif self.command_buffer[-1] == "volume":
+            # entering volume-controlled mouse mode
+ self.state = "volume"
+ self.command_buffer = []
+ self.command_buffer, self.state = self.volumeParser.evaluate_volume(
+ self.command_buffer,
+ self.dB
+ )
+ elif self.command_buffer[-1] == "horizontal":
+ self.state = "horizontal"
+ self.command_buffer = []
+ self.command_buffer, self.state = self.horizontalParser.evaluate_volume(
+ self.command_buffer
+ )
else: # send command to appropriate parsing function
if len(self.command_buffer) > 0:
(
@@ -137,6 +173,10 @@ def evaluate(self):
self.command_buffer = self.textParser.evaluate_text(
self.command_buffer
)
+ elif self.state == "program":
+ self.command_buffer = self.programParser.evaluate_text(
+ self.command_buffer
+ )
elif self.state == "alpha":
self.command_buffer = self.alphaParser.evaluate_text(
self.command_buffer
@@ -146,7 +186,20 @@ def evaluate(self):
self.command_buffer,
self.state,
) = self.mouseParser.evaluate_mouse(self.command_buffer)
-
+ elif self.state == "volume":
+ (
+ self.command_buffer,
+ self.state,
+ ) = self.volumeParser.evaluate_volume(self.command_buffer, self.dB)
+ elif self.state == "horizontal":
+ (
+ self.command_buffer,
+ self.state,
+ ) = self.horizontalParser.evaluate_volume(self.command_buffer)
# stop mouse if state is switched before stopping
if not self.mouseParser.stopMouse and self.state != "mouse":
self.mouseParser.stopMouse = True
+ if not self.volumeParser.stopVolume and self.state != "volume":
+ self.volumeParser.stopVolume = True
+ if not self.horizontalParser.stopVolume and self.state != "horizontal":
+ self.horizontalParser.stopVolume = True
diff --git a/parsepackage/program_parser.py b/parsepackage/program_parser.py
new file mode 100644
index 0000000..a36b679
--- /dev/null
+++ b/parsepackage/program_parser.py
@@ -0,0 +1,155 @@
+'''
+Sidekick
+Copyright (C) 2021 UT-Battelle - Created by Sean Oesch
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see .
+'''
+from actions import *
+import string
+
+
+class ProgramParser:
+ def __init__(self, system):
+ self.os = system
+ self.numbers = [
+ "zero",
+ "one",
+ "two",
+ "three",
+ "four",
+ "five",
+ "six",
+ "seven",
+ "eight",
+ "nine",
+ ]
+ self.punctuation = [
+ "ren",
+ "len",
+ "rack",
+ "equals",
+ "lack",
+ "period",
+ "colon",
+ "dash",
+ "comma",
+ "underscore",
+ "question",
+ "dot",
+ "hash",
+ "semicolon",
+ "bang",
+ "cap",
+ "exclamation",
+ "quote",
+ "single",
+ "if",
+ "while",
+ "for",
+ "and",
+ "or",
+ "mod"
+ ]
+ self.keywords = list(string.ascii_lowercase) + self.punctuation + self.numbers
+
+ def word_to_int(self, word):
+ mapping = {
+ "zero": "0",
+ "one": "1",
+ "two": "2",
+ "three": "3",
+ "four": "4",
+ "five": "5",
+ "six": "6",
+ "seven": "7",
+ "eight": "8",
+ "nine": "9",
+ }
+ return mapping[word]
+
+ def insert_punctuation(self, text):
+ if text == "period":
+ text = text.replace("period", ".")
+ elif text == "equals":
+ text = text.replace("equals", "==")
+ elif text == "ren":
+ text = text.replace("ren", ")")
+ elif text == "len":
+ text = text.replace("len", "(")
+ elif text == "lack":
+ text = text.replace("lack", "[")
+ elif text == "rack":
+ text = text.replace("rack", "]")
+ elif text == "colon":
+ text = text.replace("colon", ":")
+ elif text == "dash":
+ text = text.replace("dash", "-")
+ elif text == "comma":
+ text = text.replace("comma", ",")
+ elif text == "question":
+ text = text.replace("question", "?")
+ elif text == "dot":
+ text = text.replace("dot", ".")
+ elif text == "quote":
+ text = text.replace("quote", '"')
+ elif text == "hash":
+ text = text.replace("hash", "#")
+ elif text == "single":
+ text = text.replace("single", "'")
+ elif text == "underscore":
+ text = text.replace("underscore", "_")
+ elif text == "semicolon":
+ text = text.replace("semicolon", ";")
+ elif text == "bang" or text == "exclamation":
+ text = text.replace("bang", "!").replace("exclamation", "!")
+ elif text == "if":
+ text = text.replace("if", "if ():\n")
+ elif text == "while":
+ text = text.replace("while", "while ():\n")
+ elif text == "for":
+ text = text.replace("for", "for ():\n")
+ elif text == "and":
+ text = text.replace("and", "&&")
+ elif text == "or":
+ text = text.replace("or", "||")
+ elif text == "mod":
+ text = text.replace("mod", "%")
+ elif text == "assign":
+ text = text.replace("assign", "=")
+ elif text == "same":
+ text = text.replace("same", "===")
+ return text
+
+ def evaluate_text(self, command_buffer):
+ if command_buffer[0] == "cap": # capitalize next word spoken
+ if len(command_buffer) >= 2:
+ writeToScreen(command_buffer[1].capitalize())
+ if len(command_buffer) > 2:
+ command_buffer = command_buffer[2:]
+ else:
+ command_buffer = []
+ else:
+ for i in range(0, len(command_buffer)):
+ # some punctuation includes backspace and space after - other does not
+
+ if command_buffer[i] in self.punctuation:
+ writeToScreen(self.insert_punctuation(command_buffer[i]))
+ elif command_buffer[i] in self.numbers:
+ writeToScreen(self.word_to_int(command_buffer[i]))
+ else:
+ writeToScreen(command_buffer[i])
+
+ command_buffer = []
+
+ return command_buffer
\ No newline at end of file
diff --git a/parsepackage/volume_parser.py b/parsepackage/volume_parser.py
new file mode 100644
index 0000000..dcd0e0f
--- /dev/null
+++ b/parsepackage/volume_parser.py
@@ -0,0 +1,131 @@
+'''
+Sidekick
+Copyright (C) 2021 UT-Battelle - Created by Sean Oesch
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see .
+'''
+from actions import *
+import threading
+import math
+import audioop
+
+
+
+class VolumeParser:
+ def __init__(self, system, steps):
+ self.volumeStarted = False
+ self.os = system
+ self.steps = steps
+ self.stopVolume = True
+ self.dB = 0.0
+ self.thresh = [0, 0, 0]
+ self.vert = True
+ self.horiz = False
+
+ self.commands = [
+ "stop",
+ "left", #left and right
+ "up", #up and down
+ "slow",
+ "medium",
+ "fast",
+ ]
+ def set_threshold(self, threshold):
+ self.threshold = threshold
+
+ self.midpoint = (self.threshold + 55) /2
+
+ def set_audio_stream(self, stream):
+ self.stream = stream
+
+ def evaluate_volume(self, command_buffer, dB):
+
+
+ if not self.volumeStarted:
+ self.stopVolume = False
+ self.magnitude = 10 # in pixels
+ self.sleep = 0.2
+ self.setVolumeCoord(90)
+ print("Starting...")
+
+ if len(command_buffer) != 0:
+ if command_buffer[0] == 'stop':
+ self.stopVolume = True
+ self.volumeStarted = False
+
+ if command_buffer[0] == 'up':
+ self.vert = True
+ self.horiz = False
+
+ if command_buffer[0] == 'left':
+ self.vert = False
+ self.horiz = True
+
+ if command_buffer[0] == "slow":
+ self.magnitude = 5
+ if command_buffer[0] == "medium":
+ self.magnitude = 10
+ if command_buffer[0] == "fast":
+ self.magnitude = 20
+
+ # calculate decibels
+ data = self.stream.read(4000,exception_on_overflow = False)
+ self.dB = 20 * math.log10(audioop.rms(data,2)+1)
+
+ if self.vert:
+ if self.dB < 35 and self.dB > self.thresh[0]:
+ self.setVolumeCoord(270)
+ elif self.dB >= self.thresh[2]:
+ self.setVolumeCoord(90)
+ command_buffer = []
+
+ if self.horiz:
+ if self.dB < 35 and self.dB > self.thresh[0]:
+ self.setVolumeCoord(0)
+ elif self.dB >= self.thresh[2]:
+ self.setVolumeCoord(180)
+ command_buffer = []
+
+
+
+
+ command_buffer = []
+
+ if not self.volumeStarted:
+ self.startVolume()
+
+
+ return [command_buffer, "volume"]
+
+ def startVolume(self):
+ thread = threading.Thread(target=self.volume_thread)
+ thread.daemon = True
+ thread.start()
+ self.volumeStarted = True
+
+ def setVolumeCoord(self, degrees):
+ if self.stopVolume == False:
+ self.currentangle = degrees
+ self.x = self.magnitude * math.cos(math.radians(degrees))
+ self.y = -1 * self.magnitude * math.sin(math.radians(degrees))
+ return
+
+ def volume_thread(self):
+ while True:
+ if self.stopVolume:
+ self.volumeStarted = False
+ break
+ else:
+ moveMouse(self.x, self.y)
+ time.sleep(self.sleep)
diff --git a/screenshot.py b/screenshot.py
new file mode 100644
index 0000000..6275066
--- /dev/null
+++ b/screenshot.py
@@ -0,0 +1,77 @@
+import re
+import numpy as np
+import cv2
+import pyautogui
+from PIL import Image, ImageDraw
+import time
+
+
+def get_pos(width):
+ #There are 11 gridlines for x and y
+ line_space = np.round_(width/11, 0)
+ x_pos = []
+ count = 0
+ for i in range(1, 11):
+ count += line_space
+ x_pos.append(count)
+ return x_pos
+
+
+
+#Set the resolution, probably want this to be changable
+
+
+#Create the gridlines
+def create_gridlines(w, h):
+ x_pos = get_pos(w)
+ y_pos = get_pos(h)
+ new_image = Image.new(mode='RGBA', size=(w, h), color=(255,255,255,0))
+ for x in x_pos:
+ draw = ImageDraw.Draw(new_image)
+ #x = new_image.width / 2
+ y0 = 0
+ y1 = new_image.height
+ line = ((x, y0), (x, y1))
+ draw.line(line, fill="red", width=2)
+ del draw
+
+ for y in y_pos:
+ draw = ImageDraw.Draw(new_image)
+ #x = new_image.width / 2
+ x0 = 0
+ x1 = new_image.width
+ line = ((x0, y), (x1, y))
+ draw.line(line, fill="red", width=3)
+ del draw
+ new_image.save("Images/{}x{}_grid.png".format(w,h))
+ return "Images/{}x{}_grid.png".format(w,h)
+
+
+
+#https://www.geeksforgeeks.org/how-to-take-screenshots-using-python/
+#https://www.geeksforgeeks.org/overlay-an-image-on-another-image-in-python/
+#https://www.codegrepper.com/code-examples/python/display+image+python+small+screen
+#https://www.etutorialspoint.com/index.php/319-python-opencv-overlaying-or-blending-two-images
+#https://stackoverflow.com/questions/57736832/how-can-you-read-rgba-using-opencv
+
+def take_screenshot(w, h, grid_file, stop):
+ while not stop[0]:
+ cv2.namedWindow("output", cv2.WINDOW_NORMAL)
+ image = pyautogui.screenshot()
+ image = cv2.cvtColor(np.array(image),
+ cv2.COLOR_RGB2BGRA)
+ cv2.imwrite("Images/screenshot.png", image)
+
+
+ im1 = Image.open("Images/screenshot.png").convert("RGBA")
+ im2 = Image.open(grid_file).convert("RGBA")
+ im1.paste(im2, (0,0), mask = im2)
+ # Displaying the image
+ im1.save("Images/tmp.png")
+ im = cv2.imread("Images/tmp.png")
+
+ imS = cv2.resize(im, (w, h))
+ cv2.imshow("output", imS)
+ k = cv2.waitKey(17)
+ cv2.destroyAllWindows()
+ exit()
\ No newline at end of file
diff --git a/sidekick.cfg b/sidekick.cfg
new file mode 100644
index 0000000..fbb0302
--- /dev/null
+++ b/sidekick.cfg
@@ -0,0 +1,3 @@
+{
+"resolution": "1920x1080"
+}
\ No newline at end of file
diff --git a/sidekick.py b/sidekick.py
index 5f5da7e..7db6fd0 100644
--- a/sidekick.py
+++ b/sidekick.py
@@ -15,19 +15,21 @@
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see .
'''
from vosk import Model, KaldiRecognizer
+import sys
import os
import json
import audioop
import string
import math
from parsepackage import *
+from parsepackage.volume_parser import VolumeParser
if not os.path.exists("model"):
print ("Please download the model from https://alphacephei.com/vosk/models and unpack as 'model' in the current folder.")
exit (1)
import pyaudio
parser = parser.Parser()
@@ -38,21 +44,24 @@ def listToList(words):
wordlist = wordlist.strip(",") + "]"
return wordlist
-def setRec(state,crec,trec,arec):
+def setRec(state,crec,trec,arec, prec):
if state == "text":
return trec
- elif state == "command" or state == "mouse":
+ elif state == "program":
+ return prec
+ elif state == "command" or state == "mouse" or state == "volume":
return crec
else:
return arec
-def clearRec(crec,trec,arec):
+def clearRec(crec,trec,arec,prec):
crec.Result()
trec.Result()
arec.Result()
+ prec.Result()
-def stateSwap(nextstate,crec,trec,arec):
- rec = setRec(nextstate,crec,trec,arec)
+def stateSwap(nextstate,crec,trec,arec, prec):
+ rec = setRec(nextstate,crec,trec,arec, prec)
res = json.loads(rec.Result())
swap = False
if res["text"] != "":
@@ -62,28 +71,53 @@ def stateSwap(nextstate,crec,trec,arec):
if res["text"] == nextstate:
swap = True
- clearRec(crec,trec,arec)
+ clearRec(crec,trec,arec,prec)
-def ingest(currentstate,crec,trec,arec):
- rec = setRec(currentstate,crec,trec,arec)
+def ingest(currentstate,crec,trec,arec, prec):
+ rec = setRec(currentstate,crec,trec,arec, prec)
res = json.loads(rec.Result()) # this not only returns the most accurate result, but also flushes the list of words stored internally
if res["text"] != "":
for text in res["text"].split(" "):
- if text in ["text","alpha","command"] and text != currentstate:
+ if text in ["text","alpha","command", "program"] and text != currentstate:
parser.ingest(text)
- stateSwap(text,crec,trec,arec)
+ stateSwap(text,crec,trec,arec, prec)
else:
parser.ingest(text)
-
- clearRec(crec,trec,arec)
+
+ clearRec(crec,trec,arec, prec)
+
+def load_config():
+ f =open("sidekick.cfg")
+ config = json.load(f)
+ f.close()
+ return config
+
+config = load_config()
+
+# Set the screen resolution for screenshots from config file
+resolution = config['resolution'].split("x")
+screen_size = (int(resolution[0]), int(resolution[1]))
+print(screen_size)
+parser.set_screen_size(screen_size)
# create wordlist for our command model so that commands will be more accurately detected
+lower_buffer = 0
+upper_buffer = 0
+if len(sys.argv) >= 2:
+ lower_buffer = sys.argv[1]
+
+if len(sys.argv) >= 3:
+ upper_buffer = sys.argv[2]
+
+
+
commandwords = listToList(parser.nontextcommands)
alphavals = listToList(parser.alphavalues)
model = Model("model")
# the text recommender uses the standard model for transcription
textrec = KaldiRecognizer(model, 16000)
+programrec = KaldiRecognizer(model, 16000)
# use wordlist in our command recommender
commandrec = KaldiRecognizer(model, 16000, commandwords)
alpharec = KaldiRecognizer(model, 16000, alphavals)
@@ -91,7 +125,7 @@ def ingest(currentstate,crec,trec,arec):
p = pyaudio.PyAudio()
stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000, input=True, frames_per_buffer=8000)
stream.start_stream()
-
+parser.set_audio_stream(stream)
print("\nSidekick at your service. Please wait silently for the threshold to be set based on ambient noise before use.")
threshold_buffer = 1 # how many dB above ambient noise threshold will be set
@@ -105,7 +139,7 @@ def ingest(currentstate,crec,trec,arec):
data = stream.read(4000,exception_on_overflow = False)
# calculate decibels
- dB = 20 * math.log10(audioop.rms(data,2))
+ dB = 20 * math.log10(audioop.rms(data,2)+1)
# we want to set threshold based on ambient noise prior to processing audio data
if not thresholdset:
@@ -116,7 +150,9 @@ def ingest(currentstate,crec,trec,arec):
print("Your sidekick now awaits your command.")
threshold = sum(ambientvals) / len(ambientvals) + threshold_buffer
print("Threshold is now set at " + str(round(threshold,2)) + " dB.")
-
+ parser.set_threshold(threshold)
+
+
# send audio data to model for processing when threshold breached and shortly afterward
elif dB > threshold or wait == True:
@@ -129,21 +165,58 @@ def ingest(currentstate,crec,trec,arec):
wait = False
trec = textrec.AcceptWaveform(data)
+ prec = programrec.AcceptWaveform(data)
crec = commandrec.AcceptWaveform(data)
arec = alpharec.AcceptWaveform(data)
-
if len(data) == 0:
break
+    # parser.state selects which recognizer's result is consumed below
if parser.state == "text":
if trec: # if this returns true model has determined best word candidate
- ingest(parser.state,commandrec,textrec,alpharec)
+ ingest(parser.state,commandrec,textrec,alpharec, programrec)
else: # if false only a partial result returned - not useful for this application
pass
#print(rec.PartialResult()) - partial result is faster, but not accurate enough for use
-
+
+ elif parser.state == "program":
+ if prec: # if this returns true model has determined best word candidate
+ ingest(parser.state,commandrec,textrec,alpharec,programrec)
+ else: # if false only a partial result returned - not useful for this application
+ pass
+
elif parser.state == "alpha":
if arec: # if this returns true model has determined best word candidate
- ingest(parser.state,commandrec,textrec,alpharec)
+ ingest(parser.state,commandrec,textrec,alpharec, programrec)
+
+ elif parser.state == "volume":
+ #ingest(parser.state,commandrec,textrec,alpharec,programrec)
+ if parser.volumeParser.volumeStarted == True:
+ parser.dB = dB
+
+ lower_threshold = threshold + float(lower_buffer)
+ upper_threshold = 50 + float(upper_buffer)
+
+        # replace the three slots in-place: appending would grow the list every
+        # loop pass and leave thresh[0]/thresh[2] stuck at their initial zeros
+        parser.volumeParser.thresh[:3] = [lower_threshold, (upper_threshold-lower_threshold) + lower_threshold, upper_threshold]
+
+ parser.volumeParser.evaluate_volume(parser.command_buffer, parser.dB)
+ ingest(parser.state,commandrec,textrec,alpharec, programrec)
+
+ if parser.volumeParser.stopVolume == True:
+ parser.state = "command"
+
+ elif parser.state == "horizontal":
+ if parser.horizontalParser.volumeStarted == True:
+ if dB < 35:
+ y = parser.horizontalParser.setVolumeCoord(180)
+ elif dB >= 35:
+ parser.horizontalParser.setVolumeCoord(0)
+ command_buffer = []
+
+
else:
if crec: # if this returns true model has determined best word candidate
- ingest(parser.state,commandrec,textrec,alpharec)
\ No newline at end of file
+ ingest(parser.state,commandrec,textrec,alpharec, programrec)
+
+
diff --git a/vosk-model-small-en-us-0.15.zip b/vosk-model-small-en-us-0.15.zip
new file mode 100644
index 0000000..0c94ec8
Binary files /dev/null and b/vosk-model-small-en-us-0.15.zip differ