-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmicpipe.py
More file actions
1631 lines (1443 loc) · 67.5 KB
/
micpipe.py
File metadata and controls
1631 lines (1443 loc) · 67.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
import logging
import time
import os
import json
import Quartz
import threading
import re
import rumps
import argparse
from AppKit import NSWorkspace, NSApplicationActivateIgnoringOtherApps, NSSound, NSScreen
from chrome_script import ChatGPTChrome, GeminiChrome
from clipboard_guard import snapshot_clipboard
from paste_tool import paste_text
from state_manager import MicPipeStateStore
# ============================================================
# HOTKEY CONFIGURATION - Change the keycode below to customize
# ============================================================
# Common keycodes for reference:
# 63 - Fn (Function key)
# 54 - Right Command
# 55 - Left Command
# 58 - Left Option
# 61 - Right Option
# 59 - Left Control
# 62 - Right Control
# 48 - Tab
# 53 - Escape
# 51 - Delete (Backspace)
# 117 - Forward Delete
# 36 - Return (Enter)
# 49 - Space
# ============================================================
# Application version; shown in the menu as "Version: <value>".
__version__ = "1.5.1"
# Presumably the default voice auto-stop delay in seconds; it is not referenced
# in the visible part of this file — TODO confirm where it is consumed.
VOICE_IDLE_TIMEOUT_SECONDS = 20
# Choices (in seconds) offered in the "Auto-Stop Delay" submenu; 0 is shown as "Off".
VOICE_IDLE_TIMEOUT_OPTIONS = [0, 10, 15, 20, 25, 30]
def configure_logging(debug: bool):
    """Configure root logging at DEBUG level when *debug* is true, INFO otherwise."""
    if debug:
        verbosity = logging.DEBUG
    else:
        verbosity = logging.INFO
    logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        level=verbosity,
    )
# Module-level logger used by the functions and methods in this file.
logger = logging.getLogger(__name__)
# ============================================================
class MicPipeApp(rumps.App):
# Finds the "USED_WIN_ID=<id>,TAB=<n>:" / "FALLBACK_WIN_ID=<id>,TAB=<n>:" marker
# inside a Chrome script result; group 1 is the window id, group 2 the tab index.
_TAB_LOC_RE = re.compile(r"(?:USED_WIN_ID|FALLBACK_WIN_ID)=(\d+),TAB=(\d+):")
def __init__(self, debug: bool = False):
    """Create the menu-bar app: load saved settings, set up Chrome controllers, build the menu.

    Args:
        debug: Kept on ``self.debug`` and forwarded to the two window-bounds
            helpers, which return different placements when it is true.
    """
    super(MicPipeApp, self).__init__("MicPipe", quit_button="Quit")
    self.base_path = os.path.dirname(__file__)
    self.icon = os.path.join(self.base_path, "assets/icon_idle_template.png")
    self.template = True  # Enable template mode for idle icon
    # Persisted settings file: ~/Library/Application Support/MicPipe/micpipe_state.json
    self.state_path = os.path.join(
        os.path.expanduser("~"),
        "Library",
        "Application Support",
        "MicPipe",
        "micpipe_state.json",
    )
    self.state_store = MicPipeStateStore(self.state_path, logger)
    self.debug = debug
    self.dedicated_bounds = self._compute_dedicated_bounds(debug)
    self.voice_bounds = self._compute_voice_bounds(debug)
    # Load saved state
    state = self.state_store.load()
    self.current_service = state["current_service"]
    self.sound_enabled = state["sound_enabled"]
    self.dedicated_windows = state["dedicated_windows"]
    self.trigger_key = state["trigger_key"]
    self.voice_idle_timeout_seconds = state["voice_idle_timeout_seconds"]
    self.pipe_slots = state["pipe_slots"]
    self.current_pipe_slot = state["current_pipe_slot"]
    self.chatgpt_chrome = ChatGPTChrome()
    self.gemini_chrome = GeminiChrome()
    self.chrome = self.chatgpt_chrome if self.current_service == "ChatGPT" else self.gemini_chrome  # Active controller
    # Runtime flags; these are the values reset_app() restores.
    self.is_recording = False
    self.is_voice_conversation = False
    self._voice_conversation_starting = False
    # Voice idle-tracking fields (same initial values as _reset_voice_activity_tracking).
    self._voice_activity_signature = ""
    self._last_voice_activity_at = 0.0
    self._last_voice_activity_check_at = 0.0
    self._voice_activity_check_inflight = False
    self._voice_idle_stop_requested = False
    # Drives the icon shown by _update_animation: "IDLE", "RECORDING",
    # "VOICE_CONVERSATION", "WAITING" or "PROCESSING".
    self.current_state = "IDLE"
    self.animation_frame = 0
    # Quartz event tap handle; it is not created in this method — presumably
    # installed elsewhere in the file (not visible here).
    self.tap = None
    # Internal state
    self.target_app = None
    self.target_is_service_page = False  # True if triggered from ChatGPT/Gemini page
    self.trigger_key_currently_pressed = False
    self.voice_fn_currently_pressed = False
    self.should_auto_start = False
    self.waiting_for_page = False
    self.service_tab_location = None  # Dedicated window location
    self.dedicated_window = None
    self._sound_start = os.path.join(self.base_path, "assets", "sound_start.wav")
    self._sound_stop = os.path.join(self.base_path, "assets", "sound_stop.wav")
    self._sound_voice_start = os.path.join(self.base_path, "assets", "sound_voice_start.wav")
    self._sound_voice_stop = os.path.join(self.base_path, "assets", "sound_voice_stop.wav")
    # File polled by _check_cmd_file for "voice-start" / "voice-stop" / "voice-toggle".
    self._cmd_file = os.path.join(os.path.expanduser("~"), "Library", "Application Support", "MicPipe", "cmd")
    # Ensure dedicated window exists at startup.
    # NOTE(review): this call runs synchronously here; only the readiness
    # check on the next line is started on a separate thread.
    self._ensure_dedicated_window()
    threading.Thread(target=self._check_service_ready_on_startup).start()
    # Build menu
    self.status_item = rumps.MenuItem("Status: Ready", callback=None)
    # Service selection submenu
    self.service_chatgpt = rumps.MenuItem("ChatGPT", callback=self.select_chatgpt)
    self.service_chatgpt.state = 1 if self.current_service == "ChatGPT" else 0
    self.service_gemini = rumps.MenuItem("Gemini", callback=self.select_gemini)
    self.service_gemini.state = 1 if self.current_service == "Gemini" else 0
    self.service_menu = rumps.MenuItem("Service")
    self.service_menu.add(self.service_chatgpt)
    self.service_menu.add(self.service_gemini)
    # Hotkey selection submenu
    self.hotkey_menu = rumps.MenuItem("Hotkey")
    self.hotkey_items = {}  # keycode -> menu item
    for keycode, display_name in MicPipeStateStore.HOTKEY_OPTIONS:
        item = rumps.MenuItem(display_name, callback=self._make_hotkey_callback(keycode))
        item.state = 1 if keycode == self.trigger_key else 0
        self.hotkey_items[keycode] = item
        self.hotkey_menu.add(item)
    # Voice auto-stop delay submenu
    self.voice_idle_menu = rumps.MenuItem(" Auto-Stop Delay")
    self.voice_idle_items = {}  # seconds -> menu item
    for seconds in VOICE_IDLE_TIMEOUT_OPTIONS:
        label = "Off" if seconds == 0 else f"{seconds}s"
        item = rumps.MenuItem(label, callback=self._make_voice_idle_timeout_callback(seconds))
        item.state = 1 if seconds == self.voice_idle_timeout_seconds else 0
        self.voice_idle_items[seconds] = item
        self.voice_idle_menu.add(item)
    # Titles for these two items are filled in by _refresh_voice_menu_info() below.
    self.realtime_voice_start_item = rumps.MenuItem("", callback=None)
    self.realtime_voice_stop_item = rumps.MenuItem("", callback=None)
    # AI Pipe submenu
    self.pipe_menu = rumps.MenuItem("AI Pipe")
    self.pipe_items = {}  # slot index (-1, -2, 0..4) -> menu item
    # Off option
    off_item = rumps.MenuItem("Off (Direct Voice-to-Text)", callback=self._make_pipe_callback(-1))
    off_item.state = 1 if self.current_pipe_slot == -1 else 0
    self.pipe_items[-1] = off_item
    self.pipe_menu.add(off_item)
    # Conversation Mode option (special slot -2: no preset prompt, direct AI processing)
    conv_mode_item = rumps.MenuItem("Conversation Mode (AI follows your spoken instructions)", callback=self._make_pipe_callback(-2))
    conv_mode_item.state = 1 if self.current_pipe_slot == -2 else 0
    self._conv_mode_item = conv_mode_item
    self.pipe_items[-2] = conv_mode_item
    self.pipe_menu.add(conv_mode_item)
    self.pipe_menu.add(None)  # Separator
    # Slot options: each slot is a submenu with a "use" entry and an "edit" entry.
    # This loop assumes self.pipe_slots holds at least five entries.
    for i in range(5):
        slot_label = self._get_slot_label(i)
        # Click selects the slot; we'll add edit via a submenu per slot
        slot_submenu = rumps.MenuItem(slot_label)
        slot_submenu.state = 1 if self.current_pipe_slot == i else 0
        # Add submenu items: Select and Edit
        select_item = rumps.MenuItem("✓ Use This Prompt", callback=self._make_pipe_callback(i))
        edit_item = rumps.MenuItem("✎ Edit...", callback=self._make_edit_slot_callback(i))
        slot_submenu.add(select_item)
        slot_submenu.add(edit_item)
        self.pipe_items[i] = slot_submenu
        self.pipe_menu.add(slot_submenu)
    self.pipe_menu.add(None)  # Separator
    # Add non-clickable note about latency
    latency_note = rumps.MenuItem("Note: AI processing adds latency", callback=None)
    self.pipe_menu.add(latency_note)
    # Informational (callback=None) rows and section titles.
    self.hold_mode_info = rumps.MenuItem(f" Hold → Hold to Speak", callback=None)
    self.toggle_mode_info = rumps.MenuItem(f" Click → Toggle Start/Stop", callback=None)
    self.cancel_mode_info = rumps.MenuItem(" Press Esc → Cancel Dictation", callback=None)
    self.dictation_section_title = rumps.MenuItem("Dictation", callback=None)
    self.realtime_voice_section_title = rumps.MenuItem("ChatGPT Realtime Voice Chat", callback=None)
    self.system_section_title = rumps.MenuItem("System", callback=None)
    self.sound_toggle_item = rumps.MenuItem(
        "Sound: On" if self.sound_enabled else "Sound: Off",
        callback=self.toggle_sound
    )
    self.reset_item = rumps.MenuItem("Reset (Self-Check & Repair)", callback=self.reset_app)
    self.version_info = rumps.MenuItem(f"Version: {__version__}", callback=None)
    self._refresh_voice_menu_info()
    self.menu = [
        self.status_item,
        None,  # Separator
        self.dictation_section_title,
        self.service_menu,
        self.hotkey_menu,
        self.pipe_menu,
        self.hold_mode_info,
        self.toggle_mode_info,
        self.cancel_mode_info,
        None,  # Separator
        self.realtime_voice_section_title,
        self.realtime_voice_start_item,
        self.realtime_voice_stop_item,
        self.voice_idle_menu,
        None,  # Separator
        self.system_section_title,
        self.sound_toggle_item,
        self.reset_item,
        None,  # Separator
        self.version_info
    ]
    # Animation timer (runs at 10Hz)
    self.timer = rumps.Timer(self._update_animation, 0.1)
    self.timer.start()
def _save_state(self):
self.state_store.save(
self.current_service,
self.sound_enabled,
self.dedicated_windows,
self.trigger_key,
self.voice_idle_timeout_seconds,
self.pipe_slots,
self.current_pipe_slot
)
def _make_hotkey_callback(self, keycode):
"""Create a callback function for hotkey menu item selection."""
def callback(_):
if self.is_recording:
return # Don't change hotkey during recording
# Update checkmarks
for kc, item in self.hotkey_items.items():
item.state = 1 if kc == keycode else 0
self.trigger_key = keycode
self._refresh_voice_menu_info()
self._save_state()
# Show notification about the change
key_name = self._get_key_name(keycode)
rumps.notification("MicPipe", "Hotkey Changed", f"New hotkey: {key_name}")
return callback
def _refresh_voice_menu_info(self):
self.realtime_voice_start_item.title = " Start: Control+Fn"
self.realtime_voice_stop_item.title = " Stop: Fn or Esc"
def _make_voice_idle_timeout_callback(self, seconds):
"""Create a callback for selecting the voice auto-stop timeout."""
def callback(_):
for timeout, item in self.voice_idle_items.items():
item.state = 1 if timeout == seconds else 0
self.voice_idle_timeout_seconds = seconds
self._save_state()
message = "Disabled" if seconds == 0 else f"Auto-stop after {seconds}s"
rumps.notification("MicPipe", "Voice Auto-Stop", message)
return callback
def _get_slot_label(self, slot_index):
"""Get display label for a pipe slot (uses title)"""
slot = self.pipe_slots[slot_index]
title = slot.get("title", "") if isinstance(slot, dict) else ""
prompt = slot.get("prompt", "") if isinstance(slot, dict) else slot
if title:
return f"Slot {slot_index + 1}: {title}"
elif prompt:
preview = prompt[:25] + "..." if len(prompt) > 25 else prompt
return f"Slot {slot_index + 1}: {preview}"
else:
return f"Slot {slot_index + 1}: (empty)"
def _make_pipe_callback(self, slot_index):
    """Create the menu callback that selects AI Pipe slot *slot_index*.

    Special indices: -1 turns the pipe off and -2 selects Conversation Mode;
    indices 0 and up refer to entries of ``self.pipe_slots``.
    """
    def callback(_):
        if self.is_recording:
            return  # Don't change during recording
        # Update checkmarks
        for idx, item in self.pipe_items.items():
            item.state = 1 if idx == slot_index else 0
        self.current_pipe_slot = slot_index
        self._save_state()
        # Show notification
        if slot_index == -1:
            rumps.notification("MicPipe", "AI Pipe", "AI Pipe disabled")
        elif slot_index == -2:
            # Conversation Mode has no stored slot. Indexing pipe_slots with -2
            # would silently read the second-to-last preset and report its title.
            rumps.notification("MicPipe", "AI Pipe", "Using: Conversation Mode")
        else:
            slot = self.pipe_slots[slot_index]
            title = slot.get("title", "") if isinstance(slot, dict) else ""
            msg = title if title else f"Slot {slot_index + 1}"
            rumps.notification("MicPipe", "AI Pipe", f"Using: {msg}")
    return callback
def _make_edit_slot_callback(self, slot_index):
    """Create the menu callback that edits pipe slot *slot_index* in the standalone editor.

    The callback starts ``slot_editor.py`` (located next to this file) as a
    separate process on a daemon thread, passing the slot index, current title
    and current prompt as arguments. If the editor exits with code 0 and prints
    a JSON object whose "saved" entry is truthy, the slot is replaced with the
    returned title/prompt, the state is saved and the menu label is refreshed.
    """
    def callback(_):
        # subprocess and sys are not imported at the top of the visible file.
        import subprocess
        import sys

        slot = self.pipe_slots[slot_index]
        current_title = slot.get("title", "") if isinstance(slot, dict) else ""
        current_prompt = slot.get("prompt", "") if isinstance(slot, dict) else slot

        def run_editor():
            try:
                # Get path to editor script
                script_dir = os.path.dirname(os.path.abspath(__file__))
                editor_path = os.path.join(script_dir, "slot_editor.py")
                # Run editor as separate process using Popen
                proc = subprocess.Popen(
                    [sys.executable, editor_path, str(slot_index), current_title, current_prompt],
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                    text=True
                )
                # Wait for editor to close
                try:
                    stdout, stderr = proc.communicate(timeout=300)
                except subprocess.TimeoutExpired as e:
                    # communicate() does not kill the child on timeout; kill it
                    # and reap it so the editor process and its pipes are released.
                    proc.kill()
                    proc.communicate()
                    logger.error(f"Editor failed: {e}")
                    return
                if proc.returncode == 0 and stdout.strip():
                    data = json.loads(stdout.strip())
                    if data.get("saved"):
                        self.pipe_slots[slot_index] = {
                            "title": data["title"],
                            "prompt": data["prompt"]
                        }
                        self._save_state()
                        # Update menu
                        new_label = self._get_slot_label(slot_index)
                        self.pipe_items[slot_index].title = new_label
                        rumps.notification("MicPipe", "Slot Updated", f"Slot {slot_index + 1} has been updated")
                else:
                    if stderr:
                        logger.debug(f"Editor stderr: {stderr}")
            except Exception as e:
                logger.error(f"Editor failed: {e}")

        # Run in thread to not block
        thread = threading.Thread(target=run_editor, daemon=True)
        thread.start()
    return callback
def _compute_dedicated_bounds(self, debug: bool):
# Fixed window size that ensures the microphone button is visible
width = 700
height = 500
if debug:
# Debug mode: visible window at top-left
return (0, 0, width, height)
# Production mode: Push window to bottom-right corner, minimizing visible area.
# macOS enforces that at least a few pixels remain visible. Through testing:
# - Pushing right: left = screen_width - 5 leaves only ~5px visible
# - Pushing down: top = screen_height - 50 works well
try:
screens = NSScreen.screens()
if not screens:
return (20000, 2000, 20000 + width, 2000 + height)
# Get the main screen dimensions
main_screen = screens[0].frame()
screen_width = int(main_screen.size.width)
screen_height = int(main_screen.size.height)
# Position at bottom-right corner, leaving minimal visible area
left = screen_width - 5 # Only ~5px visible on right edge
top = screen_height - 50 # Near bottom of screen
return (left, top, left + width, top + height)
except Exception:
return (20000, 2000, 20000 + width, 2000 + height)
def _compute_voice_bounds(self, debug: bool):
width = 760
height = 620
if debug:
return (120, 90, 120 + width, 90 + height)
try:
screens = NSScreen.screens()
if not screens:
return (160, 100, 160 + width, 100 + height)
visible = screens[0].visibleFrame()
margin_left = 32
margin_top = 72
left = int(visible.origin.x + margin_left)
top = int(margin_top)
return (left, top, left + width, top + height)
except Exception:
return (160, 100, 160 + width, 100 + height)
def _get_ready_status(self, res: str) -> str:
if not res or not res.startswith("SUCCESS"):
return ""
return res.rsplit(":", 1)[-1]
def _prompt_service_login(self, details: str):
    """Ask Chrome to reveal the service window (best effort) and notify the user.

    Args:
        details: Body text of the "Login or Permission Issue" notification.
    """
    location = self.service_tab_location or self.dedicated_windows.get(self.current_service)
    if location:
        try:
            self.chrome.reveal_window(location[0])
        except Exception as e:
            # Best effort: the notification below is shown either way. Log the
            # failure the same way _hide_dedicated_window does instead of hiding it.
            logger.debug(f"Failed to reveal dedicated window: {e}")
    rumps.notification(
        "MicPipe",
        f"{self.current_service} Login or Permission Issue",
        details
    )
def _window_creation_failure_message(self, chrome, service_name: str) -> str:
error = (getattr(chrome, "last_error", "") or "").strip()
if ":-1743:" in error or "Not authorized to send Apple events to Google Chrome" in error:
return (
"MicPipe does not currently have permission to control Google Chrome. "
"Go to System Settings > Privacy & Security > Automation, "
"allow your terminal or launcher app to control Google Chrome, then try again."
)
if ":-1728:" in error or "Can't get application \"Google Chrome\"" in error:
return (
"Google Chrome could not be accessed. "
"Open Chrome once manually, confirm Automation permission, and try again."
)
if error == "EMPTY_RESULT":
return (
f"Could not create the dedicated {service_name} window. "
"Make sure Google Chrome is installed and Automation permission is working."
)
if error:
return f"Could not create the dedicated {service_name} window. Details: {error}"
return f"Could not create the dedicated {service_name} window."
def _update_service_tab_location_from_result(self, result: str):
"""Update self.service_tab_location when Chrome reports the actual window/tab used."""
if not result:
return
m = self._TAB_LOC_RE.search(result)
if not m:
return
try:
win_id = int(m.group(1))
tab_idx = int(m.group(2))
except Exception:
return
if win_id > 0 and tab_idx > 0:
old = self.service_tab_location
self.service_tab_location = (win_id, tab_idx)
self.dedicated_window = self.service_tab_location
self.dedicated_windows[self.current_service] = self.service_tab_location
self._save_state()
if old != self.service_tab_location:
logger.debug(f"Updated service_tab_location: {old} -> {self.service_tab_location}")
def _ensure_dedicated_window(self):
    """Ensure the dedicated window exists for the current service.

    Reuses the saved window when the controller reports it alive; otherwise
    asks the controller to create a new one.

    Returns:
        A ``(location, created)`` pair. ``location`` is the window location
        or ``None`` on failure; ``created`` is True only when a new window
        was created by this call.
    """
    try:
        chrome = self.chatgpt_chrome if self.current_service == "ChatGPT" else self.gemini_chrome
        service_name = self.current_service
        location = self.dedicated_windows.get(service_name)
        if location and chrome.is_window_alive(*location):
            # Saved window is still there: adopt it and put it back in place.
            self.dedicated_window = location
            self.service_tab_location = location
            try:
                chrome.set_window_bounds(location[0], self.dedicated_bounds)
            except Exception:
                # Repositioning is best effort; the window is still usable.
                pass
            chrome.demote_window(location[0])
            self._save_state()
            return location, False
        # No usable saved window: create a fresh one.
        new_location = chrome.create_dedicated_window(bounds=self.dedicated_bounds)
        if new_location:
            self.dedicated_windows[service_name] = new_location
            self.dedicated_window = new_location
            self.service_tab_location = new_location
            logger.info(f"{service_name} dedicated window created: {new_location}")
            chrome.demote_window(new_location[0])
            self._save_state()
            return new_location, True
        logger.error(
            f"Failed to create dedicated window for {service_name}. "
            f"Last error: {getattr(chrome, 'last_error', '')}"
        )
        return None, False
    except Exception as e:
        # Any controller failure is reported as "no window".
        logger.error(f"Failed to ensure dedicated window: {e}")
        return None, False
def _hide_dedicated_window(self):
if not self.service_tab_location:
return
try:
self.chrome.set_window_bounds(self.service_tab_location[0], self.dedicated_bounds)
except Exception as e:
logger.debug(f"Failed to restore dedicated window bounds: {e}")
try:
self.chrome.demote_window(self.service_tab_location[0])
except Exception as e:
logger.debug(f"Failed to demote dedicated window: {e}")
def select_chatgpt(self, _):
    """Menu callback: make ChatGPT the active service (ignored while recording)."""
    if not self.is_recording:
        self.current_service, self.chrome = "ChatGPT", self.chatgpt_chrome
        # Move the checkmark to the ChatGPT entry.
        self.service_gemini.state = 0
        self.service_chatgpt.state = 1
        self.cancel_mode_info.title = " Press Esc → Cancel Dictation"
        self._save_state()
        self._ensure_dedicated_window()
def select_gemini(self, _):
    """Menu callback: make Gemini the active service (ignored while recording)."""
    if not self.is_recording:
        self.current_service, self.chrome = "Gemini", self.gemini_chrome
        # Move the checkmark to the Gemini entry.
        self.service_gemini.state = 1
        self.service_chatgpt.state = 0
        self.cancel_mode_info.title = " Press Esc → Cancel (ChatGPT only)"
        self._save_state()
        self._ensure_dedicated_window()
def _check_cmd_file(self):
"""Check for CLI command file and execute if present."""
try:
if not os.path.exists(self._cmd_file):
return
with open(self._cmd_file, "r") as f:
cmd = f.read().strip()
os.remove(self._cmd_file)
if cmd == "voice-start":
threading.Thread(target=self.start_voice_conversation).start()
elif cmd == "voice-stop":
threading.Thread(target=self.stop_voice_conversation).start()
elif cmd == "voice-toggle":
if self.is_voice_conversation:
threading.Thread(target=self.stop_voice_conversation).start()
else:
threading.Thread(target=self.start_voice_conversation).start()
except Exception as e:
logger.debug(f"Failed to process CLI command file: {e}")
def _extract_success_payload(self, result: str) -> str:
if not result or not result.startswith("SUCCESS:"):
return ""
payload = result.split("SUCCESS:", 1)[1]
return re.sub(r"^(?:USED_WIN_ID|FALLBACK_WIN_ID)=\d+,TAB=\d+:", "", payload, count=1)
def _reset_voice_activity_tracking(self):
self._voice_activity_signature = ""
self._last_voice_activity_at = 0.0
self._last_voice_activity_check_at = 0.0
self._voice_activity_check_inflight = False
self._voice_idle_stop_requested = False
def _summarize_voice_activity_signature(self, signature: str) -> str:
if not signature:
return "<empty>"
compact = re.sub(r"\s+", " ", signature).strip()
if len(compact) > 120:
compact = "..." + compact[-117:]
return compact
def _get_voice_activity_signature(self):
if not self.service_tab_location or self.current_service != "ChatGPT":
return False, ""
try:
res = self.chrome.get_voice_activity_snapshot(
preferred_location=self.service_tab_location
)
except Exception as e:
logger.debug(f"Voice activity snapshot failed: {e}")
return False, ""
payload = self._extract_success_payload(res)
if not payload:
return False, ""
try:
data = json.loads(payload)
except Exception as e:
logger.debug(f"Failed to parse voice activity snapshot: {e}; payload={payload[:200]}")
return False, ""
assistant_text = str(data.get("assistant_text") or "").strip()
user_text = str(data.get("user_text") or "").strip()
assistant_count = int(data.get("assistant_count") or 0)
user_count = int(data.get("user_count") or 0)
signature = (
f"a#{assistant_count}:{assistant_text}"
f"|u#{user_count}:{user_text}"
)
return bool(data.get("active")), signature
def _check_voice_idle_timeout(self):
    """Run one idle check for the voice conversation and auto-stop it after the timeout.

    Started on a worker thread by ``_update_animation``. Always clears
    ``_voice_activity_check_inflight`` on exit so the next check can be scheduled.
    """
    try:
        # A timeout of 0 (or less) means auto-stop is disabled.
        if self.voice_idle_timeout_seconds <= 0:
            return
        _active, signature = self._get_voice_activity_signature()
        now = time.time()
        self._last_voice_activity_check_at = now
        # The conversation may have ended while the snapshot was being fetched.
        if not self.is_voice_conversation:
            return
        # A changed, non-empty signature counts as activity: restart the idle clock.
        if signature and signature != self._voice_activity_signature:
            self._voice_activity_signature = signature
            self._last_voice_activity_at = now
            logger.info(
                "Voice activity detected; idle timer reset. "
                f"text={self._summarize_voice_activity_signature(signature)}"
            )
            return
        # No activity seen yet: start counting from this check.
        if not self._last_voice_activity_at:
            self._last_voice_activity_at = now
            logger.info("Voice idle watcher initialized.")
        idle_for = now - self._last_voice_activity_at
        logger.debug(
            f"Voice idle check: idle_for={idle_for:.1f}s, "
            f"text={self._summarize_voice_activity_signature(self._voice_activity_signature)}"
        )
        # _voice_idle_stop_requested keeps the stop from being issued more than once.
        if idle_for >= self.voice_idle_timeout_seconds and not self._voice_idle_stop_requested:
            self._voice_idle_stop_requested = True
            logger.info(
                f"Voice conversation idle for {idle_for:.1f}s; auto-stopping."
            )
            threading.Thread(target=self.stop_voice_conversation, daemon=True).start()
    except Exception as e:
        logger.debug(f"Voice idle check failed: {e}")
    finally:
        self._voice_activity_check_inflight = False
def _update_animation(self, _):
    """Timer callback: poll the command file, schedule checks, and update the menu bar icon.

    Registered in ``__init__`` with a 0.1 s interval, so one frame is one tick
    of that timer.

    NOTE(review): this source lost its indentation; the nesting of the
    ``self.template`` / ``self.title`` assignments inside the state branches
    was reconstructed — confirm against the real file.
    """
    self.animation_frame += 1
    # Check for CLI commands (every 5 frames = 2Hz, lightweight stat() call)
    if self.animation_frame % 5 == 0:
        self._check_cmd_file()
    # Every 10 frames during a voice conversation, run the idle check on a
    # worker thread unless one is already in flight.
    if (
        self.is_voice_conversation
        and self.voice_idle_timeout_seconds > 0
        and not self._voice_activity_check_inflight
        and self.animation_frame % 10 == 0
    ):
        self._voice_activity_check_inflight = True
        threading.Thread(target=self._check_voice_idle_timeout, daemon=True).start()
    # Every 50 frames, make sure the event tap is still enabled.
    if self.tap and self.animation_frame % 50 == 0:
        try:
            if not Quartz.CGEventTapIsEnabled(self.tap):
                logger.warning("Event tap disabled by macOS, re-enabling.")
                Quartz.CGEventTapEnable(self.tap, True)
        except Exception as e:
            logger.debug(f"Failed to validate/re-enable event tap: {e}")
    if self.current_state == "IDLE":
        idle_icon = os.path.join(self.base_path, "assets/icon_idle_template.png")
        if self.icon != idle_icon:
            self.icon = idle_icon
            self.template = True
            self.title = None
    elif self.current_state == "RECORDING":
        # Pulsating red dot animation (every 2 frames = 5Hz)
        if self.animation_frame % 2 == 0:
            # Cycles through frames 1..4.
            frame = (self.animation_frame // 2) % 4 + 1
            self.icon = os.path.join(self.base_path, f"assets/icon_rec_{frame}.png")
            self.template = False  # Red color needs template=False
            self.title = None
    elif self.current_state == "VOICE_CONVERSATION":
        # Pulsating purple dot animation for voice conversation
        if self.animation_frame % 2 == 0:
            frame = (self.animation_frame // 2) % 4 + 1
            self.icon = os.path.join(self.base_path, f"assets/icon_voice_{frame}.png")
            self.template = False
            self.title = None
    elif self.current_state == "WAITING" or self.current_state == "PROCESSING":
        # Spinning icon animation (every 2 frames = 5Hz)
        if self.animation_frame % 2 == 0:
            frame = (self.animation_frame // 2) % 4 + 1
            self.icon = os.path.join(self.base_path, f"assets/icon_pro_{frame}.png")
            self.template = True
            self.title = None
def _get_key_name(self, keycode):
"""Get human-readable key name from keycode"""
key_names = {
63: "Fn",
54: "Right Cmd",
55: "Left Cmd",
58: "Right Option",
61: "Left Option",
59: "Right Control",
62: "Left Control",
60: "Left Shift",
56: "Right Shift",
48: "Tab",
53: "Esc",
51: "Delete",
117: "Forward Delete",
36: "Return",
49: "Space",
}
return key_names.get(keycode, f"Key {keycode}")
def _play_sound(self, path):
if not self.sound_enabled:
return
try:
sound = NSSound.alloc().initWithContentsOfFile_byReference_(path, True)
if sound:
sound.play()
except Exception:
pass
def toggle_sound(self, _):
    """Menu callback: flip the sound setting, update the menu title, and save."""
    enabled = not self.sound_enabled
    self.sound_enabled = enabled
    if enabled:
        self.sound_toggle_item.title = "Sound: On"
    else:
        self.sound_toggle_item.title = "Sound: Off"
    self._save_state()
def _is_recording_active(self) -> bool:
try:
res = self.chrome.is_recording_active(preferred_location=self.service_tab_location)
except Exception as e:
logger.debug(f"Recording-state check raised exception: {e}")
return False
if isinstance(res, bool):
return res
if not res:
return False
status = str(res)
if status.startswith("SUCCESS"):
status = status.rsplit(":", 1)[-1]
return status == "ACTIVE"
def _start_dictation_with_verification(self):
"""Start dictation and verify the page actually entered recording state."""
max_attempts = 2 if self.current_service == "ChatGPT" else 1
last_result = ""
for attempt in range(max_attempts):
res = self.chrome.start_dictation(preferred_location=self.service_tab_location)
last_result = res
if not res.startswith("SUCCESS"):
return False, res
self._update_service_tab_location_from_result(res)
time.sleep(0.5)
if self._is_recording_active():
return True, res
logger.warning(
f"Recording verification failed after start click "
f"(attempt {attempt + 1}/{max_attempts})."
)
return False, last_result or "VERIFY_FAILED"
def reset_app(self, _):
    """Run a lightweight self-check and recovery routine.

    Re-enables the event tap, closes and rebuilds the dedicated window for the
    current service, resets the runtime flags to idle, re-activates the app
    that was frontmost when the reset began, and reports the outcome in a
    log line and a notification.
    """
    # Remember who had focus so it can be restored at the end.
    previous_app = NSWorkspace.sharedWorkspace().frontmostApplication()
    tap_msg = "Event Tap unavailable"
    if self.tap:
        try:
            Quartz.CGEventTapEnable(self.tap, True)
            tap_ok = bool(Quartz.CGEventTapIsEnabled(self.tap))
            tap_msg = "Event Tap OK" if tap_ok else "Event Tap re-enable failed"
        except Exception as e:
            tap_msg = f"Event Tap error: {e}"
    service_name = self.current_service
    old_location = self.service_tab_location or self.dedicated_windows.get(service_name)
    closed_old = False
    if old_location:
        try:
            closed_old = self.chrome.close_window(old_location[0])
        except Exception:
            closed_old = False
    # Forget the old window so _ensure_dedicated_window() creates a new one.
    self.dedicated_windows.pop(service_name, None)
    self.service_tab_location = None
    self.dedicated_window = None
    new_location, _ = self._ensure_dedicated_window()
    window_ok = bool(new_location)
    if new_location:
        try:
            # Extra guard: keep dedicated window at the bottom after repair.
            self.chrome.demote_window(new_location[0])
        except Exception:
            pass
    # Return every runtime flag to its idle value.
    self.is_recording = False
    self.is_voice_conversation = False
    self._voice_conversation_starting = False
    self._reset_voice_activity_tracking()
    self.waiting_for_page = False
    self.should_auto_start = False
    self.trigger_key_currently_pressed = False
    self.voice_fn_currently_pressed = False
    self.current_state = "IDLE"
    self.status_item.title = "Status: Ready"
    # Demote controls Chrome window stacking; this restores the app focus
    # so repair does not leave user on the dedicated Chrome window.
    if previous_app:
        try:
            time.sleep(0.05)
            previous_app.activateWithOptions_(NSApplicationActivateIgnoringOtherApps)
        except Exception:
            pass
    window_msg = "Window rebuilt" if window_ok else "Window rebuild failed"
    # A failed close of the old window overrides the message above,
    # whether or not the rebuild itself succeeded.
    if old_location and not closed_old:
        window_msg = "Window rebuild attempted (old window close failed)"
    logger.info(f"Reset complete: {tap_msg}; {window_msg}; state reset")
    rumps.notification(
        "MicPipe",
        "Reset (Self-Check & Repair)",
        f"{tap_msg} | {window_msg} | State reset"
    )
def event_callback(self, proxy, event_type, event, refcon):
    """Event-tap callback that turns key events into dictation/voice actions.

    Handles two event types and ignores everything else:

    * Key-down of Esc (keycode 53): stops a running voice conversation, or,
      for the ChatGPT service, cancels a recording that is active or
      waiting for the page.
    * Modifier-flag changes: Fn (keycode 63) drives the realtime voice
      shortcut (Control+Fn starts, Fn stops), and the configured trigger
      key starts dictation on press and stops it on release.

    All actions are dispatched on new threads. The event itself is always
    returned unchanged.

    Args:
        proxy: Not used by this callback.
        event_type: Event type, compared against the Quartz constants.
        event: The event to inspect; also the return value.
        refcon: Not used by this callback.

    Returns:
        The ``event`` argument, on every path.
    """
    if event_type == Quartz.kCGEventKeyDown:
        # Integer field 9 holds the keyboard keycode.
        down_code = Quartz.CGEventGetIntegerValueField(event, 9)
        if down_code != 53:  # only Esc is of interest here
            return event
        if self.is_voice_conversation:
            threading.Thread(target=self.stop_voice_conversation).start()
        elif self.current_service == "ChatGPT" and (
            self.is_recording or self.waiting_for_page
        ):
            # Gemini does not support cancel, only ChatGPT does
            threading.Thread(target=self.cancel_recording).start()
        return event

    if event_type != Quartz.kCGEventFlagsChanged:
        return event

    modifier_code = Quartz.CGEventGetIntegerValueField(event, 9)
    modifier_flags = Quartz.CGEventGetFlags(event)

    # Fixed shortcut for realtime voice: Control+Fn starts, Fn stops.
    if modifier_code == 63:
        fn_down = self._is_key_pressed(modifier_code, modifier_flags)
        if fn_down != self.voice_fn_currently_pressed:
            self.voice_fn_currently_pressed = fn_down

            if self.is_voice_conversation:
                # Pressing Fn ends the conversation; the release is ignored.
                if fn_down:
                    threading.Thread(target=self.stop_voice_conversation).start()
                return event

            wants_voice = (
                fn_down
                and not self.is_recording
                and bool(modifier_flags & Quartz.kCGEventFlagMaskControl)
                and self.current_service == "ChatGPT"
            )
            if wants_voice:
                threading.Thread(target=self.start_voice_conversation).start()
                return event

    # Trigger key: press starts dictation, release stops it.
    if modifier_code != self.trigger_key:
        return event

    trigger_down = self._is_key_pressed(modifier_code, modifier_flags)
    # Prevent duplicate events
    if trigger_down == self.trigger_key_currently_pressed:
        return event
    self.trigger_key_currently_pressed = trigger_down

    # Voice conversation stop is handled by Fn/Esc.
    if self.is_voice_conversation:
        return event

    if not trigger_down:
        # User released trigger key
        if self.is_recording:
            threading.Thread(target=self.stop_recording).start()
    elif not self.is_recording:
        # Normal dictation trigger
        threading.Thread(
            target=self.start_recording, kwargs={"is_hold_mode": True}
        ).start()

    return event
def cancel_recording(self):
    """Cancel the current dictation without pasting text (ChatGPT only).

    Does nothing when no recording is active and no page load is pending,
    or when the current service is Gemini.

    Otherwise it clears any pending auto-start, marks recording as stopped
    (playing the stop sound), asks Chrome to cancel the dictation, demotes
    the dedicated window and re-activates ``self.target_app``.

    The Chrome and focus steps are best-effort: their failures are logged
    and do not abort the cancel. Whatever happens, the state is returned to
    ``"IDLE"`` / ``"Status: Ready"`` on exit, so a failing step can no
    longer leave the app stuck in ``"PROCESSING"``.
    """
    if not self.is_recording and not self.waiting_for_page:
        return

    # Gemini does not support cancel
    if self.current_service == "Gemini":
        return

    try:
        # Cancel any pending auto-start
        if self.waiting_for_page:
            self.waiting_for_page = False
            self.should_auto_start = False

        if self.is_recording:
            self.is_recording = False
            self._play_sound(self._sound_stop)

        self.current_state = "PROCESSING"
        self.status_item.title = "Status: ⏳ Cancelling..."

        try:
            self.chrome.cancel_dictation(preferred_location=self.service_tab_location)
        except Exception as e:
            logger.warning(f"Failed to cancel dictation: {e}")

        # Push dedicated window to back before restoring focus
        if self.service_tab_location:
            try:
                self.chrome.demote_window(self.service_tab_location[0])
            except Exception as e:
                logger.warning(f"Failed to demote dedicated window after cancel: {e}")

        # Restore focus to original app
        if self.target_app:
            time.sleep(0.1)
            try:
                self.target_app.activateWithOptions_(NSApplicationActivateIgnoringOtherApps)
            except Exception as e:
                # The target app may be unavailable by now; losing focus
                # restoration must not block the reset to idle.
                logger.warning(f"Failed to restore focus after cancel: {e}")
    finally:
        # Always land back in the idle state, even if a step above raised.
        self.current_state = "IDLE"
        self.status_item.title = "Status: Ready"
def start_voice_conversation(self):
"""Start a ChatGPT real-time voice conversation."""
if self.is_recording or self.is_voice_conversation or self._voice_conversation_starting:
return
if self.current_service != "ChatGPT":
rumps.notification("MicPipe", "Voice Mode",
"Voice conversation is only available with the ChatGPT service.")
return
self._voice_conversation_starting = True
self._reset_voice_activity_tracking()
# Record current app for focus restoration
self.target_app = NSWorkspace.sharedWorkspace().frontmostApplication()
# Ensure dedicated window exists
location, created = self._ensure_dedicated_window()
if not location:
self._voice_conversation_starting = False
rumps.notification(
"MicPipe",
"Window Error",
self._window_creation_failure_message(self.chrome, self.current_service)
)
return
self.service_tab_location = location
if created:
self._voice_conversation_starting = False
rumps.notification("MicPipe", "Voice Mode",
"The ChatGPT page is still loading. Please try again shortly.")
return
# Check page readiness
ready_res = self.chrome.is_page_ready(preferred_location=self.service_tab_location)
status = self._get_ready_status(ready_res)
if status != "READY":
self._voice_conversation_starting = False
rumps.notification("MicPipe", "Voice Mode",
"The ChatGPT page is not ready yet. Please try again shortly.")
return
if self.service_tab_location:
try:
revealed = self.chrome.reveal_window(
self.service_tab_location[0], bounds=self.voice_bounds
)
if not revealed:
logger.warning("Failed to reveal ChatGPT window before starting voice.")
time.sleep(0.8)
except Exception as e:
logger.warning(f"Failed to reveal ChatGPT window before starting voice: {e}")
# Click the "Use Voice" button. If the composer has pending text,
# clear the draft first and wait for the voice button to return.
res = self.chrome.start_voice_conversation(preferred_location=self.service_tab_location)
if res and "VOICE_DRAFT_CLEARED" in res:
logger.info("Voice start fallback: cleared pending composer draft before retry.")
max_wait_attempts = 10
for attempt in range(max_wait_attempts):
time.sleep(0.25)
res = self.chrome.start_voice_conversation(preferred_location=self.service_tab_location)
if res and "VOICE_START_CLICKED" in res:
logger.info(
f"Voice button became available after clearing pending draft "
f"(wait {0.25 * (attempt + 1):.2f}s)."
)