micpipe/micpipe.py at master · herrkaefer/micpipe · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
import logging
import time
import os
import json
import Quartz
import threading
import re
import rumps
import argparse
from AppKit import NSWorkspace, NSApplicationActivateIgnoringOtherApps, NSSound, NSScreen
from chrome_script import ChatGPTChrome, GeminiChrome
from clipboard_guard import snapshot_clipboard
from paste_tool import paste_text
from state_manager import MicPipeStateStore

# ============================================================
# HOTKEY REFERENCE - The trigger key is loaded from the saved state and can
# be changed at runtime from the "Hotkey" menu; no constant needs editing here.
# ============================================================
# Common keycodes for reference:
#   63  - Fn (Function key)
#   54  - Right Command
#   55  - Left Command
#   58  - Right Option
#   61  - Left Option
#   59  - Right Control
#   62  - Left Control
#   48  - Tab
#   53  - Escape
#   51  - Delete (Backspace)
#   117 - Forward Delete
#   36  - Return (Enter)
#   49  - Space
# ============================================================
# Application version; displayed in the menu as "Version: <__version__>".
__version__ = "1.5.1"
# Idle timeout in seconds.
# NOTE(review): nothing in the visible part of this file reads this constant
# (the active timeout comes from the saved state) — presumably a default used
# elsewhere; confirm.
VOICE_IDLE_TIMEOUT_SECONDS = 20
# Choices, in seconds, offered in the "Auto-Stop Delay" menu; 0 is shown as "Off".
VOICE_IDLE_TIMEOUT_OPTIONS = [0, 10, 15, 20, 25, 30]

def configure_logging(debug: bool):
    """Set up root logging: DEBUG verbosity when *debug* is true, INFO otherwise."""
    verbosity = logging.INFO
    if debug:
        verbosity = logging.DEBUG
    line_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    logging.basicConfig(level=verbosity, format=line_format)

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# ============================================================

class MicPipeApp(rumps.App):
    # Matches the "USED_WIN_ID=<n>,TAB=<n>:" / "FALLBACK_WIN_ID=<n>,TAB=<n>:"
    # marker inside a Chrome result string; group 1 is the window id and
    # group 2 the tab index.
    _TAB_LOC_RE = re.compile(r"(?:USED_WIN_ID|FALLBACK_WIN_ID)=(\d+),TAB=(\d+):")

    def __init__(self, debug: bool = False):
        """Create the menu-bar app: load saved settings, set up controllers, build the menu.

        Args:
            debug: When True, window bounds are computed in debug mode (visible
                on-screen positions) instead of the tucked-away production
                layout.
        """
        super().__init__("MicPipe", quit_button="Quit")
        self.base_path = os.path.dirname(__file__)
        self.icon = os.path.join(self.base_path, "assets/icon_idle_template.png")
        self.template = True  # Enable template mode for idle icon
        self.state_path = os.path.join(
            os.path.expanduser("~"),
            "Library",
            "Application Support",
            "MicPipe",
            "micpipe_state.json",
        )
        self.state_store = MicPipeStateStore(self.state_path, logger)
        self.debug = debug
        self.dedicated_bounds = self._compute_dedicated_bounds(debug)
        self.voice_bounds = self._compute_voice_bounds(debug)

        # Load saved state
        state = self.state_store.load()
        self.current_service = state["current_service"]
        self.sound_enabled = state["sound_enabled"]
        self.dedicated_windows = state["dedicated_windows"]
        self.trigger_key = state["trigger_key"]
        self.voice_idle_timeout_seconds = state["voice_idle_timeout_seconds"]
        self.pipe_slots = state["pipe_slots"]
        self.current_pipe_slot = state["current_pipe_slot"]
        self.chatgpt_chrome = ChatGPTChrome()
        self.gemini_chrome = GeminiChrome()
        # Active controller: anything other than "ChatGPT" is treated as Gemini.
        self.chrome = self.chatgpt_chrome if self.current_service == "ChatGPT" else self.gemini_chrome

        self.is_recording = False
        self.is_voice_conversation = False
        self._voice_conversation_starting = False
        self._voice_activity_signature = ""
        self._last_voice_activity_at = 0.0
        self._last_voice_activity_check_at = 0.0
        self._voice_activity_check_inflight = False
        self._voice_idle_stop_requested = False
        self.current_state = "IDLE"
        self.animation_frame = 0
        self.tap = None

        # Internal state
        self.target_app = None
        self.target_is_service_page = False  # True if triggered from ChatGPT/Gemini page
        self.trigger_key_currently_pressed = False
        self.voice_fn_currently_pressed = False
        self.should_auto_start = False
        self.waiting_for_page = False
        self.service_tab_location = None  # Dedicated window location
        self.dedicated_window = None
        self._sound_start = os.path.join(self.base_path, "assets", "sound_start.wav")
        self._sound_stop = os.path.join(self.base_path, "assets", "sound_stop.wav")
        self._sound_voice_start = os.path.join(self.base_path, "assets", "sound_voice_start.wav")
        self._sound_voice_stop = os.path.join(self.base_path, "assets", "sound_voice_stop.wav")
        self._cmd_file = os.path.join(os.path.expanduser("~"), "Library", "Application Support", "MicPipe", "cmd")

        # Ensure the dedicated window exists at startup. This call is
        # synchronous; only the readiness check (started at the end of this
        # method) runs on a background thread.
        self._ensure_dedicated_window()

        # Build menu
        self.status_item = rumps.MenuItem("Status: Ready", callback=None)

        # Service selection submenu
        self.service_chatgpt = rumps.MenuItem("ChatGPT", callback=self.select_chatgpt)
        self.service_chatgpt.state = 1 if self.current_service == "ChatGPT" else 0
        self.service_gemini = rumps.MenuItem("Gemini", callback=self.select_gemini)
        self.service_gemini.state = 1 if self.current_service == "Gemini" else 0
        self.service_menu = rumps.MenuItem("Service")
        self.service_menu.add(self.service_chatgpt)
        self.service_menu.add(self.service_gemini)

        # Hotkey selection submenu
        self.hotkey_menu = rumps.MenuItem("Hotkey")
        self.hotkey_items = {}
        for keycode, display_name in MicPipeStateStore.HOTKEY_OPTIONS:
            item = rumps.MenuItem(display_name, callback=self._make_hotkey_callback(keycode))
            item.state = 1 if keycode == self.trigger_key else 0
            self.hotkey_items[keycode] = item
            self.hotkey_menu.add(item)

        # Voice auto-stop delay submenu (0 seconds is presented as "Off")
        self.voice_idle_menu = rumps.MenuItem("  Auto-Stop Delay")
        self.voice_idle_items = {}
        for seconds in VOICE_IDLE_TIMEOUT_OPTIONS:
            label = "Off" if seconds == 0 else f"{seconds}s"
            item = rumps.MenuItem(label, callback=self._make_voice_idle_timeout_callback(seconds))
            item.state = 1 if seconds == self.voice_idle_timeout_seconds else 0
            self.voice_idle_items[seconds] = item
            self.voice_idle_menu.add(item)
        # Titles are filled in by _refresh_voice_menu_info() below.
        self.realtime_voice_start_item = rumps.MenuItem("", callback=None)
        self.realtime_voice_stop_item = rumps.MenuItem("", callback=None)

        # AI Pipe submenu
        self.pipe_menu = rumps.MenuItem("AI Pipe")
        self.pipe_items = {}

        # Off option (special slot -1)
        off_item = rumps.MenuItem("Off (Direct Voice-to-Text)", callback=self._make_pipe_callback(-1))
        off_item.state = 1 if self.current_pipe_slot == -1 else 0
        self.pipe_items[-1] = off_item
        self.pipe_menu.add(off_item)

        # Conversation Mode option (special slot -2: no preset prompt, direct AI processing)
        conv_mode_item = rumps.MenuItem("Conversation Mode (AI follows your spoken instructions)", callback=self._make_pipe_callback(-2))
        conv_mode_item.state = 1 if self.current_pipe_slot == -2 else 0
        self._conv_mode_item = conv_mode_item
        self.pipe_items[-2] = conv_mode_item
        self.pipe_menu.add(conv_mode_item)

        self.pipe_menu.add(None)  # Separator

        # Prompt slots: each slot is a submenu offering "use" and "edit" actions.
        for i in range(5):
            slot_label = self._get_slot_label(i)
            slot_submenu = rumps.MenuItem(slot_label)
            slot_submenu.state = 1 if self.current_pipe_slot == i else 0

            # Add submenu items: Select and Edit
            select_item = rumps.MenuItem("✓ Use This Prompt", callback=self._make_pipe_callback(i))
            edit_item = rumps.MenuItem("✎ Edit...", callback=self._make_edit_slot_callback(i))
            slot_submenu.add(select_item)
            slot_submenu.add(edit_item)

            self.pipe_items[i] = slot_submenu
            self.pipe_menu.add(slot_submenu)

        self.pipe_menu.add(None)  # Separator
        # Add non-clickable note about latency
        latency_note = rumps.MenuItem("Note: AI processing adds latency", callback=None)
        self.pipe_menu.add(latency_note)
        self.hold_mode_info = rumps.MenuItem("  Hold → Hold to Speak", callback=None)
        self.toggle_mode_info = rumps.MenuItem("  Click → Toggle Start/Stop", callback=None)
        # Keep the cancel hint in sync with the restored service, using the
        # same wording select_chatgpt()/select_gemini() apply on a switch.
        cancel_title = (
            "  Press Esc → Cancel Dictation"
            if self.current_service == "ChatGPT"
            else "  Press Esc → Cancel (ChatGPT only)"
        )
        self.cancel_mode_info = rumps.MenuItem(cancel_title, callback=None)
        self.dictation_section_title = rumps.MenuItem("Dictation", callback=None)
        self.realtime_voice_section_title = rumps.MenuItem("ChatGPT Realtime Voice Chat", callback=None)
        self.system_section_title = rumps.MenuItem("System", callback=None)
        self.sound_toggle_item = rumps.MenuItem(
            "Sound: On" if self.sound_enabled else "Sound: Off",
            callback=self.toggle_sound
        )
        self.reset_item = rumps.MenuItem("Reset (Self-Check & Repair)", callback=self.reset_app)
        self.version_info = rumps.MenuItem(f"Version: {__version__}", callback=None)
        self._refresh_voice_menu_info()

        self.menu = [
            self.status_item,
            None,  # Separator
            self.dictation_section_title,
            self.service_menu,
            self.hotkey_menu,
            self.pipe_menu,
            self.hold_mode_info,
            self.toggle_mode_info,
            self.cancel_mode_info,
            None,  # Separator
            self.realtime_voice_section_title,
            self.realtime_voice_start_item,
            self.realtime_voice_stop_item,
            self.voice_idle_menu,
            None,  # Separator
            self.system_section_title,
            self.sound_toggle_item,
            self.reset_item,
            None,  # Separator
            self.version_info
        ]

        # Animation timer (runs at 10Hz)
        self.timer = rumps.Timer(self._update_animation, 0.1)
        self.timer.start()

        # Start the readiness check only now that every attribute, menu item
        # and the timer exist, so the background thread can never observe a
        # half-initialised app object.
        threading.Thread(target=self._check_service_ready_on_startup).start()

    def _save_state(self):
        self.state_store.save(
            self.current_service,
            self.sound_enabled,
            self.dedicated_windows,
            self.trigger_key,
            self.voice_idle_timeout_seconds,
            self.pipe_slots,
            self.current_pipe_slot
        )

    def _make_hotkey_callback(self, keycode):
        """Create a callback function for hotkey menu item selection."""
        def callback(_):
            if self.is_recording:
                return  # Don't change hotkey during recording
            # Update checkmarks
            for kc, item in self.hotkey_items.items():
                item.state = 1 if kc == keycode else 0
            self.trigger_key = keycode
            self._refresh_voice_menu_info()
            self._save_state()
            # Show notification about the change
            key_name = self._get_key_name(keycode)
            rumps.notification("MicPipe", "Hotkey Changed", f"New hotkey: {key_name}")
        return callback

    def _refresh_voice_menu_info(self):
        self.realtime_voice_start_item.title = "  Start: Control+Fn"
        self.realtime_voice_stop_item.title = "  Stop: Fn or Esc"

    def _make_voice_idle_timeout_callback(self, seconds):
        """Create a callback for selecting the voice auto-stop timeout."""
        def callback(_):
            for timeout, item in self.voice_idle_items.items():
                item.state = 1 if timeout == seconds else 0
            self.voice_idle_timeout_seconds = seconds
            self._save_state()
            message = "Disabled" if seconds == 0 else f"Auto-stop after {seconds}s"
            rumps.notification("MicPipe", "Voice Auto-Stop", message)
        return callback

    def _get_slot_label(self, slot_index):
        """Get display label for a pipe slot (uses title)"""
        slot = self.pipe_slots[slot_index]
        title = slot.get("title", "") if isinstance(slot, dict) else ""
        prompt = slot.get("prompt", "") if isinstance(slot, dict) else slot

        if title:
            return f"Slot {slot_index + 1}: {title}"
        elif prompt:
            preview = prompt[:25] + "..." if len(prompt) > 25 else prompt
            return f"Slot {slot_index + 1}: {preview}"
        else:
            return f"Slot {slot_index + 1}: (empty)"

    def _make_pipe_callback(self, slot_index):
        """Create callback for selecting a pipe slot"""
        def callback(_):
            if self.is_recording:
                return  # Don't change during recording
            # Update checkmarks
            for idx, item in self.pipe_items.items():
                item.state = 1 if idx == slot_index else 0
            self.current_pipe_slot = slot_index
            self._save_state()

            # Show notification
            if slot_index == -1:
                rumps.notification("MicPipe", "AI Pipe", "AI Pipe disabled")
            else:
                slot = self.pipe_slots[slot_index]
                title = slot.get("title", "") if isinstance(slot, dict) else ""
                msg = title if title else f"Slot {slot_index + 1}"
                rumps.notification("MicPipe", "AI Pipe", f"Using: {msg}")
        return callback

    def _make_edit_slot_callback(self, slot_index):
        """Create callback for editing a pipe slot using standalone editor"""
        def callback(_):
            import subprocess
            import json
            import os
            import threading
            import sys


            slot = self.pipe_slots[slot_index]
            current_title = slot.get("title", "") if isinstance(slot, dict) else ""
            current_prompt = slot.get("prompt", "") if isinstance(slot, dict) else slot

            def run_editor():
                try:
                    # Get path to editor script
                    script_dir = os.path.dirname(os.path.abspath(__file__))
                    editor_path = os.path.join(script_dir, "slot_editor.py")

                    # Run editor as separate process using Popen
                    proc = subprocess.Popen(
                        [sys.executable, editor_path, str(slot_index), current_title, current_prompt],
                        stdout=subprocess.PIPE,
                        stderr=subprocess.PIPE,
                        text=True
                    )

                    # Wait for editor to close
                    stdout, stderr = proc.communicate(timeout=300)

                    if proc.returncode == 0 and stdout.strip():
                        data = json.loads(stdout.strip())
                        if data.get("saved"):
                            self.pipe_slots[slot_index] = {
                                "title": data["title"],
                                "prompt": data["prompt"]
                            }
                            self._save_state()

                            # Update menu
                            new_label = self._get_slot_label(slot_index)
                            self.pipe_items[slot_index].title = new_label

                            rumps.notification("MicPipe", "Slot Updated", f"Slot {slot_index + 1} has been updated")
                    else:
                        if stderr:
                            logger.debug(f"Editor stderr: {stderr}")
                except Exception as e:
                    logger.error(f"Editor failed: {e}")

            # Run in thread to not block
            thread = threading.Thread(target=run_editor, daemon=True)
            thread.start()

        return callback


    def _compute_dedicated_bounds(self, debug: bool):
        # Fixed window size that ensures the microphone button is visible
        width = 700
        height = 500

        if debug:
            # Debug mode: visible window at top-left
            return (0, 0, width, height)

        # Production mode: Push window to bottom-right corner, minimizing visible area.
        # macOS enforces that at least a few pixels remain visible. Through testing:
        # - Pushing right: left = screen_width - 5 leaves only ~5px visible
        # - Pushing down: top = screen_height - 50 works well
        try:
            screens = NSScreen.screens()
            if not screens:
                return (20000, 2000, 20000 + width, 2000 + height)
            # Get the main screen dimensions
            main_screen = screens[0].frame()
            screen_width = int(main_screen.size.width)
            screen_height = int(main_screen.size.height)
            # Position at bottom-right corner, leaving minimal visible area
            left = screen_width - 5  # Only ~5px visible on right edge
            top = screen_height - 50  # Near bottom of screen
            return (left, top, left + width, top + height)
        except Exception:
            return (20000, 2000, 20000 + width, 2000 + height)

    def _compute_voice_bounds(self, debug: bool):
        width = 760
        height = 620

        if debug:
            return (120, 90, 120 + width, 90 + height)

        try:
            screens = NSScreen.screens()
            if not screens:
                return (160, 100, 160 + width, 100 + height)
            visible = screens[0].visibleFrame()
            margin_left = 32
            margin_top = 72
            left = int(visible.origin.x + margin_left)
            top = int(margin_top)
            return (left, top, left + width, top + height)
        except Exception:
            return (160, 100, 160 + width, 100 + height)

    def _get_ready_status(self, res: str) -> str:
        if not res or not res.startswith("SUCCESS"):
            return ""
        return res.rsplit(":", 1)[-1]

    def _prompt_service_login(self, details: str):
        """Reveal the service window (best effort) and notify the user of a login/permission issue.

        Args:
            details: Body text of the notification.
        """
        location = self.service_tab_location or self.dedicated_windows.get(self.current_service)
        if location:
            try:
                self.chrome.reveal_window(location[0])
            except Exception as e:
                # Revealing the window is best-effort: the notification below
                # must still be shown, but keep a trace of why it failed.
                logger.debug(f"Failed to reveal {self.current_service} window: {e}")
        rumps.notification(
            "MicPipe",
            f"{self.current_service} Login or Permission Issue",
            details
        )

    def _window_creation_failure_message(self, chrome, service_name: str) -> str:
        error = (getattr(chrome, "last_error", "") or "").strip()
        if ":-1743:" in error or "Not authorized to send Apple events to Google Chrome" in error:
            return (
                "MicPipe does not currently have permission to control Google Chrome. "
                "Go to System Settings > Privacy & Security > Automation, "
                "allow your terminal or launcher app to control Google Chrome, then try again."
            )
        if ":-1728:" in error or "Can't get application \"Google Chrome\"" in error:
            return (
                "Google Chrome could not be accessed. "
                "Open Chrome once manually, confirm Automation permission, and try again."
            )
        if error == "EMPTY_RESULT":
            return (
                f"Could not create the dedicated {service_name} window. "
                "Make sure Google Chrome is installed and Automation permission is working."
            )
        if error:
            return f"Could not create the dedicated {service_name} window. Details: {error}"
        return f"Could not create the dedicated {service_name} window."

    def _update_service_tab_location_from_result(self, result: str):
        """Update self.service_tab_location when Chrome reports the actual window/tab used."""
        if not result:
            return
        m = self._TAB_LOC_RE.search(result)
        if not m:
            return
        try:
            win_id = int(m.group(1))
            tab_idx = int(m.group(2))
        except Exception:
            return
        if win_id > 0 and tab_idx > 0:
            old = self.service_tab_location
            self.service_tab_location = (win_id, tab_idx)
            self.dedicated_window = self.service_tab_location
            self.dedicated_windows[self.current_service] = self.service_tab_location
            self._save_state()
            if old != self.service_tab_location:
                logger.debug(f"Updated service_tab_location: {old} -> {self.service_tab_location}")

    def _ensure_dedicated_window(self):
        """Ensure the dedicated window exists for the current service."""
        try:
            chrome = self.chatgpt_chrome if self.current_service == "ChatGPT" else self.gemini_chrome
            service_name = self.current_service

            location = self.dedicated_windows.get(service_name)
            if location and chrome.is_window_alive(*location):
                self.dedicated_window = location
                self.service_tab_location = location
                try:
                    chrome.set_window_bounds(location[0], self.dedicated_bounds)
                except Exception:
                    pass
                chrome.demote_window(location[0])
                self._save_state()
                return location, False

            new_location = chrome.create_dedicated_window(bounds=self.dedicated_bounds)
            if new_location:
                self.dedicated_windows[service_name] = new_location
                self.dedicated_window = new_location
                self.service_tab_location = new_location
                logger.info(f"{service_name} dedicated window created: {new_location}")
                chrome.demote_window(new_location[0])
                self._save_state()
                return new_location, True

            logger.error(
                f"Failed to create dedicated window for {service_name}. "
                f"Last error: {getattr(chrome, 'last_error', '')}"
            )
            return None, False
        except Exception as e:
            logger.error(f"Failed to ensure dedicated window: {e}")
            return None, False

    def _hide_dedicated_window(self):
        if not self.service_tab_location:
            return
        try:
            self.chrome.set_window_bounds(self.service_tab_location[0], self.dedicated_bounds)
        except Exception as e:
            logger.debug(f"Failed to restore dedicated window bounds: {e}")
        try:
            self.chrome.demote_window(self.service_tab_location[0])
        except Exception as e:
            logger.debug(f"Failed to demote dedicated window: {e}")

    def select_chatgpt(self, _):
        """Switch to ChatGPT service."""
        if self.is_recording:
            return  # Don't switch during recording
        self.current_service = "ChatGPT"
        self.chrome = self.chatgpt_chrome
        self.service_chatgpt.state = 1
        self.service_gemini.state = 0
        self.cancel_mode_info.title = "  Press Esc → Cancel Dictation"
        self._save_state()
        self._ensure_dedicated_window()

    def select_gemini(self, _):
        """Switch to Gemini service."""
        if self.is_recording:
            return  # Don't switch during recording
        self.current_service = "Gemini"
        self.chrome = self.gemini_chrome
        self.service_chatgpt.state = 0
        self.service_gemini.state = 1
        self.cancel_mode_info.title = "  Press Esc → Cancel (ChatGPT only)"
        self._save_state()
        self._ensure_dedicated_window()

    def _check_cmd_file(self):
        """Check for CLI command file and execute if present."""
        try:
            if not os.path.exists(self._cmd_file):
                return
            with open(self._cmd_file, "r") as f:
                cmd = f.read().strip()
            os.remove(self._cmd_file)
            if cmd == "voice-start":
                threading.Thread(target=self.start_voice_conversation).start()
            elif cmd == "voice-stop":
                threading.Thread(target=self.stop_voice_conversation).start()
            elif cmd == "voice-toggle":
                if self.is_voice_conversation:
                    threading.Thread(target=self.stop_voice_conversation).start()
                else:
                    threading.Thread(target=self.start_voice_conversation).start()
        except Exception as e:
            logger.debug(f"Failed to process CLI command file: {e}")

    def _extract_success_payload(self, result: str) -> str:
        if not result or not result.startswith("SUCCESS:"):
            return ""
        payload = result.split("SUCCESS:", 1)[1]
        return re.sub(r"^(?:USED_WIN_ID|FALLBACK_WIN_ID)=\d+,TAB=\d+:", "", payload, count=1)

    def _reset_voice_activity_tracking(self):
        self._voice_activity_signature = ""
        self._last_voice_activity_at = 0.0
        self._last_voice_activity_check_at = 0.0
        self._voice_activity_check_inflight = False
        self._voice_idle_stop_requested = False

    def _summarize_voice_activity_signature(self, signature: str) -> str:
        if not signature:
            return "<empty>"
        compact = re.sub(r"\s+", " ", signature).strip()
        if len(compact) > 120:
            compact = "..." + compact[-117:]
        return compact

    def _get_voice_activity_signature(self):
        if not self.service_tab_location or self.current_service != "ChatGPT":
            return False, ""
        try:
            res = self.chrome.get_voice_activity_snapshot(
                preferred_location=self.service_tab_location
            )
        except Exception as e:
            logger.debug(f"Voice activity snapshot failed: {e}")
            return False, ""
        payload = self._extract_success_payload(res)
        if not payload:
            return False, ""
        try:
            data = json.loads(payload)
        except Exception as e:
            logger.debug(f"Failed to parse voice activity snapshot: {e}; payload={payload[:200]}")
            return False, ""
        assistant_text = str(data.get("assistant_text") or "").strip()
        user_text = str(data.get("user_text") or "").strip()
        assistant_count = int(data.get("assistant_count") or 0)
        user_count = int(data.get("user_count") or 0)
        signature = (
            f"a#{assistant_count}:{assistant_text}"
            f"|u#{user_count}:{user_text}"
        )
        return bool(data.get("active")), signature

    def _check_voice_idle_timeout(self):
        try:
            if self.voice_idle_timeout_seconds <= 0:
                return
            _active, signature = self._get_voice_activity_signature()
            now = time.time()
            self._last_voice_activity_check_at = now
            if not self.is_voice_conversation:
                return
            if signature and signature != self._voice_activity_signature:
                self._voice_activity_signature = signature
                self._last_voice_activity_at = now
                logger.info(
                    "Voice activity detected; idle timer reset. "
                    f"text={self._summarize_voice_activity_signature(signature)}"
                )
                return
            if not self._last_voice_activity_at:
                self._last_voice_activity_at = now
                logger.info("Voice idle watcher initialized.")
            idle_for = now - self._last_voice_activity_at
            logger.debug(
                f"Voice idle check: idle_for={idle_for:.1f}s, "
                f"text={self._summarize_voice_activity_signature(self._voice_activity_signature)}"
            )
            if idle_for >= self.voice_idle_timeout_seconds and not self._voice_idle_stop_requested:
                self._voice_idle_stop_requested = True
                logger.info(
                    f"Voice conversation idle for {idle_for:.1f}s; auto-stopping."
                )
                threading.Thread(target=self.stop_voice_conversation, daemon=True).start()
        except Exception as e:
            logger.debug(f"Voice idle check failed: {e}")
        finally:
            self._voice_activity_check_inflight = False

    def _update_animation(self, _):
        """Update menu bar icon based on current state"""
        self.animation_frame += 1
        # Check for CLI commands (every 5 frames = 2Hz, lightweight stat() call)
        if self.animation_frame % 5 == 0:
            self._check_cmd_file()
        if (
            self.is_voice_conversation
            and self.voice_idle_timeout_seconds > 0
            and not self._voice_activity_check_inflight
            and self.animation_frame % 10 == 0
        ):
            self._voice_activity_check_inflight = True
            threading.Thread(target=self._check_voice_idle_timeout, daemon=True).start()
        if self.tap and self.animation_frame % 50 == 0:
            try:
                if not Quartz.CGEventTapIsEnabled(self.tap):
                    logger.warning("Event tap disabled by macOS, re-enabling.")
                    Quartz.CGEventTapEnable(self.tap, True)
            except Exception as e:
                logger.debug(f"Failed to validate/re-enable event tap: {e}")

        if self.current_state == "IDLE":
            idle_icon = os.path.join(self.base_path, "assets/icon_idle_template.png")
            if self.icon != idle_icon:
                self.icon = idle_icon
                self.template = True
                self.title = None

        elif self.current_state == "RECORDING":
            # Pulsating red dot animation (every 2 frames = 5Hz)
            if self.animation_frame % 2 == 0:
                frame = (self.animation_frame // 2) % 4 + 1
                self.icon = os.path.join(self.base_path, f"assets/icon_rec_{frame}.png")
                self.template = False # Red color needs template=False
                self.title = None

        elif self.current_state == "VOICE_CONVERSATION":
            # Pulsating purple dot animation for voice conversation
            if self.animation_frame % 2 == 0:
                frame = (self.animation_frame // 2) % 4 + 1
                self.icon = os.path.join(self.base_path, f"assets/icon_voice_{frame}.png")
                self.template = False
                self.title = None

        elif self.current_state == "WAITING" or self.current_state == "PROCESSING":
            # Spinning icon animation (every 2 frames = 5Hz)
            if self.animation_frame % 2 == 0:
                frame = (self.animation_frame // 2) % 4 + 1
                self.icon = os.path.join(self.base_path, f"assets/icon_pro_{frame}.png")
                self.template = True
                self.title = None


    def _get_key_name(self, keycode):
        """Get human-readable key name from keycode"""
        key_names = {
            63: "Fn",
            54: "Right Cmd",
            55: "Left Cmd",
            58: "Right Option",
            61: "Left Option",
            59: "Right Control",
            62: "Left Control",
            60: "Left Shift",
            56: "Right Shift",
            48: "Tab",
            53: "Esc",
            51: "Delete",
            117: "Forward Delete",
            36: "Return",
            49: "Space",
        }
        return key_names.get(keycode, f"Key {keycode}")

    def _play_sound(self, path):
        if not self.sound_enabled:
            return
        try:
            sound = NSSound.alloc().initWithContentsOfFile_byReference_(path, True)
            if sound:
                sound.play()
        except Exception:
            pass

    def toggle_sound(self, _):
        """Menu callback: flip sound feedback on/off, update the menu label, persist state."""
        now_enabled = not self.sound_enabled
        self.sound_enabled = now_enabled
        if now_enabled:
            label = "Sound: On"
        else:
            label = "Sound: Off"
        self.sound_toggle_item.title = label
        self._save_state()

    def _is_recording_active(self) -> bool:
        try:
            res = self.chrome.is_recording_active(preferred_location=self.service_tab_location)
        except Exception as e:
            logger.debug(f"Recording-state check raised exception: {e}")
            return False
        if isinstance(res, bool):
            return res
        if not res:
            return False
        status = str(res)
        if status.startswith("SUCCESS"):
            status = status.rsplit(":", 1)[-1]
        return status == "ACTIVE"

    def _start_dictation_with_verification(self):
        """Start dictation and verify the page actually entered recording state."""
        max_attempts = 2 if self.current_service == "ChatGPT" else 1
        last_result = ""
        for attempt in range(max_attempts):
            res = self.chrome.start_dictation(preferred_location=self.service_tab_location)
            last_result = res
            if not res.startswith("SUCCESS"):
                return False, res
            self._update_service_tab_location_from_result(res)
            time.sleep(0.5)
            if self._is_recording_active():
                return True, res
            logger.warning(
                f"Recording verification failed after start click "
                f"(attempt {attempt + 1}/{max_attempts})."
            )
        return False, last_result or "VERIFY_FAILED"

    def reset_app(self, _):
        """Menu callback: self-check and repair.

        Steps, in order:
          1. Re-enable the keyboard event tap and record whether it is enabled.
          2. Close the current service's dedicated Chrome window (if known),
             forget its location, and ask for a fresh one.
          3. Reset all recording / voice-conversation state to idle.
          4. Hand focus back to the app that was frontmost when the reset began.
          5. Log and notify a one-line summary.

        Failures in individual steps are tolerated and reflected in the summary.
        """
        # Remember who had focus so it can be handed back at the end.
        frontmost_before = NSWorkspace.sharedWorkspace().frontmostApplication()

        # --- 1. Event tap ---
        if self.tap:
            try:
                Quartz.CGEventTapEnable(self.tap, True)
                if Quartz.CGEventTapIsEnabled(self.tap):
                    tap_msg = "Event Tap OK"
                else:
                    tap_msg = "Event Tap re-enable failed"
            except Exception as e:
                tap_msg = f"Event Tap error: {e}"
        else:
            tap_msg = "Event Tap unavailable"

        # --- 2. Dedicated window ---
        service = self.current_service
        stale_location = self.service_tab_location or self.dedicated_windows.get(service)
        old_closed = False
        if stale_location:
            try:
                old_closed = self.chrome.close_window(stale_location[0])
            except Exception:
                old_closed = False

        # Drop every cached reference to the old window before rebuilding.
        self.dedicated_windows.pop(service, None)
        self.service_tab_location = None
        self.dedicated_window = None

        rebuilt_location, _created = self._ensure_dedicated_window()
        if rebuilt_location:
            try:
                # Extra guard: keep dedicated window at the bottom after repair.
                self.chrome.demote_window(rebuilt_location[0])
            except Exception:
                pass

        # --- 3. Runtime state ---
        self.is_recording = False
        self.is_voice_conversation = False
        self._voice_conversation_starting = False
        self._reset_voice_activity_tracking()
        self.waiting_for_page = False
        self.should_auto_start = False
        self.trigger_key_currently_pressed = False
        self.voice_fn_currently_pressed = False
        self.current_state = "IDLE"
        self.status_item.title = "Status: Ready"

        # --- 4. Focus ---
        # Demote controls Chrome window stacking; this restores the app focus
        # so repair does not leave user on the dedicated Chrome window.
        if frontmost_before:
            try:
                time.sleep(0.05)
                frontmost_before.activateWithOptions_(NSApplicationActivateIgnoringOtherApps)
            except Exception:
                pass

        # --- 5. Report ---
        # A failed close of the old window takes precedence over the rebuild outcome.
        if stale_location and not old_closed:
            window_msg = "Window rebuild attempted (old window close failed)"
        elif rebuilt_location:
            window_msg = "Window rebuilt"
        else:
            window_msg = "Window rebuild failed"

        logger.info(f"Reset complete: {tap_msg}; {window_msg}; state reset")
        rumps.notification(
            "MicPipe",
            "Reset (Self-Check & Repair)",
            f"{tap_msg} | {window_msg} | State reset"
        )

    def event_callback(self, proxy, event_type, event, refcon):
        """System event callback: Monitor trigger key

        Handles two event types and always returns ``event`` itself:

        * Key-down: Esc (keycode 53) stops an active voice conversation, or
          cancels a ChatGPT dictation that is recording / waiting for the page.
        * Flags-changed (modifier keys): Fn (keycode 63) drives the voice
          conversation shortcut, and ``self.trigger_key`` drives dictation
          (start on press, stop on release).

        Every action is started on a new thread, so this callback itself only
        updates the pressed-state flags and returns.

        ``proxy`` and ``refcon`` are not used.
        """
        if event_type == Quartz.kCGEventKeyDown:
            # Field 9 is expected to be the keyboard keycode field
            # (kCGKeyboardEventKeycode) — TODO confirm against CGEventField.
            keycode = Quartz.CGEventGetIntegerValueField(event, 9)
            if keycode == 53:  # Esc
                if self.is_voice_conversation:
                    threading.Thread(target=self.stop_voice_conversation).start()
                    return event
                # Gemini does not support cancel, only ChatGPT does
                if self.current_service == "ChatGPT" and (self.is_recording or self.waiting_for_page):
                    threading.Thread(target=self.cancel_recording).start()
                return event

        if event_type == Quartz.kCGEventFlagsChanged:
            keycode = Quartz.CGEventGetIntegerValueField(event, 9)
            flags = Quartz.CGEventGetFlags(event)

            # Fixed shortcut for realtime voice: Control+Fn starts, Fn stops.
            if keycode == 63:
                fn_pressed = self._is_key_pressed(keycode, flags)

                # Act only on an actual press/release transition of Fn.
                if fn_pressed != self.voice_fn_currently_pressed:
                    self.voice_fn_currently_pressed = fn_pressed

                    if self.is_voice_conversation:
                        # Pressing Fn stops the conversation; the release is ignored.
                        if fn_pressed:
                            threading.Thread(target=self.stop_voice_conversation).start()
                        return event

                    if fn_pressed and not self.is_recording:
                        control_held = bool(flags & Quartz.kCGEventFlagMaskControl)
                        if control_held and self.current_service == "ChatGPT":
                            threading.Thread(target=self.start_voice_conversation).start()
                            return event
                # No return here: when Fn did not start or stop a voice
                # conversation, the event falls through to the trigger-key
                # handling below (relevant when the trigger key is Fn itself).

            # Handle trigger key - Dual Mode (Hold or Toggle) + Voice Conversation
            if keycode == self.trigger_key:
                key_pressed = self._is_key_pressed(keycode, flags)

                # Prevent duplicate events
                if key_pressed == self.trigger_key_currently_pressed:
                    return event
                self.trigger_key_currently_pressed = key_pressed

                # Voice conversation stop is handled by Fn/Esc.
                if self.is_voice_conversation:
                    return event

                # Normal dictation trigger
                if key_pressed and not self.is_recording:
                    threading.Thread(target=lambda: self.start_recording(is_hold_mode=True)).start()
                elif not key_pressed:
                    # User released trigger key
                    if self.is_recording:
                        threading.Thread(target=self.stop_recording).start()

        return event

    def cancel_recording(self):
        """Cancel the current dictation without pasting text (ChatGPT only).

        No-op when nothing is recording or pending, or when the current service
        is Gemini (which does not support cancel). Otherwise it:

          1. clears any pending auto-start,
          2. if recording, plays the stop sound and asks the page to cancel,
          3. pushes the dedicated window to the back,
          4. restores focus to the original app,
          5. returns the UI to the idle state.

        Steps 2-4 are best-effort: failures are logged at debug level so the
        final state reset is always reached.
        """
        if not self.is_recording and not self.waiting_for_page:
            return

        # Gemini does not support cancel
        if self.current_service == "Gemini":
            return

        # Cancel any pending auto-start
        if self.waiting_for_page:
            self.waiting_for_page = False
            self.should_auto_start = False

        if self.is_recording:
            self.is_recording = False
            self._play_sound(self._sound_stop)
            self.current_state = "PROCESSING"
            self.status_item.title = "Status: ⏳ Cancelling..."
            try:
                self.chrome.cancel_dictation(preferred_location=self.service_tab_location)
            except Exception as e:
                logger.debug(f"Cancel dictation failed: {e}")

        # Push dedicated window to back before restoring focus
        if self.service_tab_location:
            try:
                self.chrome.demote_window(self.service_tab_location[0])
            except Exception as e:
                logger.debug(f"Failed to demote dedicated window after cancel: {e}")

        # Restore focus to original app. Guarded so a failure here cannot
        # leave the app stuck in the PROCESSING / "Cancelling..." state.
        if self.target_app:
            try:
                time.sleep(0.1)
                self.target_app.activateWithOptions_(NSApplicationActivateIgnoringOtherApps)
            except Exception as e:
                logger.debug(f"Failed to restore focus after cancel: {e}")

        self.current_state = "IDLE"
        self.status_item.title = "Status: Ready"

    def start_voice_conversation(self):
        """Start a ChatGPT real-time voice conversation."""
        if self.is_recording or self.is_voice_conversation or self._voice_conversation_starting:
            return
        if self.current_service != "ChatGPT":
            rumps.notification("MicPipe", "Voice Mode",
                               "Voice conversation is only available with the ChatGPT service.")
            return

        self._voice_conversation_starting = True
        self._reset_voice_activity_tracking()

        # Record current app for focus restoration
        self.target_app = NSWorkspace.sharedWorkspace().frontmostApplication()

        # Ensure dedicated window exists
        location, created = self._ensure_dedicated_window()
        if not location:
            self._voice_conversation_starting = False
            rumps.notification(
                "MicPipe",
                "Window Error",
                self._window_creation_failure_message(self.chrome, self.current_service)
            )
            return
        self.service_tab_location = location

        if created:
            self._voice_conversation_starting = False
            rumps.notification("MicPipe", "Voice Mode",
                               "The ChatGPT page is still loading. Please try again shortly.")
            return

        # Check page readiness
        ready_res = self.chrome.is_page_ready(preferred_location=self.service_tab_location)
        status = self._get_ready_status(ready_res)
        if status != "READY":
            self._voice_conversation_starting = False
            rumps.notification("MicPipe", "Voice Mode",
                               "The ChatGPT page is not ready yet. Please try again shortly.")
            return

        if self.service_tab_location:
            try:
                revealed = self.chrome.reveal_window(
                    self.service_tab_location[0], bounds=self.voice_bounds
                )
                if not revealed:
                    logger.warning("Failed to reveal ChatGPT window before starting voice.")
                time.sleep(0.8)
            except Exception as e:
                logger.warning(f"Failed to reveal ChatGPT window before starting voice: {e}")

        # Click the "Use Voice" button. If the composer has pending text,
        # clear the draft first and wait for the voice button to return.
        res = self.chrome.start_voice_conversation(preferred_location=self.service_tab_location)
        if res and "VOICE_DRAFT_CLEARED" in res:
            logger.info("Voice start fallback: cleared pending composer draft before retry.")
            max_wait_attempts = 10
            for attempt in range(max_wait_attempts):
                time.sleep(0.25)
                res = self.chrome.start_voice_conversation(preferred_location=self.service_tab_location)
                if res and "VOICE_START_CLICKED" in res:
                    logger.info(
                        f"Voice button became available after clearing pending draft "
                        f"(wait {0.25 * (attempt + 1):.2f}s)."
                    )