Skip to content

Commit 2a46e09

Browse files
author
Chris Warren-Smith
committed
LLAMA: nitro agent - wip
1 parent 87981a1 commit 2a46e09

1 file changed

Lines changed: 104 additions & 65 deletions

File tree

llama/nitro.cpp

Lines changed: 104 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,8 @@
3838
// TOOL:CURL <url>
3939
//
4040
// Copyright (C) 2026 Chris Warren-Smith — GPLv2 or later
41-
// ─── Standard library ────────────────────────────────────────────────────────
41+
//
42+
4243
#include <algorithm>
4344
#include <chrono>
4445
#include <ctime>
@@ -50,14 +51,13 @@
5051
#include <sstream>
5152
#include <string>
5253
#include <vector>
53-
// ─── curl ─────────────────────────────────────────────────────────────────────
5454
#include <curl/curl.h>
55-
// ─── Integration layer (sole llama.cpp dependency for nitro) ─────────────────
5655
#include "llama-sb.h"
5756
#include "llama-sb-rag.h"
58-
// ─── TUI ─────────────────────────────────────────────────────────────────────
5957
#include <notcurses/notcurses.h>
58+
6059
namespace fs = std::filesystem;
60+
6161
// ═══════════════════════════════════════════════════════════════════════════
6262
// Forward declarations
6363
// ═══════════════════════════════════════════════════════════════════════════
@@ -75,6 +75,7 @@ static std::string process_tool(const std::string &line, const std::string &san
7575
TuiState &tui);
7676
static std::string build_system_prompt(const std::vector<std::string> &knowledge_files,
7777
const std::string &sandbox);
78+
7879
// ─── RAG indexing ─────────────────────────────────────────────────────────────
7980
static constexpr int BATCH_SIZE = 512;
8081

@@ -448,6 +449,7 @@ struct TuiState {
448449
// ── input ─────────────────────────────────────────────────────────
449450
std::string input_buf;
450451
size_t cursor_pos = 0;
452+
bool mouse_mode = true;
451453
// ── status bar values ─────────────────────────────────────────────
452454
std::string current_model = "none";
453455
float tokens_per_sec = 0.0f;
@@ -489,6 +491,7 @@ struct TuiState {
489491
// handle — or just use the paired helpers below.
490492
struct ncplane *modal_plane = nullptr;
491493
void show_modal_popup(const std::string &message);
494+
void show_help();
492495
void dismiss_modal_popup();
493496
// ── RAG folder picker popup ───────────────────────────────────────
494497
// Presents an interactive directory browser to let the user choose a
@@ -555,7 +558,7 @@ void TuiState::init() {
555558
ncplane_set_base(inputpl, " ", 0,
556559
NCCHANNELS_INITIALIZER(BG_INP_R, BG_INP_G, BG_INP_B,
557560
BG_INP_R, BG_INP_G, BG_INP_B));
558-
notcurses_mice_enable(nc, NCMICE_ALL_EVENTS);
561+
notcurses_mice_enable(nc, NCMICE_BUTTON_EVENT);
559562
redraw_all();
560563
}
561564
void TuiState::destroy() {
@@ -784,6 +787,24 @@ void TuiState::show_modal_popup(const std::string &message) {
784787
notcurses_render(nc);
785788
}
786789

790+
// Appends the built-in command reference to the chat log, one "[sys]" line
// per entry, and then repaints the whole screen.  Used by both the /help
// slash command and the F1 key binding so the text lives in one place.
void TuiState::show_help() {
  // Help text, emitted top to bottom exactly as listed.
  static const char *const kHelpText[] = {
    "[sys] Commands:",
    "[sys] /model [path] load a GGUF model (picker if no path)",
    "[sys] /embed [path] load an embedding model (picker if no path)",
    "[sys] /rag [path] index file or directory (picker if no path)",
    "[sys] /memory KV / VRAM / layer stats",
    "[sys] /clear reset conversation",
    "[sys] /settings show current settings",
    "[sys] /set <key> <value> change a setting live",
    "[sys] /help this message",
    "[sys] exit / quit exit Nitro",
    "[sys] Settable keys (via /set):",
    "[sys] temperature top_p top_k min_p penalty_repeat",
    "[sys] n_max_tokens penalty_last_n rag_top_k n_gpu_layers",
    "[sys] run_allowed (comma-separated list, e.g. python3,make)",
  };

  for (const char *entry : kHelpText) {
    append_line(entry);
  }

  // A single repaint after all lines have been queued.
  redraw_all();
}
807+
787808
void TuiState::dismiss_modal_popup() {
788809
if (modal_plane) {
789810
ncplane_destroy(modal_plane);
@@ -1052,7 +1073,6 @@ std::string TuiState::readline_blocking() {
10521073
return result;
10531074
}
10541075

1055-
10561076
if (ni.id == NCKEY_UP) {
10571077
// Entering history from a fresh prompt: save current text as draft.
10581078
std::string hist_entry;
@@ -1107,12 +1127,25 @@ std::string TuiState::readline_blocking() {
11071127
notcurses_render(nc);
11081128
continue;
11091129
}
1110-
if (ni.id == NCKEY_SCROLL_DOWN && scroll_offset > 0) {
1130+
if (ni.id == NCKEY_SCROLL_DOWN && scroll_offset > 1) {
11111131
scroll_offset -= 1;
11121132
redraw_chat();
11131133
notcurses_render(nc);
11141134
continue;
11151135
}
1136+
if (ni.id == NCKEY_F01) {
1137+
show_help();
1138+
continue;
1139+
}
1140+
if (ni.id == NCKEY_F02) {
1141+
mouse_mode = !mouse_mode;
1142+
if (mouse_mode) {
1143+
notcurses_mice_enable(nc, NCMICE_BUTTON_EVENT);
1144+
} else {
1145+
notcurses_mice_disable(nc);
1146+
}
1147+
continue;
1148+
}
11161149
if (ni.id == NCKEY_BACKSPACE || ni.id == 127) {
11171150
if (cursor_pos > 0) { input_buf.erase(cursor_pos - 1, 1); --cursor_pos; }
11181151
} else if (ni.id == NCKEY_LEFT) {
@@ -1294,6 +1327,40 @@ bool AgentState::rag_index(const std::string &path, TuiState &tui) {
12941327
return true;
12951328
}
12961329

1330+
// ═══════════════════════════════════════════════════════════════════════════
1331+
// Think-tag filtering
1332+
// ═══════════════════════════════════════════════════════════════════════════
1333+
// Strips everything between (and including) <think>…</think> or the
// <|think|>…</|think|> variant from a completed line/buffer.
//
// Unbalanced tags are handled as follows:
//   - An open-tag with no close-tag hides everything from the open-tag to the
//     end of the text (the block continues in a later chunk).
//   - A close-tag with no open-tag means the block was opened in an earlier
//     chunk, so the text in front of that close-tag is still hidden
//     reasoning: it is removed together with the tag.  Any further stray
//     close-tags after that are removed on their own.
//
// @param text  one completed line or trailing buffer of model output
// @return      the visible text that should be shown to the user
std::string filter_think_tags(const std::string &text) {
  static const struct { const char *open; const char *close; } PAIRS[] = {
    { "<think>",   "</think>"   },
    { "<|think|>", "</|think|>" },
  };

  std::string out = text;
  for (const auto &p : PAIRS) {
    const std::string open(p.open);
    const std::string close(p.close);

    // Pass 1: remove every complete (or unterminated) think block.
    for (;;) {
      const auto ob = out.find(open);
      if (ob == std::string::npos) break;
      const auto ce = out.find(close, ob + open.size());
      if (ce == std::string::npos) {
        out.erase(ob);  // no closing tag — strip to end
        break;
      }
      out.erase(ob, ce + close.size() - ob);
    }

    // Pass 2: any close-tag still present is an orphan.  The first one ends a
    // block that was opened in an earlier chunk, so everything before it is
    // reasoning text that must not leak to the user.
    const auto first_orphan = out.find(close);
    if (first_orphan != std::string::npos) {
      out.erase(0, first_orphan + close.size());
    }

    // Remaining stray close-tags carry no hidden text; drop just the tag.
    size_t pos = 0;
    while ((pos = out.find(close, pos)) != std::string::npos) {
      out.erase(pos, close.size());
    }
  }
  return out;
}
1363+
12971364
// ═══════════════════════════════════════════════════════════════════════════
12981365
// Agent turn
12991366
// ═══════════════════════════════════════════════════════════════════════════
@@ -1324,29 +1391,32 @@ bool AgentState::run_turn(const std::string &user_message,
13241391
return false;
13251392
}
13261393
tui.append_line("Nitro: ");
1327-
tui.set_thinking(true);
1328-
bool in_think = true;
1394+
// in_think starts false — models that don't use <think> blocks emit
1395+
// visible text immediately. The spinner activates only while thinking.
1396+
bool in_think = false;
1397+
tui.set_thinking(false);
13291398
std::string buffer;
1330-
auto update_think_state = [&](const std::string &text) {
1331-
if (text.find("<think>") != std::string::npos ||
1332-
text.find("<|think|>") != std::string::npos) in_think = true;
1333-
if (text.find("</think>") != std::string::npos ||
1334-
text.find("</|think|>") != std::string::npos) in_think = false;
1335-
};
1336-
auto remove_substr = [](std::string str, const std::string& toRemove) {
1337-
size_t pos = str.find(toRemove);
1338-
while (pos != std::string::npos) {
1339-
str.erase(pos, toRemove.length());
1340-
pos = str.find(toRemove, pos);
1341-
}
1342-
return str;
1343-
};
1344-
13451399
while (iter->_has_next) {
13461400
std::string tok = llama->next(*iter);
1347-
tui.tick_spinner();
1348-
update_think_state(tok);
13491401
buffer += tok;
1402+
// Detect think-tag transitions on the accumulated buffer so we never
1403+
// miss a tag that was split across two tokens.
1404+
bool was_thinking = in_think;
1405+
if (!in_think) {
1406+
if (buffer.find("<think>") != std::string::npos ||
1407+
buffer.find("<|think|>") != std::string::npos)
1408+
in_think = true;
1409+
}
1410+
if (in_think) {
1411+
if (buffer.find("</think>") != std::string::npos ||
1412+
buffer.find("</|think|>") != std::string::npos)
1413+
in_think = false;
1414+
}
1415+
// Update spinner: animate only while in a think block.
1416+
if (in_think || was_thinking) {
1417+
tui.set_thinking(in_think);
1418+
if (in_think) tui.tick_spinner();
1419+
}
13501420
auto nl = buffer.find('\n');
13511421
if (nl != std::string::npos) {
13521422
std::string text_line = buffer.substr(0, nl);
@@ -1408,13 +1478,10 @@ bool AgentState::run_turn(const std::string &user_message,
14081478
}
14091479
buffer.clear();
14101480
} else if (!in_think) {
1411-
text_line = remove_substr(text_line, "</think>");
1412-
text_line = remove_substr(text_line, "</|think|>");
1413-
tui.append_token(text_line + "\n");
1481+
tui.append_token(filter_think_tags(text_line) + "\n");
14141482
}
14151483
}
14161484
}
1417-
14181485
if (!buffer.empty()) {
14191486
std::string trimmed = buffer;
14201487
trimmed.erase(0, trimmed.find_first_not_of(" \t"));
@@ -1425,7 +1492,7 @@ bool AgentState::run_turn(const std::string &user_message,
14251492
std::string tool_result_msg = "TOOL_RESULT: " + result;
14261493
llama->add_message(*iter, "user", tool_result_msg);
14271494
} else if (!in_think) {
1428-
tui.append_token(buffer);
1495+
tui.append_token(filter_think_tags(buffer));
14291496
}
14301497
}
14311498
tui.flush_token_acc();
@@ -1817,7 +1884,7 @@ static std::string build_system_prompt(const std::vector<std::string> &knowledge
18171884
"## Tool protocol\n"
18181885
"Emit tool calls on their own line. The host executes them and returns\n"
18191886
"TOOL_RESULT: <value> on the next line.\n\n"
1820-
"## Available tools:\n"
1887+
"Available tools:\n"
18211888
" TOOL:LIST [dir] list files (default: sandbox root)\n"
18221889
" TOOL:READ <file> read file contents\n"
18231890
" TOOL:WRITE <file> <text> write text to file\n"
@@ -1828,25 +1895,12 @@ static std::string build_system_prompt(const std::vector<std::string> &knowledge
18281895
" TOOL:RND random float\n"
18291896
" TOOL:PERMISSION ask user for explicit permission\n"
18301897
" TOOL:CURL <url> HTTP GET; returns response body (max 32 KB)\n\n"
1831-
"## Rules:\n"
1898+
"Rules:\n"
18321899
"- Never access files outside the sandbox.\n"
1833-
"- Use TOOL:PERMISSION when you're about to modify files, delete data, or run external programs.\n"
1900+
"- Use TOOL:PERMISSION before destructive or irreversible operations.\n"
18341901
"- Use TOOL:CURL to fetch documentation, APIs, or web content you need.\n"
18351902
"- Reason step-by-step inside <|think|>…</|think|> (hidden from user).\n"
1836-
"- After each tool call, explain what you did in plain English.\n"
1837-
"Ask the user for explicit permission before proceeding.\n\n"
1838-
"## File Reading\n"
1839-
"When you read a file with TOOL:READ, you MUST:\n"
1840-
"1. Acknowledge what you found\n"
1841-
"2. Use that information in your response\n"
1842-
"3. If the file contains code, explain it or show relevant parts\n"
1843-
"4. If the file contains documentation, summarize key points\n"
1844-
"## Tool Result Integration\n"
1845-
"When you see TOOL_RESULT in the conversation, it contains tool output.\n"
1846-
"Use it to:\n"
1847-
"- Answer questions based on file contents\n"
1848-
"- Explain code or configuration\n"
1849-
"- Provide accurate information from the file\n";
1903+
"- After each tool call, explain what you did in plain English.\n\n";
18501904
for (const auto &kf : knowledge_files) {
18511905
std::ifstream f(kf);
18521906
if (!f) continue;
@@ -1872,21 +1926,7 @@ static void handle_slash(const std::string &input,
18721926
}
18731927

18741928
if (verb == "/help") {
1875-
tui.append_line("[sys] Commands:");
1876-
tui.append_line("[sys] /model [path] load a GGUF model (picker if no path)");
1877-
tui.append_line("[sys] /embed [path] load an embedding model (picker if no path)");
1878-
tui.append_line("[sys] /rag [path] index file or directory (picker if no path)");
1879-
tui.append_line("[sys] /memory KV / VRAM / layer stats");
1880-
tui.append_line("[sys] /clear reset conversation");
1881-
tui.append_line("[sys] /settings show current settings");
1882-
tui.append_line("[sys] /set <key> <value> change a setting live");
1883-
tui.append_line("[sys] /help this message");
1884-
tui.append_line("[sys] exit / quit exit Nitro");
1885-
tui.append_line("[sys] Settable keys (via /set):");
1886-
tui.append_line("[sys] temperature top_p top_k min_p penalty_repeat");
1887-
tui.append_line("[sys] n_max_tokens penalty_last_n rag_top_k n_gpu_layers");
1888-
tui.append_line("[sys] run_allowed (comma-separated list, e.g. python3,make)");
1889-
tui.redraw_all();
1929+
tui.show_help();
18901930
return;
18911931
}
18921932
// ── /model ──────────────────────────────────────────────────────────────
@@ -2111,8 +2151,7 @@ int main(int argc, char **argv) {
21112151
} else if (a == "-g" || a == "--gpu-layers") {
21122152
cfg.n_gpu_layers = std::stoi(take_next(a.c_str()));
21132153
} else if (a == "-h" || a == "--help") {
2114-
std::puts(
2115-
"Usage: nitro [options] [project_dir]\n"
2154+
std::puts("Usage: nitro [options] [project_dir]\n"
21162155
"\n"
21172156
"Options:\n"
21182157
" -m, --model <path> GGUF model to load on startup\n"

0 commit comments

Comments
 (0)