diff --git a/api/src/org/labkey/api/ApiModule.java b/api/src/org/labkey/api/ApiModule.java index def6dcec40c..5ae6979cf95 100644 --- a/api/src/org/labkey/api/ApiModule.java +++ b/api/src/org/labkey/api/ApiModule.java @@ -189,6 +189,7 @@ import org.labkey.api.view.ViewServlet; import org.labkey.api.view.WebPartFactory; import org.labkey.api.webdav.WebdavResolverImpl; +import org.labkey.api.wiki.WikiRendererType; import org.labkey.api.writer.ContainerUser; import org.labkey.filters.ContentSecurityPolicyFilter; @@ -552,6 +553,7 @@ public void registerServlets(ServletContext servletCtx) UserManager.TestCase.class, ViewCategoryManager.TestCase.class, WebdavResolverImpl.TestCase.class, + WikiRendererType.TestCase.class, WorkbookContainerType.TestCase.class, WriteableLookAndFeelProperties.TestCase.class ); diff --git a/api/src/org/labkey/api/mcp/McpService.java b/api/src/org/labkey/api/mcp/McpService.java index fb3f7c9db76..4aaa47296b4 100644 --- a/api/src/org/labkey/api/mcp/McpService.java +++ b/api/src/org/labkey/api/mcp/McpService.java @@ -36,6 +36,7 @@ import java.util.Arrays; import java.util.List; +import java.util.Map; import java.util.function.Supplier; /// @@ -94,6 +95,8 @@ /// public interface McpService extends ToolCallbackProvider { + String VECTOR_SCHEMA = "vector_indexes"; // for pgvector extension, see core-26.005-26.006.sql + Logger LOG = LogHelper.getLogger(McpService.class, "MCP registration exceptions"); String ENABLE_MCP_SERVER_FLAG = "enableMcpServer"; @@ -187,6 +190,8 @@ default ChatClient getChat(HttpSession session, String conversationName, Supplie record MessageResponse(String contentType, String text, HtmlString html) {} + record VectorDocument(String id, String text, Map metadata) {} + /** get a consolidated response (good for many text-oriented agents/use-cases) */ MessageResponse sendMessage(ChatClient chat, String message); @@ -201,4 +206,21 @@ default List sendMessageEx(ChatClient chat, String message) * CONSIDER: Is it possible to implement 
VectorStoreRetriever wrapper for SearchService??? */ VectorStore getVectorStore(); + + /** Returns true if the vector store exists and contains at least one document. */ + boolean isVectorStorePopulated(@NotNull VectorStore vs); + + /** + * Adds documents to the vector store, automatically splitting any document whose token + * count exceeds the embedding model's input limit. Prefer this over + * {@code getVectorStore().add(...)} for indexing — it prevents the + * {@code IllegalArgumentException} that {@code TokenCountBatchingStrategy} throws on + * oversized inputs. + */ + void addDocuments(List documents); + + void saveVectorStore(); + + /** Drop and recreate the vector store table. Use when the embedding model has changed and dimensions no longer match. */ + void resetVectorStore(); } diff --git a/api/src/org/labkey/api/mcp/NoopMcpService.java b/api/src/org/labkey/api/mcp/NoopMcpService.java index e33eb422a4c..2a0ff1c2f9a 100644 --- a/api/src/org/labkey/api/mcp/NoopMcpService.java +++ b/api/src/org/labkey/api/mcp/NoopMcpService.java @@ -101,4 +101,25 @@ public VectorStore getVectorStore() { return null; } + + @Override + public boolean isVectorStorePopulated(@NotNull VectorStore vs) + { + return false; + } + + @Override + public void addDocuments(List documents) + { + } + + @Override + public void saveVectorStore() + { + } + + @Override + public void resetVectorStore() + { + } } diff --git a/api/src/org/labkey/api/wiki/WikiRendererType.java b/api/src/org/labkey/api/wiki/WikiRendererType.java index b390a9e58cb..c5a98b8b17c 100644 Binary files a/api/src/org/labkey/api/wiki/WikiRendererType.java and b/api/src/org/labkey/api/wiki/WikiRendererType.java differ diff --git a/api/src/org/labkey/api/wiki/WikiService.java b/api/src/org/labkey/api/wiki/WikiService.java index 0fa9e448ae5..9d5da22fb8b 100644 --- a/api/src/org/labkey/api/wiki/WikiService.java +++ b/api/src/org/labkey/api/wiki/WikiService.java @@ -50,6 +50,15 @@ record RenderedWiki (String name, String title, 
HtmlString html, String entityId RenderedWiki getRenderedWiki(Container c, String name); + record WikiMarkdown(String name, String title, String markdown, String entityId) {} + + /** + * Returns a best-effort Markdown rendering of the wiki's raw source, intended for indexing + * (search, embedding, vector stores) — NOT for user display. Conversion is lossy and may + * drop or mangle markup details that don't have a direct Markdown equivalent. + */ + WikiMarkdown getWikiMarkdown(Container c, String name); + default HtmlString getHtml(Container c, String name) { var wiki = getRenderedWiki(c, name); @@ -105,4 +114,18 @@ default HtmlString getHtml(Container c, String name) String updateAttachments(Container c, User user, String wikiName, @Nullable List attachmentFiles, @Nullable List deleteAttachmentNames); AttachmentParentType getAttachmentType(); + + /** + * Loads all wikis from the given container into the MCP vector store. + * + *

Each {@link org.labkey.api.mcp.McpService.VectorDocument} is assigned an ID of the form + * {@code "<containerId>/<wikiEntityId>"}, where both components are GUIDs + * (as returned by {@link Container#getId()} and the wiki's own entity ID). + * Tools that consume vector store results (e.g. {@code listDocuments}, + * {@code retrieveDocument}) must use this same format when constructing or + * interpreting document IDs.

+ * + * @return the number of documents added + */ + int populateVectorStore(Container container); } diff --git a/core/module.properties b/core/module.properties index d4c7dec2745..0be23cf2635 100644 --- a/core/module.properties +++ b/core/module.properties @@ -1,13 +1,13 @@ -Name: Core -ModuleClass: org.labkey.core.CoreModule -SchemaVersion: 26.005 -Label: Administration and Essential Services -Description: The Core module provides central services such as login, \ - security, administration, folder management, user management, \ - module upgrade, file attachments, analytics, and portal page management. -Organization: LabKey -OrganizationURL: https://www.labkey.com/ -License: Apache 2.0 -LicenseURL: http://www.apache.org/licenses/LICENSE-2.0 -SupportedDatabases: mssql, pgsql -ManageVersion: true +Name: Core +ModuleClass: org.labkey.core.CoreModule +SchemaVersion: 26.006 +Label: Administration and Essential Services +Description: The Core module provides central services such as login, \ + security, administration, folder management, user management, \ + module upgrade, file attachments, analytics, and portal page management. 
+Organization: LabKey +OrganizationURL: https://www.labkey.com/ +License: Apache 2.0 +LicenseURL: http://www.apache.org/licenses/LICENSE-2.0 +SupportedDatabases: mssql, pgsql +ManageVersion: true diff --git a/core/resources/schemas/dbscripts/postgresql/core-26.005-26.006.sql b/core/resources/schemas/dbscripts/postgresql/core-26.005-26.006.sql new file mode 100644 index 00000000000..40ae6f63620 --- /dev/null +++ b/core/resources/schemas/dbscripts/postgresql/core-26.005-26.006.sql @@ -0,0 +1 @@ +CREATE SCHEMA IF NOT EXISTS "vector_indexes"; \ No newline at end of file diff --git a/core/src/org/labkey/core/CoreMcp.java b/core/src/org/labkey/core/CoreMcp.java index d85fa45dca9..d8b368253f0 100644 --- a/core/src/org/labkey/core/CoreMcp.java +++ b/core/src/org/labkey/core/CoreMcp.java @@ -19,11 +19,13 @@ import io.modelcontextprotocol.spec.McpSchema.ReadResourceResult; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; +import org.json.JSONArray; import org.json.JSONObject; import org.labkey.api.collections.LabKeyCollectors; import org.labkey.api.data.Container; import org.labkey.api.data.ContainerManager; import org.labkey.api.mcp.McpService; +import org.labkey.api.module.ModuleLoader; import org.labkey.api.security.RequiresNoPermission; import org.labkey.api.security.RequiresPermission; import org.labkey.api.security.User; @@ -145,11 +147,32 @@ String setContainer(ToolContext context, @ToolParam(description = "Container pat return message; } + + // TODO replace/augment with available feature list + @Tool(description = "List the modules installed on this server, this may be useful in inferring the available funcitonality. 
For instance, " + + "the presence of the `premium` module implies the availability of premium featues.") + @RequiresNoPermission + public String listModules(ToolContext context) + { + JSONArray modules = new JSONArray(); + ModuleLoader.getInstance().getModules().stream() + .map(module -> { + JSONObject obj = new JSONObject(); + obj.put("name", module.getName()); + if (StringUtils.isNotEmpty(module.getLabel())) + obj.put("label", module.getLabel()); + return obj; + }) + .forEach(modules::put); + return new JSONObject(Map.of("modules",modules)).toString(); + } + + @McpResource( uri = "resource://org/labkey/core/FileBasedModules.md", mimeType = "application/markdown", name = "File-Based Module Development Guide", - description = "Provide documentation for developing LabKey file-based modules") + description = "Required reading before building file-based modules. Covers module.properties, directory structure, web parts, SQL queries, reports, and deployment.") public ReadResourceResult getFileBasedModuleDevelopmentGuide() throws IOException { incrementResourceRequestCount("File-Based Modules"); @@ -167,7 +190,7 @@ public ReadResourceResult getFileBasedModuleDevelopmentGuide() throws IOExceptio uri = "resource://org/labkey/core/DataAnalysis_Python.md", mimeType = "application/markdown", name = "Python Data Analysis Development Guide", - description = "Provide documentation for developers using Python to analyze LabKey data") + description = "Required reading before writing Python scripts. 
Covers APIWrapper setup, .netrc auth, select_rows, execute_sql, QueryFilter, and pandas workflows.") public ReadResourceResult getPythonDataAnalysisGuide() throws IOException { incrementResourceRequestCount("Python Data Analysis"); @@ -185,7 +208,7 @@ public ReadResourceResult getPythonDataAnalysisGuide() throws IOException uri = "resource://org/labkey/core/DataAnalysis_R.md", mimeType = "application/markdown", name = "R Data Analysis Development Guide", - description = "Provide documentation for developers using R to analyze LabKey data") + description = "Required reading before writing R scripts. Covers Rlabkey setup, .netrc auth, labkey.selectRows, labkey.executeSql, makeFilter, and data frame workflows.") public ReadResourceResult getRDataAnalysisGuide() throws IOException { incrementResourceRequestCount("R Data Analysis"); diff --git a/core/src/org/labkey/core/CoreModule.java b/core/src/org/labkey/core/CoreModule.java index cc41079ae40..f30e1746ebf 100644 --- a/core/src/org/labkey/core/CoreModule.java +++ b/core/src/org/labkey/core/CoreModule.java @@ -368,6 +368,7 @@ import java.util.stream.Collectors; import java.util.stream.Stream; +import static org.labkey.api.mcp.McpService.VECTOR_SCHEMA; import static org.labkey.api.settings.StashedStartupProperties.homeProjectFolderType; import static org.labkey.api.settings.StashedStartupProperties.homeProjectResetPermissions; import static org.labkey.api.settings.StashedStartupProperties.homeProjectWebparts; @@ -1552,7 +1553,8 @@ public Collection getSchemaNames() CoreSchema.getInstance().getSchemaName(), // core PropertySchema.getInstance().getSchemaName(), // prop TestSchema.getInstance().getSchemaName(), // test - DbSchema.TEMP_SCHEMA_NAME // temp + DbSchema.TEMP_SCHEMA_NAME, // temp + VECTOR_SCHEMA // used by pgvector - see core-26.005-26.006.sql ); } diff --git a/devtools/src/org/labkey/devtools/TestController.java b/devtools/src/org/labkey/devtools/TestController.java index 1240c01fb0e..ac5baa6e324 100644 --- 
a/devtools/src/org/labkey/devtools/TestController.java +++ b/devtools/src/org/labkey/devtools/TestController.java @@ -18,12 +18,10 @@ import jakarta.servlet.http.HttpServletResponse; import org.apache.logging.log4j.Logger; -import org.jetbrains.annotations.NotNull; import org.junit.Assert; import org.junit.Test; import org.labkey.api.action.ApiResponse; import org.labkey.api.action.ApiSimpleResponse; -import org.labkey.api.action.ConfirmAction; import org.labkey.api.action.FormArrayList; import org.labkey.api.action.FormViewAction; import org.labkey.api.action.JsonInputLimit; @@ -35,7 +33,6 @@ import org.labkey.api.action.SimpleResponse; import org.labkey.api.action.SimpleViewAction; import org.labkey.api.action.SpringActionController; -import org.labkey.api.data.Container; import org.labkey.api.data.ContainerManager; import org.labkey.api.mcp.AbstractAgentAction; import org.labkey.api.mcp.McpService; @@ -57,9 +54,7 @@ import org.labkey.api.util.ConfigurationException; import org.labkey.api.util.DOM; import org.labkey.api.util.ExceptionUtil; -import org.labkey.api.util.FileUtil; import org.labkey.api.util.HtmlString; -import org.labkey.api.util.HtmlStringBuilder; import org.labkey.api.util.PageFlowUtil; import org.labkey.api.util.TestContext; import org.labkey.api.util.URLHelper; @@ -74,15 +69,10 @@ import org.labkey.api.view.ViewServlet; import org.labkey.api.view.template.ClientDependency; import org.labkey.api.view.template.PageConfig; -import org.labkey.api.wiki.WikiService; -import org.springframework.ai.document.Document; -import org.springframework.ai.vectorstore.SimpleVectorStore; -import org.springframework.ai.vectorstore.VectorStore; import org.springframework.dao.PessimisticLockingFailureException; import org.springframework.mock.web.MockHttpServletResponse; import org.springframework.validation.BindException; import org.springframework.validation.Errors; -import org.springframework.validation.ObjectError; import 
org.springframework.web.servlet.ModelAndView; import org.springframework.web.servlet.mvc.Controller; @@ -92,9 +82,6 @@ import java.util.LinkedList; import java.util.List; import java.util.Map; -import java.util.Objects; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.stream.Gatherers; import static org.labkey.api.util.DOM.Attribute.name; import static org.labkey.api.util.DOM.Attribute.src; @@ -1322,88 +1309,6 @@ protected String getServicePrompt() } } - - @RequiresLogin - public static class PopulateVectorStoreAction extends ConfirmAction - { - AtomicInteger count = new AtomicInteger(); - - @Override - public ModelAndView getConfirmView(Object o, BindException errors) - { - var db = FileUtil.getTempDirectoryFileLike().resolveChild("VectorStore.database"); - HtmlStringBuilder message = HtmlStringBuilder.of(); - message.append("This will add the contents of /Documention wikis to the vector store.").append(HtmlString.BR); - message.append("This may take a few minutes."); - if (db.exists()) - message.unsafeAppend("

").append("I see a vector store file already exists. Just FYI."); - return new HtmlView(message); - } - - @Override - public void validateCommand(Object o, Errors errors) - { - } - - @Override - public @NotNull URLHelper getSuccessURL(Object o) - { - return null; - } - - - // not usually used but some actions return views that close the current window etc... - @Override - public ModelAndView getSuccessView(Object form) - { - return HtmlView.of(count.get() + " documents added to vector store"); - } - - @Override - public boolean handlePost(Object o, BindException errors) - { - Container documentsContainer = ContainerManager.getForPath("/Documentation"); - if (null == documentsContainer) - throw new NotFoundException(); - VectorStore vs = McpService.get().getVectorStore(); - if (null == vs) - throw new NotFoundException("/Documentation project was not found"); - - ActionURL wikiBase = new ActionURL("wiki","page",documentsContainer); - - WikiService service = Objects.requireNonNull(WikiService.get()); - List all = service.getNames(documentsContainer); - all.stream() - .map(name -> service.getRenderedWiki(documentsContainer, name)) - .filter(Objects::nonNull) - .map(wiki -> - { - count.incrementAndGet(); - var metadata = Map.of( - "Content-Type", "text/html", - "filename", wiki.name() + ".html", - "title", (Object)wiki.title(), - "source", wikiBase.clone().addParameter("name",wiki.name()).getURIString() - ); - return new Document(wiki.entityId(), wiki.html().toString(), metadata); - }) - .gather(Gatherers.windowFixed(50)) - .forEach(vs); - - var db = FileUtil.getTempDirectoryFileLike().resolveChild("VectorStore.database"); - try - { - ((SimpleVectorStore)vs).save(db.toNioPathForRead().toFile()); - return true; - } - catch (Exception x) - { - errors.addError(new ObjectError("form", "error saving vectordb: " + x.getMessage())); - return false; - } - } - } - @AdminConsoleAction(AdminOperationsPermission.class) @JsonInputLimit(50) public static class 
TestJsonObjectInputLimitAction extends MutatingApiAction diff --git a/query/src/org/labkey/query/controllers/QueryMcp.java b/query/src/org/labkey/query/controllers/QueryMcp.java index 07e28cee290..8cc5cd53603 100644 --- a/query/src/org/labkey/query/controllers/QueryMcp.java +++ b/query/src/org/labkey/query/controllers/QueryMcp.java @@ -67,7 +67,7 @@ public class QueryMcp implements McpService.McpImpl uri = "resource://org/labkey/query/controllers/prompts/LabKeySql.md", mimeType = "application/markdown", name = "LabKey SQL", - description = "Provide documentation for LabKey SQL specific syntax") + description = "Required reading before writing or debugging LabKey SQL. Covers lookup traversal, PIVOT, parameterized queries, cross-folder queries, and all available functions.") public ReadResourceResult getLabKeySQLDocumentation() throws IOException { incrementResourceRequestCount("LabKey SQL"); diff --git a/wiki/src/org/labkey/wiki/WikiManager.java b/wiki/src/org/labkey/wiki/WikiManager.java index ef2695d8645..3e1527e6ce5 100644 --- a/wiki/src/org/labkey/wiki/WikiManager.java +++ b/wiki/src/org/labkey/wiki/WikiManager.java @@ -42,6 +42,8 @@ import org.labkey.api.data.Table; import org.labkey.api.data.TableInfo; import org.labkey.api.data.TableSelector; +import org.labkey.api.mcp.McpService; +import org.labkey.api.mcp.McpService.VectorDocument; import org.labkey.api.query.FieldKey; import org.labkey.api.query.QueryService; import org.labkey.api.search.SearchService; @@ -56,6 +58,7 @@ import org.labkey.api.util.TestContext; import org.labkey.api.view.ActionURL; import org.labkey.api.view.HtmlView; +import org.labkey.api.view.NotFoundException; import org.labkey.api.view.NavTree; import org.labkey.api.view.Portal; import org.labkey.api.view.ViewContext; @@ -92,6 +95,7 @@ import java.util.Map; import java.util.Objects; import java.util.concurrent.CopyOnWriteArrayList; +import java.util.concurrent.atomic.AtomicInteger; import static 
org.labkey.api.action.SpringActionController.ERROR_MSG; @@ -869,6 +873,28 @@ public RenderedWiki getRenderedWiki(Container c, String name) } } + @Override + public WikiMarkdown getWikiMarkdown(Container c, String name) + { + if (null == c || null == name) + return null; + + try + { + Wiki wiki = WikiSelectManager.getWiki(c, name); + if (null == wiki) + return null; + WikiVersion version = wiki.getLatestVersion(); + String body = version.getBody(); + String markdown = version.getRendererTypeEnum().bestAttemptConvertToMarkdown(null == body ? "" : body); + return new WikiMarkdown(name, version.getTitle(), markdown, wiki.getEntityId()); + } + catch (Exception x) + { + throw new RuntimeException(x); + } + } + @Override public void insertWiki(User user, Container c, String name, String body, WikiRendererType renderType, String title) { @@ -932,6 +958,66 @@ public List getNames(Container c) return new ArrayList<>(l); } + @Override + public int populateVectorStore(Container container) + { + McpService mcp = McpService.get(); + if (null == mcp.getVectorStore()) + throw new NotFoundException("VectorStore not enabled."); + + ActionURL wikiBase = new ActionURL("wiki", "page", container); + AtomicInteger count = new AtomicInteger(); + + for (String name : getNames(container)) + { + Wiki wiki = WikiSelectManager.getWiki(container, name); + if (null == wiki) + continue; + WikiVersion version = wiki.getLatestVersion(); + if (null == version) + continue; + + String body = version.getBody(); + String markdown = version.getRendererTypeEnum().bestAttemptConvertToMarkdown(null == body ? 
"" : body); + List path = getAncestorTitles(wiki); + + Map metadata = new HashMap<>(); + metadata.put("Content-Type", "text/markdown"); + metadata.put("filename", name + ".md"); + metadata.put("title", version.getTitle()); + metadata.put("source", wikiBase.clone().addParameter("name", name).getURIString()); + if (!path.isEmpty()) + metadata.put("path", path); + + VectorDocument doc = new VectorDocument(container.getId() + "/" + wiki.getEntityId(), markdown, metadata); + try + { + mcp.addDocuments(List.of(doc)); + count.incrementAndGet(); + } + catch (IllegalArgumentException x) + { + LogManager.getLogger(WikiManager.class).info(name, x); + } + } + + mcp.saveVectorStore(); + return count.get(); + } + + private List getAncestorTitles(Wiki wiki) + { + List titles = new ArrayList<>(); + Wiki current = wiki.getParentWiki(); + while (current != null) + { + WikiVersion version = current.getLatestVersion(); + titles.add(0, version != null ? version.getTitle() : current.getName()); + current = current.getParentWiki(); + } + return titles; + } + @Override public void addWikiListener(WikiChangeListener listener) {