diff --git a/backend/danswer/configs/model_configs.py b/backend/danswer/configs/model_configs.py
index 1cc608939d7..aa0628de9ff 100644
--- a/backend/danswer/configs/model_configs.py
+++ b/backend/danswer/configs/model_configs.py
@@ -86,6 +86,8 @@
GEN_AI_CLIENT_SECRET = os.environ.get("GEN_AI_CLIENT_SECRET") or None
GEN_AI_ACCOUNT_ID = os.environ.get("GEN_AI_ACCOUNT_ID") or None
GEN_AI_TENANT_ID = os.environ.get("GEN_AI_TENANT_ID") or None
+GEN_AI_VENDOR = os.environ.get("GEN_AI_VENDOR") or "openai"
+GEN_AI_MODEL_NAME = os.environ.get("GEN_AI_MODEL_NAME") or "gpt-4o-2024-11-20"
# Number of tokens from chat history to include at maximum
# 3000 should be enough context regardless of use, no need to include as much as possible
# as this drives up the cost unnecessarily
diff --git a/backend/danswer/danswerbot/slack/handlers/handle_message.py b/backend/danswer/danswerbot/slack/handlers/handle_message.py
index 088491adb72..d4b79ef0d2c 100644
--- a/backend/danswer/danswerbot/slack/handlers/handle_message.py
+++ b/backend/danswer/danswerbot/slack/handlers/handle_message.py
@@ -59,6 +59,7 @@
compute_max_document_tokens_for_persona,
)
from danswer.llm.factory import get_llms_for_persona
+from danswer.llm.override_models import LLMOverride
from danswer.llm.utils import check_number_of_tokens
from danswer.llm.utils import get_max_input_tokens
from danswer.one_shot_answer.answer_question import get_search_answer
@@ -572,7 +573,15 @@ def _get_answer(new_message_request: DirectQARequest) -> OneShotQAResponse | Non
Persona,
fetch_persona_by_id(db_session, new_message_request.persona_id),
)
- llm, _ = get_llms_for_persona(persona)
+ llm_override = None
+ if channel_config and channel_config.channel_config:
+ ch_vendor = channel_config.channel_config.get("llm_vendor")
+ ch_model = channel_config.channel_config.get("llm_model_name")
+ if ch_vendor or ch_model:
+ llm_override = LLMOverride(
+ model_provider=ch_vendor, model_version=ch_model
+ )
+ llm, _ = get_llms_for_persona(persona, llm_override=llm_override)
# In cases of threads, split the available tokens between docs and thread context
input_tokens = get_max_input_tokens(
diff --git a/backend/danswer/db/models.py b/backend/danswer/db/models.py
index 58a8f32a8e9..4a07d4c2887 100644
--- a/backend/danswer/db/models.py
+++ b/backend/danswer/db/models.py
@@ -1104,6 +1104,9 @@ class ChannelConfig(TypedDict):
jira_config: NotRequired[dict[str, Any]] # Contains all JIRA related settings
# Curated response config if user asks for more help
curated_response_config: NotRequired[dict[str, Any]]
+ # LLM configuration for this channel
+ llm_vendor: NotRequired[str]
+ llm_model_name: NotRequired[str]
class SlackBotResponseType(str, PyEnum):
diff --git a/backend/danswer/llm/custom_llm.py b/backend/danswer/llm/custom_llm.py
index 18c8a20b569..6bf9cc02917 100644
--- a/backend/danswer/llm/custom_llm.py
+++ b/backend/danswer/llm/custom_llm.py
@@ -14,7 +14,9 @@
from danswer.configs.model_configs import GEN_AI_CLIENT_SECRET
from danswer.configs.model_configs import GEN_AI_IDENTITY_ENDPOINT
from danswer.configs.model_configs import GEN_AI_MAX_OUTPUT_TOKENS
+from danswer.configs.model_configs import GEN_AI_MODEL_NAME
from danswer.configs.model_configs import GEN_AI_TENANT_ID
+from danswer.configs.model_configs import GEN_AI_VENDOR
from danswer.llm.interfaces import LLM
from danswer.llm.interfaces import LLMConfig
from danswer.llm.interfaces import ToolChoiceOptions
@@ -67,8 +69,7 @@ def __init__(
# Not used here but you probably want a model server that isn't completely open
api_key: str | None,
timeout: int,
- endpoint: str
- | None = "https://alpha.uipath.com/{account_id}/{tenant_id}/llmgateway_/api/raw/vendor/openai/model/gpt-4o-2024-11-20/completions",
+ endpoint: str | None = None,
identity_url: str | None = GEN_AI_IDENTITY_ENDPOINT,
client_id: str | None = GEN_AI_CLIENT_ID,
client_secret: str | None = GEN_AI_CLIENT_SECRET,
@@ -76,11 +77,15 @@ def __init__(
tenant_id: str | None = GEN_AI_TENANT_ID,
max_output_tokens: int = int(GEN_AI_MAX_OUTPUT_TOKENS),
api_version: str | None = GEN_AI_API_VERSION,
+ llm_vendor: str | None = None,
+ llm_model_name: str | None = None,
):
+ vendor = llm_vendor or GEN_AI_VENDOR
+ model = llm_model_name or GEN_AI_MODEL_NAME
if not endpoint:
- raise ValueError(
- "Cannot point Danswer to a custom LLM server without providing the "
- "endpoint for the model server."
+ endpoint = (
+ "https://alpha.uipath.com/{account_id}/{tenant_id}"
+ f"/llmgateway_/api/raw/vendor/{vendor}/model/{model}/completions"
)
if not identity_url:
@@ -129,9 +134,8 @@ def __init__(
self._max_output_tokens = max_output_tokens
self._timeout = timeout
self.token = self._get_token()
- # TODO: Remove hard-coding
- self._model_provider = "custom"
- self._model_version = "gpt-4"
+ self._model_provider = vendor
+ self._model_version = model
self._temperature = 0.0
self._api_key = api_key
@@ -139,42 +143,62 @@ def __init__(
self._max_output_tokens = 7000
def _execute(self, input: LanguageModelInput) -> AIMessage:
+ is_bedrock = self._model_provider == "awsbedrock"
+ api_flavor = "converse" if is_bedrock else "chat-completions"
+
headers = {
"Content-Type": "application/json",
"Authorization": "Bearer " + self.token,
"X-UiPath-LlmGateway-RequestingProduct": "darwin",
"X-UiPath-LlmGateway-RequestingFeature": "ChatWithAssistant",
- "X-UiPath-LlmGateway-ApiFlavor": "chat-completions",
+ "X-UiPath-LlmGateway-ApiFlavor": api_flavor,
"X-UiPath-LlmGateway-ApiVersion": "2024-10-21",
"X-UiPath-LlmGateway-TimeoutSeconds": "60",
"X-UIPATH-STREAMING-ENABLED": "false",
}
- # print(f"Input: {input}")
chatPrompt = convert_lm_input_to_prompt(input)
-
- json_array = []
messages = chatPrompt.to_messages()
- for msg in messages:
- mapped_type = self._map_type(msg.type)
- json_obj = {
- "role": mapped_type,
- "content": self._clean_json_string(msg.content),
- }
- json_array.append(json_obj)
- data = {"max_tokens": self._max_output_tokens, "messages": json_array}
+ if is_bedrock:
+ # AWS Bedrock Converse API format
+ bedrock_messages = []
+ for msg in messages:
+ mapped_type = self._map_type(msg.type)
+ if mapped_type == "system":
+ continue # system handled separately below
+ bedrock_messages.append(
+ {
+ "role": mapped_type,
+ "content": [{"text": self._clean_json_string(msg.content)}],
+ }
+ )
+ data: dict = {
+ "messages": bedrock_messages,
+ "inferenceConfig": {"maxTokens": self._max_output_tokens},
+ }
+ system_msgs = [
+ {"text": self._clean_json_string(msg.content)}
+ for msg in messages
+ if self._map_type(msg.type) == "system"
+ ]
+ if system_msgs:
+ data["system"] = system_msgs
+ else:
+ # OpenAI chat-completions format
+ json_array = []
+ for msg in messages:
+ mapped_type = self._map_type(msg.type)
+ json_array.append(
+ {
+ "role": mapped_type,
+ "content": self._clean_json_string(msg.content),
+ }
+ )
+ data = {"max_tokens": self._max_output_tokens, "messages": json_array}
try:
- print(data)
- with open("requestdata.json", "w") as fp:
- json.dump(data, fp)
-
- # json_str = json.dumps(data, ensure_ascii=False, indent=4)
- # print(f"Request Data: {json_str}")
- # json_data = json.loads(json_str)
response = requests.post(
- # self._endpoint, headers=headers, data=json_str, timeout=self._timeout
self._endpoint,
headers=headers,
json=data,
@@ -185,16 +209,21 @@ def _execute(self, input: LanguageModelInput) -> AIMessage:
response.raise_for_status()
try:
- data = json.loads(response.content)
- print(data)
+ response_data = json.loads(response.content)
except json.decoder.JSONDecodeError as e:
print("Failed to parse JSON:", response.content)
raise e
message_content = "No response from LLM server"
- if data["choices"]:
- message_content = data["choices"][0]["message"]["content"]
- # print(message_content)
+ if is_bedrock:
+ output = (
+ response_data.get("output", {}).get("message", {}).get("content", [])
+ )
+ if output:
+ message_content = output[0].get("text", message_content)
+ else:
+ if response_data.get("choices"):
+ message_content = response_data["choices"][0]["message"]["content"]
return AIMessage(content=message_content)
def _clean_json_string(self, input_string):
diff --git a/backend/danswer/llm/factory.py b/backend/danswer/llm/factory.py
index 7e8a59c1d94..ab67e43939c 100644
--- a/backend/danswer/llm/factory.py
+++ b/backend/danswer/llm/factory.py
@@ -1,16 +1,13 @@
from danswer.configs.app_configs import DISABLE_GENERATIVE_AI
from danswer.configs.chat_configs import QA_TIMEOUT
+from danswer.configs.model_configs import GEN_AI_MODEL_NAME
from danswer.configs.model_configs import GEN_AI_TEMPERATURE
-from danswer.db.engine import get_session_context_manager
-from danswer.db.llm import fetch_default_provider
-from danswer.db.llm import fetch_provider
+from danswer.configs.model_configs import GEN_AI_VENDOR
from danswer.db.models import Persona
-from danswer.llm.chat_llm import DefaultMultiLLM
+from danswer.llm.custom_llm import CustomModelServer
from danswer.llm.exceptions import GenAIDisabledException
-from danswer.llm.headers import build_llm_extra_headers
from danswer.llm.interfaces import LLM
from danswer.llm.override_models import LLMOverride
-from danswer.llm.custom_llm import CustomModelServer
def get_main_llm_from_tuple(
@@ -26,40 +23,22 @@ def get_llms_for_persona(
 ) -> tuple[LLM, LLM]:
     model_provider_override = llm_override.model_provider if llm_override else None
     model_version_override = llm_override.model_version if llm_override else None
     temperature_override = llm_override.temperature if llm_override else None
-
-    provider_name = model_provider_override or persona.llm_model_provider_override
-    if not provider_name:
-        return get_default_llms(
-            temperature=temperature_override or GEN_AI_TEMPERATURE,
-            additional_headers=additional_headers,
-        )
-
-    with get_session_context_manager() as db_session:
-        llm_provider = fetch_provider(db_session, provider_name)
-
-    if not llm_provider:
-        raise ValueError("No LLM provider found")
-
-    model = model_version_override or persona.llm_model_version_override
-    fast_model = llm_provider.fast_default_model_name or llm_provider.default_model_name
-    if not model:
-        raise ValueError("No model name found")
-    if not fast_model:
-        raise ValueError("No fast model name found")

-    def _create_llm(model: str) -> LLM:
-        return get_llm(
-            provider=llm_provider.provider,
-            model=model,
-            api_key=llm_provider.api_key,
-            api_base=llm_provider.api_base,
-            api_version=llm_provider.api_version,
-            custom_config=llm_provider.custom_config,
-            additional_headers=additional_headers,
-        )
+    # A per-request override wins; otherwise use the environment-configured defaults.
+    vendor = model_provider_override or GEN_AI_VENDOR
+    model = model_version_override or GEN_AI_MODEL_NAME

-    return _create_llm(model), _create_llm(fast_model)
+    # Forward the temperature override (a falsy value such as 0.0 falls back to
+    # GEN_AI_TEMPERATURE) together with the caller's extra headers.
+    # The same instance is returned for both elements of the tuple.
+    llm = get_llm(
+        provider=vendor,
+        model=model,
+        temperature=temperature_override or GEN_AI_TEMPERATURE,
+        additional_headers=additional_headers,
+    )
+    return llm, llm
def get_default_llms(
@@ -70,35 +40,17 @@ def get_default_llms(
     if DISABLE_GENERATIVE_AI:
         raise GenAIDisabledException()

-    with get_session_context_manager() as db_session:
-        llm_provider = fetch_default_provider(db_session)
-
-    if not llm_provider:
-        raise ValueError("No default LLM provider found")
-
-    model_name = llm_provider.default_model_name
-    fast_model_name = (
-        llm_provider.fast_default_model_name or llm_provider.default_model_name
-    )
-    if not model_name:
-        raise ValueError("No default model name found")
-    if not fast_model_name:
-        raise ValueError("No fast default model name found")
-
-    def _create_llm(model: str) -> LLM:
-        return get_llm(
-            provider=llm_provider.provider,
-            model=model,
-            api_key=llm_provider.api_key,
-            api_base=llm_provider.api_base,
-            api_version=llm_provider.api_version,
-            custom_config=llm_provider.custom_config,
-            timeout=timeout,
-            temperature=temperature,
-            additional_headers=additional_headers,
-        )
-
-    return _create_llm(model_name), _create_llm(fast_model_name)
+    # Vendor and model come from the environment-backed config; the caller's
+    # timeout, temperature and extra headers are passed through to get_llm.
+    # The same instance is returned for both elements of the tuple.
+    llm = get_llm(
+        provider=GEN_AI_VENDOR,
+        model=GEN_AI_MODEL_NAME,
+        timeout=timeout,
+        temperature=temperature,
+        additional_headers=additional_headers,
+    )
+    return llm, llm
def get_llm(
@@ -113,6 +56,8 @@ def get_llm(
additional_headers: dict[str, str] | None = None,
) -> LLM:
return CustomModelServer(
- timeout=timeout,
- api_key=api_key,
- )
\ No newline at end of file
+ timeout=timeout,
+ api_key=api_key,
+ llm_vendor=provider,
+ llm_model_name=model,
+ )
diff --git a/backend/danswer/server/manage/models.py b/backend/danswer/server/manage/models.py
index da7ca7bc4e1..a5a1268eaa4 100644
--- a/backend/danswer/server/manage/models.py
+++ b/backend/danswer/server/manage/models.py
@@ -116,6 +116,8 @@ class SlackBotConfigCreationRequest(BaseModel):
jira_title_filter: list[str] | None = None
curated_response_user_title_filter: list[str] | None = None
response_type: SlackBotResponseType
+ llm_vendor: str | None = None
+ llm_model_name: str | None = None
@validator("answer_filters", pre=True)
def validate_filters(cls, value: list[str]) -> list[str]:
diff --git a/backend/danswer/server/manage/slack_bot.py b/backend/danswer/server/manage/slack_bot.py
index 718fb3f86a5..6bb6709be57 100644
--- a/backend/danswer/server/manage/slack_bot.py
+++ b/backend/danswer/server/manage/slack_bot.py
@@ -105,6 +105,12 @@ def _form_channel_config(
] = curated_response_user_title_filter
if curated_response_config:
channel_config["curated_response_config"] = curated_response_config
+ if slack_bot_config_creation_request.llm_vendor:
+ channel_config["llm_vendor"] = slack_bot_config_creation_request.llm_vendor
+ if slack_bot_config_creation_request.llm_model_name:
+ channel_config[
+ "llm_model_name"
+ ] = slack_bot_config_creation_request.llm_model_name
channel_config[
"respond_to_bots"
diff --git a/deployment/docker_compose/docker-compose.dev.yml b/deployment/docker_compose/docker-compose.dev.yml
index 3272e716843..294b22deff1 100644
--- a/deployment/docker_compose/docker-compose.dev.yml
+++ b/deployment/docker_compose/docker-compose.dev.yml
@@ -44,6 +44,8 @@ services:
- GEN_AI_CLIENT_SECRET=${GEN_AI_CLIENT_SECRET:-}
- GEN_AI_ACCOUNT_ID=${GEN_AI_ACCOUNT_ID:-}
- GEN_AI_TENANT_ID=${GEN_AI_TENANT_ID:-}
+ - GEN_AI_VENDOR=${GEN_AI_VENDOR:-openai}
+ - GEN_AI_MODEL_NAME=${GEN_AI_MODEL_NAME:-gpt-4o-2024-11-20}
- GEN_AI_API_VERSION=${GEN_AI_API_VERSION:-}
- GEN_AI_LLM_PROVIDER_TYPE=${GEN_AI_LLM_PROVIDER_TYPE:-}
- GEN_AI_MAX_TOKENS=${GEN_AI_MAX_TOKENS:-}
@@ -133,6 +135,8 @@ services:
- GEN_AI_CLIENT_SECRET=${GEN_AI_CLIENT_SECRET:-}
- GEN_AI_ACCOUNT_ID=${GEN_AI_ACCOUNT_ID:-}
- GEN_AI_TENANT_ID=${GEN_AI_TENANT_ID:-}
+ - GEN_AI_VENDOR=${GEN_AI_VENDOR:-openai}
+ - GEN_AI_MODEL_NAME=${GEN_AI_MODEL_NAME:-gpt-4o-2024-11-20}
- GEN_AI_API_VERSION=${GEN_AI_API_VERSION:-}
- GEN_AI_LLM_PROVIDER_TYPE=${GEN_AI_LLM_PROVIDER_TYPE:-}
- GEN_AI_MAX_TOKENS=${GEN_AI_MAX_TOKENS:-}
diff --git a/deployment/docker_compose/docker-compose.local.yml b/deployment/docker_compose/docker-compose.local.yml
index 85c206622e7..408af43fb29 100644
--- a/deployment/docker_compose/docker-compose.local.yml
+++ b/deployment/docker_compose/docker-compose.local.yml
@@ -45,6 +45,8 @@ services:
- GEN_AI_CLIENT_SECRET=${GEN_AI_CLIENT_SECRET:-}
- GEN_AI_ACCOUNT_ID=${GEN_AI_ACCOUNT_ID:-}
- GEN_AI_TENANT_ID=${GEN_AI_TENANT_ID:-}
+ - GEN_AI_VENDOR=${GEN_AI_VENDOR:-openai}
+ - GEN_AI_MODEL_NAME=${GEN_AI_MODEL_NAME:-gpt-4o-2024-11-20}
- GEN_AI_API_VERSION=${GEN_AI_API_VERSION:-}
- GEN_AI_LLM_PROVIDER_TYPE=${GEN_AI_LLM_PROVIDER_TYPE:-}
- GEN_AI_MAX_TOKENS=${GEN_AI_MAX_TOKENS:-}
@@ -136,6 +138,8 @@ services:
- GEN_AI_CLIENT_SECRET=${GEN_AI_CLIENT_SECRET:-}
- GEN_AI_ACCOUNT_ID=${GEN_AI_ACCOUNT_ID:-}
- GEN_AI_TENANT_ID=${GEN_AI_TENANT_ID:-}
+ - GEN_AI_VENDOR=${GEN_AI_VENDOR:-openai}
+ - GEN_AI_MODEL_NAME=${GEN_AI_MODEL_NAME:-gpt-4o-2024-11-20}
- GEN_AI_API_VERSION=${GEN_AI_API_VERSION:-}
- GEN_AI_LLM_PROVIDER_TYPE=${GEN_AI_LLM_PROVIDER_TYPE:-}
- GEN_AI_MAX_TOKENS=${GEN_AI_MAX_TOKENS:-}
diff --git a/web/Dockerfile.prebuilt b/web/Dockerfile.prebuilt
new file mode 100644
index 00000000000..0ebed303b7a
--- /dev/null
+++ b/web/Dockerfile.prebuilt
@@ -0,0 +1,44 @@
+# Runtime-only image for a Next.js standalone build produced outside Docker:
+# public/, .next/standalone and .next/static are copied from the build context.
+FROM node:20-alpine AS runner
+WORKDIR /app
+
+# Remove the image's global node_modules; nothing below invokes npm.
+RUN rm -rf /usr/local/lib/node_modules
+
+ENV NEXT_TELEMETRY_DISABLED=1
+
+# Run the server as a dedicated non-root system user.
+RUN addgroup --system --gid 1001 nodejs
+RUN adduser --system --uid 1001 nextjs
+USER nextjs
+
+COPY --chown=nextjs:nodejs public ./public
+COPY --chown=nextjs:nodejs .next/standalone ./
+COPY --chown=nextjs:nodejs .next/static ./.next/static
+
+# Build args mirrored into the container environment.
+# NOTE(review): NEXT_PUBLIC_* values are usually inlined by `next build`;
+# confirm that setting them here has any effect on a prebuilt bundle.
+ARG NEXT_PUBLIC_DISABLE_STREAMING
+ENV NEXT_PUBLIC_DISABLE_STREAMING=${NEXT_PUBLIC_DISABLE_STREAMING}
+
+ARG NEXT_PUBLIC_NEW_CHAT_DIRECTS_TO_SAME_PERSONA
+ENV NEXT_PUBLIC_NEW_CHAT_DIRECTS_TO_SAME_PERSONA=${NEXT_PUBLIC_NEW_CHAT_DIRECTS_TO_SAME_PERSONA}
+
+ARG NEXT_PUBLIC_POSITIVE_PREDEFINED_FEEDBACK_OPTIONS
+ENV NEXT_PUBLIC_POSITIVE_PREDEFINED_FEEDBACK_OPTIONS=${NEXT_PUBLIC_POSITIVE_PREDEFINED_FEEDBACK_OPTIONS}
+
+ARG NEXT_PUBLIC_NEGATIVE_PREDEFINED_FEEDBACK_OPTIONS
+ENV NEXT_PUBLIC_NEGATIVE_PREDEFINED_FEEDBACK_OPTIONS=${NEXT_PUBLIC_NEGATIVE_PREDEFINED_FEEDBACK_OPTIONS}
+
+ARG NEXT_PUBLIC_THEME
+ENV NEXT_PUBLIC_THEME=${NEXT_PUBLIC_THEME}
+
+ARG NEXT_PUBLIC_DO_NOT_USE_TOGGLE_OFF_DANSWER_POWERED
+ENV NEXT_PUBLIC_DO_NOT_USE_TOGGLE_OFF_DANSWER_POWERED=${NEXT_PUBLIC_DO_NOT_USE_TOGGLE_OFF_DANSWER_POWERED}
+
+ARG NEXT_PUBLIC_DISABLE_LOGOUT
+ENV NEXT_PUBLIC_DISABLE_LOGOUT=${NEXT_PUBLIC_DISABLE_LOGOUT}
+
+CMD ["node", "server.js"]
diff --git a/web/src/app/admin/bot/SlackBotConfigCreationForm.tsx b/web/src/app/admin/bot/SlackBotConfigCreationForm.tsx
index 032e47aad00..4bd54c469d3 100644
--- a/web/src/app/admin/bot/SlackBotConfigCreationForm.tsx
+++ b/web/src/app/admin/bot/SlackBotConfigCreationForm.tsx
@@ -109,6 +109,10 @@ export const SlackBotCreationForm = ({
? existingSlackBotConfig.persona.id
: null,
response_type: existingSlackBotConfig?.response_type || "citations",
+ llm_vendor:
+ existingSlackBotConfig?.channel_config?.llm_vendor || "",
+ llm_model_name:
+ existingSlackBotConfig?.channel_config?.llm_model_name || "",
prioritized_sources:
existingSlackBotConfig?.channel_config?.prioritized_sources || [],
jira_config: existingSlackBotConfig?.channel_config
@@ -144,6 +148,8 @@ export const SlackBotCreationForm = ({
opsgenie_schedule: Yup.string(),
document_sets: Yup.array().of(Yup.number()),
persona_id: Yup.number().nullable(),
+ llm_vendor: Yup.string(),
+ llm_model_name: Yup.string(),
prioritized_sources: Yup.array().of(Yup.string()),
jira_config: Yup.object().shape({
enable_jira_integration: Yup.boolean().required(),
@@ -361,6 +367,57 @@ export const SlackBotCreationForm = ({
]}
/>
+
+
+ {values.llm_vendor === "awsbedrock" && (
+
+ )}
+
+ {values.llm_vendor === "openai" && (
+
+ )}
+
When should Darwin respond?
diff --git a/web/src/app/admin/bot/lib.ts b/web/src/app/admin/bot/lib.ts
index 35900b46cf3..a6bee4d815b 100644
--- a/web/src/app/admin/bot/lib.ts
+++ b/web/src/app/admin/bot/lib.ts
@@ -32,6 +32,8 @@ interface SlackBotConfigCreationRequest {
curated_response_user_title_filter?: string[];
usePersona: boolean;
response_type: SlackBotResponseType;
+ llm_vendor?: string;
+ llm_model_name?: string;
}
const buildFiltersFromCreationRequest = (
@@ -69,6 +71,8 @@ const buildRequestBodyFromCreationRequest = (
? { persona_id: creationRequest.persona_id }
: { document_sets: creationRequest.document_sets }),
response_type: creationRequest.response_type,
+ llm_vendor: creationRequest.llm_vendor,
+ llm_model_name: creationRequest.llm_model_name,
});
};
diff --git a/web/src/lib/types.ts b/web/src/lib/types.ts
index dd5adb9685c..eff795fac25 100644
--- a/web/src/lib/types.ts
+++ b/web/src/lib/types.ts
@@ -590,6 +590,8 @@ export interface ChannelConfig {
enable_curated_response_integration?: boolean;
response_message?: string;
};
+ llm_vendor?: string;
+ llm_model_name?: string;
}
export type SlackBotResponseType = "quotes" | "citations";