Multi_Agent_System/Self_Corrective_RAG_Agent.py at master · AnsImran/Multi_Agent_System · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
# ## Importing dependencies
import re
from typing import Literal, TypedDict
import openai
from timescale_vector import client  # Client for storing and retrieving vector embeddings from the Timescale/Postgres DB
from pydantic import BaseModel, Field
import os
from dotenv import load_dotenv

from langchain import hub

from langchain_community.tools.tavily_search import TavilySearchResults

from langchain_core.documents import Document
from langchain_core.output_parsers import StrOutputParser
from langchain_core.messages import BaseMessage, AIMessage, convert_to_messages
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.retrievers import BaseRetriever

from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings

from langgraph.graph import END, StateGraph, MessagesState


load_dotenv()  # Load environment variables from a .env file

# Access API keys and credentials
OPENAI_API_KEY    = os.environ["OPENAI_API_KEY"]
TIMESCALE_DB_URI  = os.environ["TIMESCALE_DB_URI"]
TAVILY_API_KEY    = os.environ["TAVILY_API_KEY"]
LANGCHAIN_API_KEY = os.environ["LANGCHAIN_API_KEY"]


# Example usage of OpenAI embedding (commented out for reference):
# query = "Explain The uncertainty principle in Quantum Mechanics"
# query_embedding = openai.embeddings.create(
#     input=[query],  # Input can be a string or list of strings
#     model="text-embedding-3-small"
# )

# Configuration for Timescale vector storage
table_name     = 'embeddings_table'  # Table name for storing vector embeddings
embedding_dims = 1536                # Embedding dimensionality for 'text-embedding-3-small'

# Initialize vector client to manage vector store operations
vec_client = client.Sync(
    TIMESCALE_DB_URI,
    table_name,
    embedding_dims
)


# Regular expression to clean newline characters
NEWLINE_RE = re.compile("\n+")

def get_docs(question: str, documents: list[Document]) -> list[Document]:
    """
    Retrieve top-k similar documents based on vector similarity for a given question.

    Args:
        question (str): The user's question or query.
        documents (list[Document]): Existing list of documents (can be empty).
                                    New retrieved documents will be appended here.

    Returns:
        list[Document]: The updated list with newly retrieved documents appended.
    """
    # Generate embedding for the user's query
    query_embedding = openai.embeddings.create(
        input=[question],  # Accepts a string or list of strings
        model="text-embedding-3-small"
    )

    # Retrieve top 5 most similar embeddings from vector store
    results = vec_client.search(query_embedding.data[0].embedding, limit=5)

    # Convert DB rows into LangChain Document objects and append to list
    for row in results:
        documents.append(Document(page_content=row[2], metadata=row[1]))

    return documents


# Initialize the main language model
llm = ChatOpenAI(model="gpt-4.1-nano-2025-04-14", temperature=0)

# Initialize Tavily web search tool
tavily_search_tool = TavilySearchResults(max_results=3)


# Define the graph state shared across nodes
class GraphState(MessagesState):
    question:         str              # User's question
    documents:        list[Document]   # Retrieved documents relevant to the question
    candidate_answer: str              # Answer generated by the LLM
    retries:          int              # Retry count for hallucination/irrelevant answers
    web_fallback:     bool             # Whether to fall back to web search if retries are exhausted

class GraphConfig(TypedDict):
    max_retries: int  # Maximum number of retries before using web search


MAX_RETRIES = 3
VERBOSE     = True


# ----------------------
# Graph Node: Document Search
# ----------------------
def document_search(state: GraphState, human_message: int = -3):
    """
    Retrieve relevant documents from the vector database based on the user's question.

    Args:
        state (dict): Current graph state.

    Returns:
        dict: Updated state with the retrieved documents and original question.
    """
    if VERBOSE:
        print("---RETRIEVE---")

    # Extract question from the message history
    # if this is a subgraph the human question will be 3rd last in the messages list
    try:
        question = str(convert_to_messages(state["messages"])[human_message].content)
    # if this is the main graph the human question will be the last one in the messages list
    except:
        question = str(convert_to_messages(state["messages"])[-1].content)

    # Retrieve documents using embedding similarity
    documents = get_docs(question, state.get("documents", []))


    return {"documents": documents, "question": question, "web_fallback": True}


# ----------------------
# Graph Node: Answer Generation
# ----------------------
RAG_PROMPT: ChatPromptTemplate = hub.pull("rlm/rag-prompt")
def generate(state: GraphState):
    """
    Generate an answer using the retrieved documents and user's question.

    Args:
        state (dict): Current graph state.

    Returns:
        dict: Updated state with a candidate answer and incremented retry count.
    """
    if VERBOSE:
        print("---GENERATE---")

    question  = state["question"]
    documents = state["documents"]
    retries   = state["retries"] if state.get("retries") is not None else -1

    rag_chain  = RAG_PROMPT | llm | StrOutputParser()
    generation = rag_chain.invoke({"context": documents, "question": question})

    return {"retries": retries + 1, "candidate_answer": generation}


# ----------------------
# Graph Node: Query Rewriting
# ----------------------
QUERY_REWRITER_SYSTEM = (
"""
You are a question rewriter that converts an input question into an optimized version
for vector store retrieval. Understand the semantic intent of the question.
"""
)

QUERY_REWRITER_PROMPT = ChatPromptTemplate.from_messages(
    [
        ("system", QUERY_REWRITER_SYSTEM),
        (
            "human",
            "Here is the initial question: \n\n {question} \n Formulate an improved question.",
        ),
    ]
)

def transform_query(state: GraphState):
    """
    Rewrite the question to improve retrieval performance.

    Args:
        state (dict): Current graph state.

    Returns:
        dict: Updated state with a reformulated question.
    """
    if VERBOSE:
        print("---TRANSFORM QUERY---")

    question = state["question"]
    query_rewriter  = QUERY_REWRITER_PROMPT | llm | StrOutputParser()
    better_question = query_rewriter.invoke({"question": question})

    return {"question": better_question}


# ----------------------
# Graph Node: Web Search Fallback
# ----------------------
def web_search(state: GraphState):
    """
    Perform a web search as a fallback mechanism when local document retrieval fails.

    Args:
        state (dict): Current graph state.

    Returns:
        dict: Updated state with documents retrieved from web search.
    """
    if VERBOSE:
        print("---RUNNING WEB SEARCH---")

    question  = state["question"]
    documents = state["documents"]

    search_results = tavily_search_tool.invoke(question)
    search_content = "\n".join([d["content"] for d in search_results])

    documents.append(Document(page_content=search_content, metadata={"source": "websearch"}))

    return {"documents": documents, "web_fallback": False}


# ----------------------
# Graph Node: Finalize Answer
# ----------------------
def finalize_response(state: GraphState):
    """
    Finalize and return the generated response to the user.

    Args:
        state (dict): Current graph state.

    Returns:
        dict: Updated state with final AI message.
    """
    if VERBOSE:
        print("---FINALIZING THE RESPONSE---")

    return {"messages": [AIMessage(content=state["candidate_answer"])]}


# ----------------------
# Answer Grading - Hallucination & Relevance
# ----------------------

class GradeHallucinations(BaseModel):
    """Determine whether the generated answer is grounded in provided documents."""
    binary_score: str = Field(description="Answer is grounded in the facts, 'yes' or 'no'")


HALLUCINATION_GRADER_SYSTEM = (
"""
You are a grader assessing whether an LLM's generation is grounded in a set of retrieved facts.
Return 'yes' if fully supported by the facts; otherwise, return 'no'.

If the generation includes code, ensure it exactly matches the retrieved facts.
"""
)

HALLUCINATION_GRADER_PROMPT = ChatPromptTemplate.from_messages([
    ("system", HALLUCINATION_GRADER_SYSTEM),
    ("human", "Set of facts: \n\n {documents} \n\n LLM generation: {generation}"),
])


class GradeAnswer(BaseModel):
    """Determine whether the generated answer addresses the user's question."""
    binary_score: str = Field(description="Answer addresses the question, 'yes' or 'no'")


ANSWER_GRADER_SYSTEM = (
"""
You are a grader determining if an answer resolves a user's question.
Return 'yes' if it does, otherwise return 'no'.
"""
)

ANSWER_GRADER_PROMPT = ChatPromptTemplate.from_messages([
    ("system", ANSWER_GRADER_SYSTEM),
    ("human", "User question: \n\n {question} \n\n LLM generation: {generation}"),
])


def grade_generation_v_documents_and_question(state: GraphState, config) -> Literal["generate", "transform_query", "web_search", "finalize_response"]:
    """
    Decide the next step based on whether the generated answer is both grounded in documents and relevant to the question.

    Args:
        state (dict): Current graph state.
        config (dict): Graph configuration containing retry logic.

    Returns:
        str: Next node to call in the graph.
    """
    question     = state["question"]
    documents    = state["documents"]
    generation   = state["candidate_answer"]
    web_fallback = state["web_fallback"]
    retries      = state["retries"] if state.get("retries") is not None else -1
    max_retries  = config.get("configurable", {}).get("max_retries", MAX_RETRIES)

    if not web_fallback:
        return "finalize_response"

    if VERBOSE:
        print("---CHECK HALLUCINATIONS---")

    hallucination_grader = HALLUCINATION_GRADER_PROMPT | llm.with_structured_output(GradeHallucinations)
    hallucination_grade: GradeHallucinations = hallucination_grader.invoke(
        {"documents": documents, "generation": generation}
    )

    if hallucination_grade.binary_score == "no":
        if VERBOSE: print("---DECISION: ANSWER NOT GROUNDED, RETRY---")
        return "generate" if retries < max_retries else "web_search"

    if VERBOSE:
        print("---DECISION: ANSWER GROUNDED, NOW CHECK RELEVANCE---")

    answer_grader = ANSWER_GRADER_PROMPT | llm.with_structured_output(GradeAnswer)
    answer_grade: GradeAnswer = answer_grader.invoke({"question": question, "generation": generation})

    if answer_grade.binary_score == "yes":
        if VERBOSE: print("---DECISION: ANSWER RELEVANT---")
        return "finalize_response"
    else:
        if VERBOSE: print("---DECISION: ANSWER IRRELEVANT, RETRY---")
        return "transform_query" if retries < max_retries else "web_search"


# ----------------------
# Graph Construction
# ----------------------
self_corrective_rag_builder = StateGraph(GraphState, config_schema=GraphConfig)

# Add graph nodes
self_corrective_rag_builder.add_node("document_search",   document_search)
self_corrective_rag_builder.add_node("generate",          generate)
self_corrective_rag_builder.add_node("transform_query",   transform_query)
self_corrective_rag_builder.add_node("web_search",        web_search)
self_corrective_rag_builder.add_node("finalize_response", finalize_response)

# Define graph edges
self_corrective_rag_builder.set_entry_point("document_search")
self_corrective_rag_builder.add_edge("document_search",   "generate")
self_corrective_rag_builder.add_edge("transform_query",   "document_search")
self_corrective_rag_builder.add_edge("web_search",        "generate")
self_corrective_rag_builder.add_edge("finalize_response", END)
self_corrective_rag_builder.add_conditional_edges("generate", grade_generation_v_documents_and_question)


self_corrective_rag_agent = self_corrective_rag_builder.compile()

# # Compile the graph
# self_crag = self_corrective_rag_builder.compile()


# # ----------------------
# # Visualize the graph
# # ----------------------
# from IPython.display import Image, display
# display(Image(self_crag.get_graph().draw_mermaid_png()))


# Example usage:
# VERBOSE = True
# inputs = {"messages": [("human", "explain uncertainty principle in quantum mechanics")]}
# for output in self_crag.stream(inputs):
#     print("\n---\n")

# VERBOSE = False
# inputs = {"messages": [("human", "explain diffraction in crystals")]}
# for output in self_crag.stream(inputs):
#     print(output)
#     print("\n---\n")