Commit f954a142 authored by Sangamithra Panneer Selvam

Metrics calculation script

parent 11d07d40
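
This commit wires ragas-based evaluation into the RAG pipeline: the gRPC service scores each answer for faithfulness and answer relevancy, appends the scores to metrics.txt, and the Flask UI displays them alongside a star-rating widget. A minimal sketch of the metric call at the heart of the change (ragas==0.1.7 as pinned in requirements; the sample strings are illustrative only, and an OPENAI_API_KEY must be set since ragas scores with an LLM judge):

from datasets import Dataset
from ragas import evaluate
from ragas.metrics import faithfulness, answer_relevancy

# One-row dataset: a question, the generated answer, and the retrieved context
data_samples = {
    "question": ["What does the AI Act regulate?"],          # illustrative
    "answer": ["It sets rules for high-risk AI systems."],   # illustrative
    "contexts": [["The AI Act is an EU regulation ..."]],    # illustrative
}
dataset = Dataset.from_dict(data_samples)

result = evaluate(dataset, metrics=[faithfulness, answer_relevancy])
print(result["faithfulness"], result["answer_relevancy"])   # aggregate scores in [0, 1]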
@@ -8,11 +8,20 @@ import matplotlib.pyplot as plt
from io import BytesIO
from visualization import plot_embeddings
from langchain.embeddings import HuggingFaceEmbeddings
import json
from datetime import datetime
import logging
app = Flask(__name__)
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
user_question = []
user_answer = []
readme_ratings = {}
ratings_list = []
faithfulness = None
relevancy = None
local_directory = os.getenv("SHARED_FOLDER_PATH")
@@ -25,7 +34,27 @@ def save_uploaded_file(file):

@app.route("/")
def index():
    global faithfulness
    global relevancy
    # metrics.txt lines have the form "|<faithfulness>|<relevancy>"; show the latest pair
    with open("metrics.txt", mode="r") as f:
        content = "".join(line.strip() for line in f)
    metrics_list = content.split('|')
    try:
        faithfulness = metrics_list[-2]
        relevancy = metrics_list[-1].rstrip('\n')
    except IndexError:
        # No metrics written yet; the template hides the scores in that case
        faithfulness = None
        relevancy = None
    return render_template("index.html", faithfulness=faithfulness, relevancy=relevancy)
@app.route("/handle_request", methods=["POST"])
@@ -191,6 +220,35 @@ def send_plot_as_image(fig):
img_bytes = fig.to_image(format="png")
return send_file(BytesIO(img_bytes), mimetype="image/png")
@app.route('/rate_readme', methods=['POST'])
def rate_readme():
    global faithfulness
    global relevancy
    try:
        faithfulness_score = float(faithfulness)
        relevancy_score = float(relevancy)
        data = request.json
        rating = data['rating']
        feedback = data.get('feedback', '')  # Default to an empty string if no feedback was provided
        readme_ratings.setdefault(local_directory, []).append(
            {'rating': rating, 'feedback': feedback,
             'faithfulness': faithfulness_score, 'answer_relevancy': relevancy_score}
        )
        logger.info(readme_ratings)
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        try:
            ratings_filename = os.path.join(local_directory, "ratings.json")
            rating_dict = {"Rating": rating, "Feedback": feedback,
                           "Faithfulness": faithfulness_score,
                           "Answer_Relevancy": relevancy_score,
                           "Timestamp": timestamp}
            ratings_list.append(rating_dict)
            with open(ratings_filename, 'w') as file:
                json.dump(ratings_list, file, indent=4)
            logger.info(f"Rating: {rating}, Feedback: {feedback}, Faithfulness: {faithfulness_score}, "
                        f"Answer_Relevancy: {relevancy_score}, Timestamp: {timestamp}")
        except Exception:
            logger.exception("Failed to persist rating to ratings.json")
        return jsonify({'success': True})
    except Exception as e:
        logger.exception("Exception while handling rating")
        return jsonify({'success': False, 'error': str(e)})
def app_run():
app.run(host="0.0.0.0", port=8062, debug=False)
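
For reference, a rating can also be submitted outside the browser; a minimal sketch using requests, assuming the Flask app is reachable on localhost:8062 as configured in app_run above (the rating and feedback values are illustrative):

import requests

# Payload mirrors what the template's rateReadme() sends
resp = requests.post(
    "http://localhost:8062/rate_readme",
    json={"rating": 4, "feedback": "Clear and well-grounded answer."},
)
print(resp.json())  # {"success": true} on success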
@@ -33,6 +33,7 @@ message LLMQuery {
message LLMAnswer {
string text = 1;
ConvoID id = 2;
string relevant_context = 3;
}
message Status {
string message = 1;
......
@@ -27,4 +27,5 @@ seaborn
umap-learn
scikit-learn
plotly
kaleido
ragas==0.1.7
\ No newline at end of file
@@ -10,6 +10,11 @@ import json
import os
import uuid
from datasets import Dataset
from ragas.metrics import faithfulness, answer_relevancy
from ragas import evaluate
system_instruction = """
<|start_header_id|>user<|end_header_id|>
You are an assistant for answering questions.
@@ -19,6 +24,8 @@ If you don't know the answer, just say "I do not know." Don't make up an answer.
local_directory = os.getenv("SHARED_FOLDER_PATH")
os.environ["OPENAI_API_KEY"] = "sk-ai-builder-rhzhJNo29KLDuN2gT4JsT3BlbkFJ7Vw7lOks4OqAm4wRw1YZ"
os.environ['OPENAI_ORGANIZATION'] = "org-XSbDbl5S9lfwrFTncFOBVOMx"
class RAGService(rag_pb2_grpc.RAGServiceServicer):
@@ -70,6 +77,9 @@ class RAGService(rag_pb2_grpc.RAGServiceServicer):
self.responses[question_id] = answer.text
faithfulness, relevancy = self.calculate_metrics(answer.relevant_context, answer.text)
print(f'faithfulness, relevancy: {faithfulness}, {relevancy}')
qa_pairs.append(
{
"question_id": question_id,
@@ -82,8 +92,29 @@ class RAGService(rag_pb2_grpc.RAGServiceServicer):
            f.write(f"Question: {self.new_question}\n")
            f.write(f"Answer: {answer.text}\n\n")

        with open("metrics.txt", mode="a+") as f:
            # Append the latest scores in the "|<faithfulness>|<relevancy>" format the Flask index page parses
            f.write("|" + str(round(faithfulness, 3)) + "|" + str(round(relevancy, 3)) + "\n")

        return rag_pb2.Status(message="All answers received successfully.")
    def calculate_metrics(self, relevant_info, rag_output):
        # Build a one-row ragas dataset from the current question, answer, and retrieved context
        data_samples = {
            'question': [str(self.new_question)],
            'answer': [str(rag_output)],
            'contexts': [[str(relevant_info)]]
        }
        dataset = Dataset.from_dict(data_samples)
        faithfulness_score = evaluate(dataset, metrics=[faithfulness])
        answer_relevancy_score = evaluate(dataset, metrics=[answer_relevancy])
        return faithfulness_score["faithfulness"], answer_relevancy_score["answer_relevancy"]
def serve(port):
server = grpc.server(futures.ThreadPoolExecutor(max_workers=10))
@@ -97,4 +128,5 @@ def serve(port):

if __name__ == "__main__":
    port = 8061
    open('metrics.txt', 'w').close()  # Start each run with an empty metrics file
    serve(port)
@@ -153,8 +153,99 @@
margin-top: 10px;
font-style: italic;
}
.rating {
    display: flex;
    flex-direction: row;
    justify-content: left;
    direction: rtl; /* Right-to-left, so the sibling selector below highlights checked stars correctly */
}

.rating input {
    display: none;
}

.rating input[type="radio"] {
    float: left;
    clear: none;
}

.rating label {
    cursor: pointer;
    width: 25px;
    height: 25px;
    margin: 0 2px;
    font-size: 25px;
    line-height: 25px;
    float: left;
    clear: none;
}

.rating label:before {
    content: '★';
}

.rating input:checked ~ label {
    color: #FFD700;
}
</style>
<script>
function rateReadme(rating) {
// Show the corresponding textarea for the clicked star
var feedbackTextarea = document.getElementById('feedback' + rating);
feedbackTextarea.style.display = 'block';
// Capture the user feedback
var feedback = prompt("Please provide your feedback for this rating:");
if (feedback !== null) {
feedbackTextarea.value = feedback;
} else {
// If the user cancels the prompt, hide the textarea
feedbackTextarea.style.display = 'none';
}
// Hide other textareas for stars with lower ratings
for (var i = 1; i <= 5; i++) {
if (i !== rating) {
var otherTextarea = document.getElementById('feedback' + i);
otherTextarea.style.display = 'none';
otherTextarea.value = ''; // Clear previous feedback
}
}
// Send the rating and feedback to the server (empty string if the prompt was cancelled)
var data = {
    "rating": rating,
    "feedback": feedback !== null ? feedback : ""
};
fetch('/rate_readme', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify(data),
})
.then(response => response.json())
.then(data => {
console.log('Success:', data);
// Handle success if needed
})
.catch((error) => {
console.error('Error:', error);
// Handle error if needed
});
}
function handleUploadTypeChange() {
const uploadType = document.getElementById('uploadType').value;
const pdfSection = document.getElementById('pdfUploadSection');
@@ -388,7 +479,55 @@
</form>
<div id="indexResponse"></div>
</div>
<!-- Rate Me -->
<div class="section">
<h3>Rate ME</h3>
<p>Assess the quality of the generated responses based on criteria such as relevance, coherence, fluency, and overall satisfaction.</p>
<div class="rating">
<input type="radio" id="star5" name="rating" value="5" onclick="rateReadme(5)">
<label for="star5"></label>
<input type="radio" id="star4" name="rating" value="4" onclick="rateReadme(4)">
<label for="star4"></label>
<input type="radio" id="star3" name="rating" value="3" onclick="rateReadme(3)">
<label for="star3"></label>
<input type="radio" id="star2" name="rating" value="2" onclick="rateReadme(2)">
<label for="star2"></label>
<input type="radio" id="star1" name="rating" value="1" onclick="rateReadme(1)">
<label for="star1"></label>
<textarea id="feedback5" name="feedback" style="display: none;"></textarea>
<textarea id="feedback4" name="feedback" style="display: none;"></textarea>
<textarea id="feedback3" name="feedback" style="display: none;"></textarea>
<textarea id="feedback2" name="feedback" style="display: none;"></textarea>
<textarea id="feedback1" name="feedback" style="display: none;"></textarea>
</div>
</div>
<!-- Metrics -->
<div class="section">
<h3> Evaluation Metrics</h3>
<p>Faithfulness measures how accurately an answer reflects the provided information or known facts, with a higher score indicating low hallucinations. Answer relevancy score assesses how closely the answer aligns with the user's query or context.</p>
{% if faithfulness %}
<p> Faithfulness score : {{ faithfulness }}</p>
{% endif %}
{% if relevancy %}
<p> Answer Relevancy score : {{ relevancy }}</p>
{% endif %}
</div>
<!-- Visualization Section -->
<div class="section">
<h2>Visualizations</h2>
@@ -397,6 +536,8 @@
<a href="/umap_plot" id="umapPlotLink" class="button">View UMAP Visualization</a>
<a href="/cluster_plot" id="clusterPlotLink" class="button">View Cluster Visualization</a>
</div>
</div>
<!-- Large Column (RAG Expert - Chatbot) -->
......
syntax = "proto3";
message Empty {
}
message LLMConfig {
double temp = 1;
}
message PromptInput {
string system = 1;
string user = 2;
string context = 3;
string prompt = 4;
string prompt = 4;
}
message LLMConfig {
double temp = 1;
message UserQuestion {
string question = 1;
}
message ConvoID {
string q_id = 1;
}
message LLMQuery {
LLMConfig config = 1;
PromptInput input = 2;
UserQuestion qa = 3;
ConvoID id = 4;
}
message LLMAnswer {
string text = 1;
ConvoID id = 2;
string relevant_context = 3;
}
message Status {
string message = 1;
}
service LLMService {
rpc instruct_llm(LLMQuery) returns(LLMAnswer);
rpc instruct_llm( LLMQuery) returns( LLMAnswer);
rpc instruct_llm_stream( stream LLMQuery) returns( stream LLMAnswer);
}
\ No newline at end of file
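
For orientation, a minimal client sketch against this service, assuming the stubs were generated with grpcio-tools as llms_pb2/llms_pb2_grpc (matching the imports in the server below) and the server listens on port 8061; the field values are illustrative:

import grpc
import llms_pb2
import llms_pb2_grpc

channel = grpc.insecure_channel("localhost:8061")
stub = llms_pb2_grpc.LLMServiceStub(channel)

query = llms_pb2.LLMQuery(
    config=llms_pb2.LLMConfig(temp=0.7),                     # illustrative value
    input=llms_pb2.PromptInput(user="What is the AI Act?"),  # illustrative value
    qa=llms_pb2.UserQuestion(question="What is the AI Act?"),
    id=llms_pb2.ConvoID(q_id="q-1"),
)

# instruct_llm_stream is bidirectional: send a request iterator, consume a response stream
for answer in stub.instruct_llm_stream(iter([query])):
    print(answer.text, answer.relevant_context)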
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.prompts import PromptTemplate
import os
# FAISS folder path
local_directory = os.getenv("SHARED_FOLDER_PATH")
faiss_folder = os.path.join(local_directory, "faiss_index")
def faiss_db():
embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-base-en-v1.5")
try:
saved_db = FAISS.load_local(
faiss_folder, embeddings, allow_dangerous_deserialization=True
)
retriever = saved_db.as_retriever(
search_type="similarity", search_kwargs={"k": 4}
)
return retriever
except Exception as e:
print(f"Error loading FAISS database: {e}")
return None
def format_docs(docs):
return "\n\n".join(doc.page_content for doc in docs)
def get_context(retriever, question):
docs = retriever.get_relevant_documents(question)
# Extract context from the document
context = "\n".join([doc.page_content for doc in docs])
return context
def rag_chain(retriever, system_instruction, llm_service, question):
prompt_template_generic = """
<|start_header_id|>user<|end_header_id|>
You are an assistant for answering questions about the European AI Act.
You are given the extracted parts of a long document and a question. Provide a conversational answer.
If you don't know the answer, just say "I do not know." Don't make up an answer.
Question: {question}
Context: {context}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
"""
prompt = PromptTemplate(
input_variables=["context", "question"],
template=prompt_template_generic,
)
    rag_chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | prompt
        | llm_service
        | StrOutputParser()
    )

    # Invoke the chain with the provided question
    rag_ans = rag_chain.invoke(question)

    # Retrieve the supporting context separately so it can be returned for metrics
    relevant_context = get_context(retriever, question)
    return rag_ans, relevant_context
\ No newline at end of file
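
A hedged usage sketch of the helpers above, assuming a FAISS index already exists under SHARED_FOLDER_PATH; the stand-in LLM is hypothetical, and note that rag_chain currently ignores its system_instruction argument in favor of the hard-coded template:

from langchain_core.runnables import RunnableLambda

# Stand-in LLM so the sketch is self-contained; swap in a real runnable in practice
stub_llm = RunnableLambda(lambda prompt: "stub answer")

retriever = faiss_db()
if retriever is not None:
    answer, context = rag_chain(retriever, "", stub_llm, "What does the AI Act cover?")
    print(answer)
    print(context)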
@@ -9,27 +9,32 @@ import llms_pb2_grpc
from app import app_run
llm = IAIS_LLM(model_name='{{ model_name }}')
class LLMServiceServicer(llms_pb2_grpc.LLMServiceServicer):
def __init__(self, app_type="invoke_direct_llm"):
self.application = LLMApplicationManager.get_application(app_type) # Returns a class
def instruct_llm_stream(self, request_iterator, context):
for llm_query in request_iterator:
response_text, relevant_context = self.application.process_request(llm_query)
print(f"response : {response_text} and relevant information for metrics : {relevant_context}")
print(
f"Processed: {llm_query.input.prompt} with question: {llm_query.qa.question}"
)
yield llms_pb2.LLMAnswer(
id=llms_pb2.ConvoID(q_id=llm_query.id.q_id), text=response_text, relevant_context=relevant_context
)
    def instruct_llm(self, request, context):
        if request.input.prompt:
            response_text = llm.invoke(request.input.prompt)
        else:
            response_text = llm.invoke(request.input.user)
        return llms_pb2.LLMAnswer(text=response_text)
def serve(port, app_type="invoke_direct_llm"):
    server = grpc.server(futures.ThreadPoolExecutor(max_workers=10))
    llms_pb2_grpc.add_LLMServiceServicer_to_server(LLMServiceServicer(app_type), server)
    server.add_insecure_port(f"[::]:{port}")
    print(f"Starting server. Listening on port: {port} with app type: {app_type}")
    server.start()
    # Run the Flask UI in parallel with the gRPC server
    threading.Thread(target=app_run).start()
    server.wait_for_termination()
if __name__ == "__main__":
    # Example usage: serve(8061, app_type="rag") to start with the RAG application
    serve(8061, app_type="rag")