Commit cea201ec authored by SM Farhad Ali

CSV data analysis with Pandas AI added

parent 5b9888be
Branch: main
# Use an appropriate Python base image with Python 3.9 or later
FROM python:3.9-slim
# Set the working directory inside the container
WORKDIR /app
# Update and install necessary system packages
RUN apt-get update && apt-get install -y \
        build-essential \
        curl \
        software-properties-common \
        git \
    && rm -rf /var/lib/apt/lists/*
# Copy just the requirements file first to leverage Docker caching
COPY requirements.txt .
# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt
# Copy the rest of the application code into the container
COPY . .
# Expose the port that Streamlit uses
EXPOSE 8501
# Healthcheck to verify Streamlit app is running
HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health || exit 1
# Command to run the Streamlit app
CMD ["streamlit", "run", "streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"]
File added: docker-compose.yml
version: '3.8'

services:
  streamlit:
    build:
      context: .
      dockerfile: Dockerfile
    container_name: streamlit
    ports:
      - "8501:8501"
    environment:
      - PYTHONUNBUFFERED=1  # Ensures Python output is logged in real time
      # NOTE: a credential committed to the repo; better supplied via an untracked .env file
      - LLM_AUTH_TOKEN=c2FsaTpQYXNzd29yZEAx
      - LLM_ENDPOINT=https://genai.iais.fraunhofer.de/api/v1/models/Mistral-7B-Instruct-v0.3_t2t  # LLM endpoint reachable from the streamlit service
    restart: unless-stopped
    volumes:
      - ./exports/charts:/app/exports/charts
File added: csv_data_analysis/exports/charts/temp_chart.png (14.9 KiB, binary)

import requests

from pandasai.llm.base import LLM
from pandasai.prompts.base import BasePrompt
from pandasai.helpers.memory import Memory


class CustomMistralLLM(LLM):
    """PandasAI LLM wrapper for a Mistral model served behind a custom REST endpoint."""

    def __init__(self, api_url: str, api_token: str):
        self.api_url = api_url
        self.api_token = api_token

    @property
    def type(self) -> str:
        return "custom_mistral"

    def call(self, instruction: BasePrompt, context: Memory = None) -> str:
        headers = {
            "accept": "application/json; charset=utf-8",
            "Process-Mode": "sync",
            "Authorization": f"Basic {self.api_token}",
            "Content-Type": "application/json; charset=utf-8",
        }
        payload = {
            "prompts": [{"role": "user", "content": instruction.to_string()}],
            "doSample": True,
            "maxTokens": 1200,
            "numBeams": 1,
            "repPenalty": 1.2,
            "temperature": 0,
            "topK": 50,
            "topP": 0.6,
        }
        # timeout added so a stalled endpoint does not hang the app indefinitely
        response = requests.post(self.api_url, headers=headers, json=payload, timeout=120)
        response.raise_for_status()
        data = response.json()
        return data["payload"]["data"]["text"]
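A minimal smoke test for the wrapper above; a sketch assuming the LLM_ENDPOINT and LLM_AUTH_TOKEN environment variables from docker-compose.yml are set and the endpoint is reachable:

import os
import pandas as pd
from pandasai import SmartDataframe
from llms.CustomMistralLLM import CustomMistralLLM

# Hypothetical standalone check, outside Streamlit.
llm = CustomMistralLLM(os.environ["LLM_ENDPOINT"], os.environ["LLM_AUTH_TOKEN"])
df = pd.DataFrame({"country": ["DE", "FR"], "sales": [120, 80]})
sdf = SmartDataframe(df, config={"llm": llm})
print(sdf.chat("Which country has the higher sales?"))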
syntax = "proto3";
message Empty {
}
message NewsText {
string text = 1;
}
service NewsDatabroker {
rpc pullData(Empty) returns(NewsText);
}
# -*- coding: utf-8 -*-
# Generated by the protocol buffer compiler. DO NOT EDIT!
# source: model.proto
# Protobuf Python Version: 5.26.1
"""Generated protocol buffer code."""
from google.protobuf import descriptor as _descriptor
from google.protobuf import descriptor_pool as _descriptor_pool
from google.protobuf import symbol_database as _symbol_database
from google.protobuf.internal import builder as _builder
# @@protoc_insertion_point(imports)
_sym_db = _symbol_database.Default()
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0bmodel.proto\"\x07\n\x05\x45mpty\"\x18\n\x08NewsText\x12\x0c\n\x04text\x18\x01 \x01(\t2/\n\x0eNewsDatabroker\x12\x1d\n\x08pullData\x12\x06.Empty\x1a\t.NewsTextb\x06proto3')
_globals = globals()
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'model_pb2', _globals)
if not _descriptor._USE_C_DESCRIPTORS:
  DESCRIPTOR._loaded_options = None
  _globals['_EMPTY']._serialized_start=15
  _globals['_EMPTY']._serialized_end=22
  _globals['_NEWSTEXT']._serialized_start=24
  _globals['_NEWSTEXT']._serialized_end=48
  _globals['_NEWSDATABROKER']._serialized_start=50
  _globals['_NEWSDATABROKER']._serialized_end=97
# @@protoc_insertion_point(module_scope)
# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
"""Client and server classes corresponding to protobuf-defined services."""
import grpc
import warnings
import model_pb2 as model__pb2
GRPC_GENERATED_VERSION = '1.64.1'
GRPC_VERSION = grpc.__version__
EXPECTED_ERROR_RELEASE = '1.65.0'
SCHEDULED_RELEASE_DATE = 'June 25, 2024'
_version_not_supported = False
try:
    from grpc._utilities import first_version_is_lower
    _version_not_supported = first_version_is_lower(GRPC_VERSION, GRPC_GENERATED_VERSION)
except ImportError:
    _version_not_supported = True

if _version_not_supported:
    warnings.warn(
        f'The grpc package installed is at version {GRPC_VERSION},'
        + f' but the generated code in model_pb2_grpc.py depends on'
        + f' grpcio>={GRPC_GENERATED_VERSION}.'
        + f' Please upgrade your grpc module to grpcio>={GRPC_GENERATED_VERSION}'
        + f' or downgrade your generated code using grpcio-tools<={GRPC_VERSION}.'
        + f' This warning will become an error in {EXPECTED_ERROR_RELEASE},'
        + f' scheduled for release on {SCHEDULED_RELEASE_DATE}.',
        RuntimeWarning
    )


class NewsDatabrokerStub(object):
    """Missing associated documentation comment in .proto file."""

    def __init__(self, channel):
        """Constructor.

        Args:
            channel: A grpc.Channel.
        """
        self.pullData = channel.unary_unary(
            '/NewsDatabroker/pullData',
            request_serializer=model__pb2.Empty.SerializeToString,
            response_deserializer=model__pb2.NewsText.FromString,
            _registered_method=True)


class NewsDatabrokerServicer(object):
    """Missing associated documentation comment in .proto file."""

    def pullData(self, request, context):
        """Missing associated documentation comment in .proto file."""
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')


def add_NewsDatabrokerServicer_to_server(servicer, server):
    rpc_method_handlers = {
        'pullData': grpc.unary_unary_rpc_method_handler(
            servicer.pullData,
            request_deserializer=model__pb2.Empty.FromString,
            response_serializer=model__pb2.NewsText.SerializeToString,
        ),
    }
    generic_handler = grpc.method_handlers_generic_handler(
        'NewsDatabroker', rpc_method_handlers)
    server.add_generic_rpc_handlers((generic_handler,))
    server.add_registered_method_handlers('NewsDatabroker', rpc_method_handlers)


# This class is part of an EXPERIMENTAL API.
class NewsDatabroker(object):
    """Missing associated documentation comment in .proto file."""

    @staticmethod
    def pullData(request,
            target,
            options=(),
            channel_credentials=None,
            call_credentials=None,
            insecure=False,
            compression=None,
            wait_for_ready=None,
            timeout=None,
            metadata=None):
        return grpc.experimental.unary_unary(
            request,
            target,
            '/NewsDatabroker/pullData',
            model__pb2.Empty.SerializeToString,
            model__pb2.NewsText.FromString,
            options,
            channel_credentials,
            insecure,
            call_credentials,
            compression,
            wait_for_ready,
            timeout,
            metadata,
            _registered_method=True)
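For completeness, a minimal sketch of wiring the generated classes into a server and client; DemoNewsDatabroker, the fixed headline, and the port are illustrative, not part of this commit:

import grpc
from concurrent import futures

import model_pb2
import model_pb2_grpc


class DemoNewsDatabroker(model_pb2_grpc.NewsDatabrokerServicer):
    # Hypothetical servicer: returns a fixed headline; a real broker
    # would pull from an actual news source.
    def pullData(self, request, context):
        return model_pb2.NewsText(text="Example headline")


def serve(port: int = 50051):
    server = grpc.server(futures.ThreadPoolExecutor(max_workers=4))
    model_pb2_grpc.add_NewsDatabrokerServicer_to_server(DemoNewsDatabroker(), server)
    server.add_insecure_port(f"[::]:{port}")
    server.start()
    server.wait_for_termination()

# Client side:
# with grpc.insecure_channel("localhost:50051") as channel:
#     stub = model_pb2_grpc.NewsDatabrokerStub(channel)
#     print(stub.pullData(model_pb2.Empty()).text)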
langchain
langchain_community
pandasai==2.2.4
streamlit
requests
python-dotenv
protobuf
grpcio  # required by the generated gRPC code (model_pb2_grpc.py)
import os
from os.path import dirname, join

import streamlit as st
import pandas as pd
from dotenv import load_dotenv
from pandasai import SmartDataframe

from llms.CustomMistralLLM import CustomMistralLLM

# Load environment variables
load_dotenv()

st.set_page_config(page_title='Data Analysis with Pandas AI')
st.title('Data Analysis with Pandas AI')

uploaded_file = st.file_uploader("Choose a .csv file", type="csv")

# Endpoint and token come from docker-compose (or a local .env file)
LLM_ENDPOINT = os.getenv('LLM_ENDPOINT')
# Alternative local backend: llm = Ollama(model="mistral", base_url=LLM_ENDPOINT)
api_url = LLM_ENDPOINT
api_token = os.getenv("LLM_AUTH_TOKEN")
llm = CustomMistralLLM(api_url, api_token)

# Check if a file has been uploaded
if uploaded_file:
    # Read the uploaded CSV file into a pandas dataframe
    data = pd.read_csv(uploaded_file)
    st.subheader(f"File uploaded: {uploaded_file.name}")
    st.write(data.head(2))

    # Define a path to save charts (if needed)
    base_path = dirname(__file__)
    save_charts_path = join(base_path, 'exports', 'charts')

    # Initialize SmartDataframe for interactive data analysis
    smart_df = SmartDataframe(data, config={
        "llm": llm,
        "verbose": True,
        "open_charts": True,
        # "save_charts": True,  # Save generated charts to save_charts_path
        # "enable_cache": False,
    })

    prompt = st.text_area("Enter your prompt:")

    # Generate a response when the button is clicked
    if st.button("Generate"):
        if prompt:
            with st.spinner("Generating response..."):
                try:
                    # Get the response from the SmartDataframe
                    response = smart_df.chat(prompt)
                    if isinstance(response, pd.DataFrame):
                        st.write(response)
                    elif isinstance(response, str) and response.endswith(".png"):
                        st.image(response)
                    else:
                        st.write(response)
                except Exception as e:
                    # Display an error message if there's an issue
                    st.error(f"Error generating response: {e}")
        else:
            # Warn the user if the prompt is empty
            st.warning("Please enter a prompt!")