db_schema, csv fix, check columns and number tables, eval markdown, style (#16)
Commit: 51d3b40e690563bc1c21a8c3139ca44a396861a6
Co-authored-by: Francesco Giannuzzo <[email protected]>

Files changed:
- app.py +83 -40
- style.css +4 -2
- utilities.py +7 -5
app.py
CHANGED
@@ -81,9 +81,12 @@ def load_data(file, path, use_default):
     try:
         input_data["input_method"] = 'uploaded_file'
         input_data["db_name"] = os.path.splitext(os.path.basename(file))[0]
-
-
-
+        if file.endswith('.sqlite'):
+            #return 'Error: The uploaded file is not a valid SQLite database.'
+            input_data["data_path"] = file #os.path.join(".", "data", "data_interface",f"{input_data['db_name']}.sqlite")
+        else:
+            #change path
+            input_data["data_path"] = os.path.join(".", f"{input_data['db_name']}.sqlite")
         input_data["data"] = us.load_data(file, input_data["db_name"])
         df_current = input_data["data"]['data_frames'].get('MyTable', df_default) # Load the DataFrame
         if(input_data["data"]['data_frames'] and input_data["data"]["db"] is None): #for csv and xlsx files
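The branch above keys `input_data["data_path"]` off the upload's extension: a `.sqlite` file is used from wherever it was uploaded, while CSV/XLSX uploads point at a local `.sqlite` path that the converted data will live in. A minimal sketch of the same decision; the helper name and sample paths are invented for illustration:

```python
import os

def resolve_data_path(uploaded_file: str) -> str:
    """Sketch of the upload-path handling added in load_data (hypothetical helper, not in the repo)."""
    db_name = os.path.splitext(os.path.basename(uploaded_file))[0]
    if uploaded_file.endswith('.sqlite'):
        # SQLite uploads are used directly from wherever they were stored
        return uploaded_file
    # CSV/XLSX uploads get converted later, so point at a local .sqlite file instead
    return os.path.join(".", f"{db_name}.sqlite")

print(resolve_data_path("sales.csv"))       # ./sales.sqlite
print(resolve_data_path("chinook.sqlite"))  # chinook.sqlite (unchanged)
```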
@@ -303,6 +306,7 @@ with gr.Blocks(theme='shivi/calm_seafoam', css_paths='style.css', js=js_func) as
     with select_table_acc:
         previous_selection = gr.State([])
         table_selector = gr.CheckboxGroup(choices=[], label="Select tables from the choosen database", value=[])
+        excluded_tables_info = gr.HTML(label="Non-selectable tables (too many columns)", visible=False)
         table_outputs = [gr.DataFrame(label=f"Table {i+1}", interactive=True, visible=False) for i in range(50)]
         selected_table_names = gr.Textbox(label="Selected tables", visible=False, interactive=False)

@@ -310,55 +314,80 @@ with gr.Blocks(theme='shivi/calm_seafoam', css_paths='style.css', js=js_func) as
         open_model_selection = gr.Button("Choose your models", interactive=False)

         def update_table_list(data):
-            """Dynamically updates the list of available tables."""
+            """Dynamically updates the list of available tables and excluded ones."""
             if isinstance(data, dict) and data:
                 table_names = []
+                excluded_tables = []
+
+                data_frames = input_data['data'].get('data_frames', {})

+                available_tables = []
+                for name, df in data.items():
+                    df_real = data_frames.get(name, None)
+                    if df_real is not None and df_real.shape[1] > 15:
+                        excluded_tables.append(name)
+                    else:
+                        available_tables.append(name)
+
-
+                if input_data['input_method'] == "default" or len(available_tables) < 6:
                     table_names.append("All")

-
-                table_names.append("All") # If there are only a few tables, it makes sense to keep "All"
+                table_names.extend(available_tables)

-
-
+                # Prepare the text to display
+                if excluded_tables:
+                    excluded_text = "<b>⚠️ The following tables have more than 15 columns and cannot be selected:</b><br>" + "<br>".join(f"- {t}" for t in excluded_tables)
+                    excluded_visible = True
+                else:
+                    excluded_text = ""
+                    excluded_visible = False

-
+                return [
+                    gr.update(choices=table_names, value=[]), # CheckboxGroup update
+                    gr.update(value=excluded_text, visible=excluded_visible) # HTML display update
+                ]
+
+            return [
+                gr.update(choices=[], value=[]),
+                gr.update(value="", visible=False)
+            ]

         def show_selected_tables(data, selected_tables):
             updates = []
-
-            input_method = input_data['input_method']
+            data_frames = input_data['data'].get('data_frames', {})

+            available_tables = []
+            for name, df in data.items():
+                df_real = data_frames.get(name)
+                if df_real is not None and df_real.shape[1] <= 15:
+                    available_tables.append(name)
+
+            input_method = input_data['input_method']
             allow_all = input_method == "default" or len(available_tables) < 6
+
             selected_set = set(selected_tables)
             tables_set = set(available_tables)

-            # ▶️
             if allow_all:
                 if "All" in selected_set:
                     selected_tables = ["All"] + available_tables
                 elif selected_set == tables_set:
                     selected_tables = []
                 else:
-                    #
                     selected_tables = [t for t in selected_tables if t in available_tables]
             else:
-                #
                 selected_tables = [t for t in selected_tables if t in available_tables and t != "All"][:5]

-            #
             tables = {name: data[name] for name in selected_tables if name in data}

             for i, (name, df) in enumerate(tables.items()):
                 updates.append(gr.update(value=df, label=f"Table: {name}", visible=True, interactive=False))
+
             for _ in range(len(tables), 50):
                 updates.append(gr.update(visible=False))

-            # ✅ Button enabled only if there is at least one valid table
             updates.append(gr.update(interactive=bool(tables)))

-            # 🔄 Update the CheckboxGroup with consistent logic
             if allow_all:
                 updates.insert(0, gr.update(
                     choices=["All"] + available_tables,
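Both `update_table_list` and `show_selected_tables` now apply the same rule: a table is selectable only if its underlying DataFrame has at most 15 columns, and the "All" shortcut is offered only for the default dataset or when fewer than 6 tables survive the filter. A standalone sketch of that rule, assuming a plain dict of pandas DataFrames (the helper and constant names are illustrative):

```python
import pandas as pd

MAX_COLUMNS = 15      # tables wider than this are excluded from selection
MAX_FREE_CHOICE = 6   # below this many tables the "All" shortcut stays available

def split_tables(data_frames: dict, input_method: str):
    """Sketch of the column-count filter added to update_table_list."""
    available, excluded = [], []
    for name, df in data_frames.items():
        (excluded if df.shape[1] > MAX_COLUMNS else available).append(name)

    choices = []
    if input_method == "default" or len(available) < MAX_FREE_CHOICE:
        choices.append("All")
    choices.extend(available)
    return choices, excluded

frames = {"narrow": pd.DataFrame(columns=list("abc")),
          "wide": pd.DataFrame(columns=[f"c{i}" for i in range(20)])}
print(split_tables(frames, input_method="uploaded_file"))
# (['All', 'narrow'], ['wide'])
```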
@@ -389,7 +418,7 @@ with gr.Blocks(theme='shivi/calm_seafoam', css_paths='style.css', js=js_func) as
             return gr.update(value="", visible=False)

         # Automatically updates the checkbox list when `data_state` changes
-        data_state.change(fn=update_table_list, inputs=[data_state], outputs=[table_selector])
+        data_state.change(fn=update_table_list, inputs=[data_state], outputs=[table_selector, excluded_tables_info])

         # Updates the visible tables and the button state based on user selections
         #table_selector.change(fn=show_selected_tables, inputs=[data_state, table_selector], outputs=table_outputs + [open_model_selection])

@@ -602,9 +631,20 @@ with gr.Blocks(theme='shivi/calm_seafoam', css_paths='style.css', js=js_func) as
             {mirrored_symbols}
         </div>
         """
-
-        #return f"{css_symbols}"+f"# Loading {percent}% #"+f"{mirrored_symbols}"

+    def generate_eval_text(text):
+        symbols = "𓆡 "
+        mirrored_symbols = f'<span class="mirrored">{symbols.strip()}</span>'
+        css_symbols = f'<span class="fish">{symbols.strip()}</span>'
+        return f"""
+        <div class='barcontainer'>
+            {css_symbols}
+            <span class='loading' style="font-family: 'Inter', sans-serif;">
+                {text}
+            </span>
+            {mirrored_symbols}
+        </div>
+        """
     def qatch_flow():
         #caching
         global reset_flag
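`generate_eval_text` only builds the HTML banner; what actually shows or hides it is the extra `gr.Markdown` slot that `qatch_flow` now yields as its first output — hidden with `visible=False` while predictions stream, then shown with `visible=True` around evaluation (see the hunks below). A minimal, self-contained Gradio sketch of that toggle pattern, not taken from the Space itself:

```python
import time
import gradio as gr

def fake_flow():
    # first output slot = the status banner, kept hidden while work streams in
    yield gr.Markdown(visible=False), "working on row 1..."
    time.sleep(1)
    yield gr.Markdown(visible=False), "working on row 2..."
    time.sleep(1)
    # once the streaming part is done, reveal the banner with the eval text
    yield gr.Markdown("𓆡 Evaluation 𓆡", visible=True), "done"

with gr.Blocks() as demo:
    banner = gr.Markdown(visible=False)
    log = gr.Markdown()
    gr.Button("Run").click(fn=fake_flow, inputs=[], outputs=[banner, log])

if __name__ == "__main__":
    demo.launch()
```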
@@ -620,7 +660,7 @@ with gr.Blocks(theme='shivi/calm_seafoam', css_paths='style.css', js=js_func) as
             reset_flag = False
             for model in input_data['models']:
                 model_image_path = next((m["image_path"] for m in model_list_dict if m["code"] == model), None)
-                yield gr.Image(model_image_path), gr.Markdown(), gr.Markdown(), gr.Markdown(), metrics_conc, *[predictions_dict[model][columns_to_visulize] for model in model_list]
+                yield gr.Markdown(visible=False), gr.Image(model_image_path), gr.Markdown(), gr.Markdown(), gr.Markdown(), metrics_conc, *[predictions_dict[model][columns_to_visulize] for model in model_list]
                 count=1
                 for _, row in predictions_dict[model].iterrows():
                 #for index, row in target_df.iterrows():

@@ -636,7 +676,7 @@ with gr.Blocks(theme='shivi/calm_seafoam', css_paths='style.css', js=js_func) as
                         <div style='font-size: 3rem'>➡️</div>
                     </div>
                     """
-                    yield gr.Image(), gr.Markdown(load_text), gr.Markdown(display_question), gr.Markdown(), metrics_conc, *[predictions_dict[model][columns_to_visulize] for model in model_list]
+                    yield gr.Markdown(), gr.Image(), gr.Markdown(load_text), gr.Markdown(display_question), gr.Markdown(), metrics_conc, *[predictions_dict[model][columns_to_visulize] for model in model_list]
                     #time.sleep(0.02)
                     prediction = row['predicted_sql']

@@ -646,19 +686,19 @@ with gr.Blocks(theme='shivi/calm_seafoam', css_paths='style.css', js=js_func) as
                         <div class='sqlquery' font-family: 'Inter', sans-serif;>{prediction}</div>
                     </div>
                     """
-                    yield gr.Image(), gr.Markdown(load_text), gr.Markdown(), gr.Markdown(display_prediction), metrics_conc, *[predictions_dict[model][columns_to_visulize] for model in model_list]
-                yield gr.Image(), gr.Markdown(load_text), gr.Markdown(), gr.Markdown(display_prediction), metrics_conc, *[predictions_dict[model][columns_to_visulize] for model in model_list]
+                    yield gr.Markdown(), gr.Image(), gr.Markdown(load_text), gr.Markdown(), gr.Markdown(display_prediction), metrics_conc, *[predictions_dict[model][columns_to_visulize] for model in model_list]
+                yield gr.Markdown(), gr.Image(), gr.Markdown(load_text), gr.Markdown(), gr.Markdown(display_prediction), metrics_conc, *[predictions_dict[model][columns_to_visulize] for model in model_list]
             metrics_conc = target_df
             if 'valid_efficiency_score' not in metrics_conc.columns:
                 metrics_conc['valid_efficiency_score'] = metrics_conc['VES']
-
+            eval_text = generate_eval_text("End evaluation")
+            yield gr.Markdown(eval_text, visible=True), gr.Image(), gr.Markdown(), gr.Markdown(), gr.Markdown(), metrics_conc, *[predictions_dict[model][columns_to_visulize] for model in model_list]
         else:

             orchestrator_generator = OrchestratorGenerator()
             # TODO: add to target_df column target_df["columns_used"], tables selection
             # print(input_data['data']['db'])
             #print(input_data['data']['selected_tables'])
-            #TODO s
             target_df = orchestrator_generator.generate_dataset(connector=input_data['data']['db'], tables_to_include=input_data['data']['selected_tables'])
             #target_df = orchestrator_generator.generate_dataset(connector=input_data['data']['db'], tables_to_includes=None)

@@ -666,10 +706,10 @@ with gr.Blocks(theme='shivi/calm_seafoam', css_paths='style.css', js=js_func) as
             reset_flag = False
             for model in input_data["models"]:
                 model_image_path = next((m["image_path"] for m in model_list_dict if m["code"] == model), None)
-                yield gr.Image(model_image_path), gr.Markdown(), gr.Markdown(), gr.Markdown(), metrics_conc, *[predictions_dict[model] for model in model_list]
+                yield gr.Markdown(visible=False), gr.Image(model_image_path), gr.Markdown(), gr.Markdown(), gr.Markdown(), metrics_conc, *[predictions_dict[model] for model in model_list]
                 count=0
                 for index, row in target_df.iterrows():
-                    if (reset_flag == False):
+                    if (reset_flag == False):
                         percent_complete = round(((index+1) / len(target_df)) * 100, 2)
                         load_text = f"{generate_loading_text(percent_complete)}"

@@ -680,9 +720,9 @@ with gr.Blocks(theme='shivi/calm_seafoam', css_paths='style.css', js=js_func) as
                             <div style='font-size: 3rem'>➡️</div>
                         </div>
                         """
-                        yield gr.Image(), gr.Markdown(load_text), gr.Markdown(display_question), gr.Markdown(), metrics_conc, *[predictions_dict[model]for model in model_list]
+                        yield gr.Markdown(), gr.Image(), gr.Markdown(load_text), gr.Markdown(display_question), gr.Markdown(), metrics_conc, *[predictions_dict[model]for model in model_list]
                         start_time = time.time()
-                        samples = us.generate_some_samples(input_data[
+                        samples = us.generate_some_samples(input_data["data_path"], row["tbl_name"])

                         schema_text = utils_get_db_tables_info.utils_extract_db_schema_as_string(
                             db_id = input_data["db_name"],

@@ -700,7 +740,7 @@ with gr.Blocks(theme='shivi/calm_seafoam', css_paths='style.css', js=js_func) as
                         answer = response['response']

                         end_time = time.time()
-                        display_prediction = f"""<div class='loading' style='font-size: 1.7rem; font-family: 'Inter', sans-serif;'
+                        display_prediction = f"""<div class='loading' style='font-size: 1.7rem; font-family: 'Inter', sans-serif;'>Predicted SQL:</div>
                         <div style='display: flex; align-items: center;'>
                             <div style='font-size: 3rem'>➡️</div>
                             <div class='sqlquery' font-family: 'Inter', sans-serif;>{prediction}</div>

@@ -717,7 +757,7 @@ with gr.Blocks(theme='shivi/calm_seafoam', css_paths='style.css', js=js_func) as
                             'price':price,
                             'answer':answer,
                             'number_question':count,
-                            'prompt'
+                            'prompt': prompt_to_send
                         }]).dropna(how="all") # Remove only completely empty rows
                         count=count+1
                         # TODO: use a for loop

@@ -730,10 +770,12 @@ with gr.Blocks(theme='shivi/calm_seafoam', css_paths='style.css', js=js_func) as
                         predictions_dict[model] = pd.concat([predictions_dict[model], new_row], ignore_index=True)

                         # yield gr.Textbox(), gr.Textbox(prediction), *[predictions_dict[model] for model in input_data["models"]], None
-                        yield gr.Image(), gr.Markdown(load_text), gr.Markdown(), gr.Markdown(display_prediction), metrics_conc, *[predictions_dict[model]for model in model_list]
+                        yield gr.Markdown(), gr.Image(), gr.Markdown(load_text), gr.Markdown(), gr.Markdown(display_prediction), metrics_conc, *[predictions_dict[model]for model in model_list]

-                yield gr.Image(), gr.Markdown(load_text), gr.Markdown(), gr.Markdown(display_prediction), metrics_conc, *[predictions_dict[model] for model in model_list]
+                yield gr.Markdown(), gr.Image(), gr.Markdown(load_text), gr.Markdown(), gr.Markdown(display_prediction), metrics_conc, *[predictions_dict[model] for model in model_list]
             # END
+            eval_text = generate_eval_text("Evaluation")
+            yield gr.Markdown(eval_text, visible=True), gr.Image(), gr.Markdown(), gr.Markdown(), gr.Markdown(), metrics_conc, *[predictions_dict[model] for model in model_list]
             evaluator = OrchestratorEvaluator()
             for model in input_data["models"]:
                 metrics_df_model = evaluator.evaluate_df(
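Inside the per-question loop above, each model response becomes a one-row DataFrame that is appended to `predictions_dict[model]` with `pd.concat`, and `dropna(how="all")` keeps fully empty records out; the new `'prompt': prompt_to_send` field simply rides along in that record. A small sketch of the same append pattern with shortened, illustrative column names:

```python
import pandas as pd

predictions = pd.DataFrame(columns=["number_question", "answer", "prompt"])

def append_row(df: pd.DataFrame, record: dict) -> pd.DataFrame:
    """Sketch of the per-question append used in qatch_flow."""
    new_row = pd.DataFrame([record]).dropna(how="all")  # skip rows where every field is missing
    return pd.concat([df, new_row], ignore_index=True)

predictions = append_row(predictions, {"number_question": 1,
                                       "answer": "SELECT name FROM customer",
                                       "prompt": "Translate the question ..."})
print(predictions)
```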
@@ -747,8 +789,8 @@ with gr.Blocks(theme='shivi/calm_seafoam', css_paths='style.css', js=js_func) as

             if 'valid_efficiency_score' not in metrics_conc.columns:
                 metrics_conc['valid_efficiency_score'] = metrics_conc['VES']
-
-            yield gr.Image(), gr.Markdown(), gr.Markdown(), gr.Markdown(), metrics_conc, *[predictions_dict[model] for model in model_list]
+            eval_text = generate_eval_text("End evaluation")
+            yield gr.Markdown(eval_text, visible=True), gr.Image(), gr.Markdown(), gr.Markdown(), gr.Markdown(), metrics_conc, *[predictions_dict[model] for model in model_list]

     # Loading Bar
     with gr.Row():

@@ -771,8 +813,6 @@ with gr.Blocks(theme='shivi/calm_seafoam', css_paths='style.css', js=js_func) as
         with gr.Column():
             with gr.Column():
                 prediction_display = gr.Markdown()
-
-                evaluation_loading = gr.Markdown() # 𓆡

         dataframe_per_model = {}

@@ -793,6 +833,9 @@ with gr.Blocks(theme='shivi/calm_seafoam', css_paths='style.css', js=js_func) as
                 dataframe_per_model[model] = gr.DataFrame()
                 # download_pred_model = gr.DownloadButton(label="Download Prediction per Model", visible=False)

+
+        evaluation_loading = gr.Markdown()
+
         def change_tab():
             return [gr.update(visible=(model in input_data["models"])) for model in model_list]

@@ -809,7 +852,7 @@ with gr.Blocks(theme='shivi/calm_seafoam', css_paths='style.css', js=js_func) as
         submit_models_button.click(
             fn=qatch_flow,
             inputs=[],
-            outputs=[model_logo, variable, question_display, prediction_display, metrics_df] + list(dataframe_per_model.values())
+            outputs=[evaluation_loading, model_logo, variable, question_display, prediction_display, metrics_df] + list(dataframe_per_model.values())
         )

         submit_models_button.click(
style.css
CHANGED
@@ -57,13 +57,15 @@ body, label, button, span, li, p, .prose {
 .mirrored {
     display: inline-block;
     transform: scaleX(-1);
+    position: relative;
+    top: -9.5px;
     font-family: 'Inter', sans-serif;
     font-size: 1.5rem;
     font-weight: 700;
     letter-spacing: 1px;
     text-align: center;
     color: #222;
-    background: linear-gradient(45deg, #1a41d9, #
+    background: linear-gradient(45deg, #1a41d9, #06ffe6);
     -webkit-background-clip: text;
     -webkit-text-fill-color: transparent;
     padding: 20px;

@@ -78,7 +80,7 @@ body, label, button, span, li, p, .prose {
     letter-spacing: 1px;
     text-align: center;
     color: #222;
-    background: linear-gradient(45deg, #1a41d9, #
+    background: linear-gradient(45deg, #1a41d9, #06ffe6);
     -webkit-background-clip: text;
     -webkit-text-fill-color: transparent;
     padding: 20px;
utilities.py
CHANGED
@@ -94,16 +94,18 @@ def increment_filename(filename):
     return new_base + ext

 def prepare_prompt(prompt, question, schema, samples):
-    prompt = prompt.replace("{
-    prompt += f" Some
+    prompt = prompt.replace("{db_schema}", schema).replace("{question}", question)
+    prompt += f" Some instances: {samples}"
     return prompt

-def generate_some_samples(
+def generate_some_samples(file_path, tbl_name):
+    conn = sqlite3.connect(file_path)
     samples = []
     query = f"SELECT * FROM {tbl_name} LIMIT 3"
     try:
-        sample_data =
-        samples.append(
+        sample_data = pd.read_sql_query(query, conn)
+        samples.append(sample_data.to_dict(orient="records"))
+        #samples.append(str(sample_data))
     except Exception as e:
         samples.append(f"Error: {e}")
     return samples