Spaces:
Sleeping
Sleeping
ACMCMC
committed on
Commit
·
551646a
1
Parent(s):
a6bd112
First final version
Browse files
- app.py +19 -7
- llm_res.py +6 -9
- utils.py +1 -1
app.py
CHANGED
@@ -111,20 +111,20 @@ with st.container():
|
|
111 |
status.write(
|
112 |
"Augmenting the set of diseases by finding others with related embeddings..."
|
113 |
)
|
114 |
-
augmented_set_of_diseases = augment_the_set_of_diseaces(
|
115 |
similarities_of_augmented_set_of_diseases = (
|
116 |
get_similarities_among_diseases_uris(augmented_set_of_diseases)
|
117 |
)
|
118 |
df_similarities_augmented_set = get_similarities_df(
|
119 |
similarities_of_augmented_set_of_diseases
|
120 |
)
|
121 |
-
status.
|
122 |
-
df_similarities_augmented_set.style.background_gradient(cmap="viridis", axis=None)
|
123 |
-
)
|
124 |
-
status.json(similarities_of_augmented_set_of_diseases, expanded=True)
|
125 |
status.info(
|
126 |
f"Augmented set of diseases: {len(augmented_set_of_diseases)} diseases."
|
127 |
)
|
|
|
|
|
|
|
128 |
status.json(augmented_set_of_diseases, expanded=False)
|
129 |
status.divider()
|
130 |
# 6. Query the embeddings of the diseases related to each clinical trial (also in the DB), to get the most similar clinical trials to our set of diseases
|
@@ -193,12 +193,14 @@ We use the embeddings of the diseases to determine the similarity between them.
|
|
193 |
|
194 |
Specifically, it optimizes the following cost function:
|
195 |
$\\text{minimize} \\sum_{(h, r, t) \\in S} \\max(0, \\gamma + f(h, r, t) - f(h, r, t')) + \\sum_{(h, r, t) \\in S'} f(h, r, t)$
|
|
|
|
|
196 |
"""
|
197 |
)
|
198 |
try:
|
199 |
edges_to_show = []
|
200 |
labels_of_diseases = get_labels_of_diseases_from_uris(
|
201 |
-
|
202 |
)
|
203 |
uris_and_labels_of_diseases = dict(
|
204 |
zip(df_similarities_augmented_set.index, labels_of_diseases)
|
@@ -227,7 +229,7 @@ $\\text{minimize} \\sum_{(h, r, t) \\in S} \\max(0, \\gamma + f(h, r, t) - f(h,
|
|
227 |
Node(
|
228 |
id=disease,
|
229 |
label=disease,#uris_and_labels_of_diseases[disease],
|
230 |
-
size=
|
231 |
shape="circular",
|
232 |
)
|
233 |
for disease in df_similarities_augmented_set.index
|
@@ -290,6 +292,16 @@ with st.container():
|
|
290 |
with tabs[i]:
|
291 |
render_trial_details(trials[i])
|
292 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
293 |
show_graph_of_all_diseases = False
|
294 |
if show_graph_of_all_diseases:
|
295 |
# If disease_names is not defined, define it
|
|
|
111 |
status.write(
|
112 |
"Augmenting the set of diseases by finding others with related embeddings..."
|
113 |
)
|
114 |
+
augmented_set_of_diseases = augment_the_set_of_diseaces(filtered_diseases_uris)
|
115 |
similarities_of_augmented_set_of_diseases = (
|
116 |
get_similarities_among_diseases_uris(augmented_set_of_diseases)
|
117 |
)
|
118 |
df_similarities_augmented_set = get_similarities_df(
|
119 |
similarities_of_augmented_set_of_diseases
|
120 |
)
|
121 |
+
#status.json(similarities_of_augmented_set_of_diseases, expanded=True)
|
|
|
|
|
|
|
122 |
status.info(
|
123 |
f"Augmented set of diseases: {len(augmented_set_of_diseases)} diseases."
|
124 |
)
|
125 |
+
status.table(
|
126 |
+
df_similarities_augmented_set.style.background_gradient(cmap="viridis", axis=None)
|
127 |
+
)
|
128 |
status.json(augmented_set_of_diseases, expanded=False)
|
129 |
status.divider()
|
130 |
# 6. Query the embeddings of the diseases related to each clinical trial (also in the DB), to get the most similar clinical trials to our set of diseases
|
|
|
193 |
|
194 |
Specifically, it optimizes the following cost function:
|
195 |
$\\text{minimize} \\sum_{(h, r, t) \\in S} \\max(0, \\gamma + f(h, r, t) - f(h, r, t')) + \\sum_{(h, r, t) \\in S'} f(h, r, t)$
|
196 |
+
|
197 |
+
By minimizing this cost function, the model learns the embeddings of the entities and relations that best represent the graph. The embeddings are then used to calculate the similarity between the diseases, which is shown in the graph.
|
198 |
"""
|
199 |
)
|
200 |
try:
|
201 |
edges_to_show = []
|
202 |
labels_of_diseases = get_labels_of_diseases_from_uris(
|
203 |
+
[f'http://identifiers.org/medgen/{disease}' for disease in augmented_set_of_diseases]
|
204 |
)
|
205 |
uris_and_labels_of_diseases = dict(
|
206 |
zip(df_similarities_augmented_set.index, labels_of_diseases)
|
|
|
229 |
Node(
|
230 |
id=disease,
|
231 |
label=disease,#uris_and_labels_of_diseases[disease],
|
232 |
+
size=50,
|
233 |
shape="circular",
|
234 |
)
|
235 |
for disease in df_similarities_augmented_set.index
|
|
|
292 |
with tabs[i]:
|
293 |
render_trial_details(trials[i])
|
294 |
|
295 |
+
|
296 |
+
st.markdown(
|
297 |
+
"""This app has been created in HackUPC 2024 by the team 'Klìnic'. The team members are:
|
298 |
+
- [Aldan Creo](https://acmc-website.web.app)
|
299 |
+
- [Matthias Seiler](https://www.linkedin.com/in/maseiler/)
|
300 |
+
- [Tanguyvans Vansnick](https://www.linkedin.com/in/tanguy-vansnick-44186a199/)
|
301 |
+
- [Arjit Samal](https://www.linkedin.com/in/arijit-samal1/)
|
302 |
+
"""
|
303 |
+
)
|
304 |
+
|
305 |
show_graph_of_all_diseases = False
|
306 |
if show_graph_of_all_diseases:
|
307 |
# If disease_names is not defined, define it
|
llm_res.py
CHANGED
@@ -309,17 +309,14 @@ def tagging_insights_from_json(data_json):
|
|
309 |
processed_json = process_dictionaty_with_llm_to_generate_response(data_json)
|
310 |
|
311 |
tagging_prompt = ChatPromptTemplate.from_template(
|
312 |
-
"""
|
313 |
-
You are an expert on clinicial trials and analysis of their reports.
|
314 |
|
315 |
-
|
316 |
|
317 |
-
|
318 |
-
|
319 |
-
|
320 |
-
|
321 |
-
"""
|
322 |
-
)
|
323 |
|
324 |
class Classification(BaseModel):
|
325 |
# description: str = Field(
|
|
|
309 |
processed_json = process_dictionaty_with_llm_to_generate_response(data_json)
|
310 |
|
311 |
tagging_prompt = ChatPromptTemplate.from_template(
|
312 |
+
"""Extract the desired information from the following JSON data.
|
|
|
313 |
|
314 |
+
Only extract the properties mentioned in the 'Classification' function. Output a list of the extracted properties, starting with [ and ending with ], for each of the properties.
|
315 |
|
316 |
+
Raw data (in JSON format):
|
317 |
+
{input}
|
318 |
+
"""
|
319 |
+
)
|
|
|
|
|
320 |
|
321 |
class Classification(BaseModel):
|
322 |
# description: str = Field(
|
utils.py
CHANGED
@@ -229,7 +229,7 @@ def filter_out_less_promising_diseases(info_dicts: List[Dict[str, Any]]) -> List
|
|
229 |
filtered_diseases = df_diseases_similarities.mean()[
|
230 |
df_diseases_similarities.mean() > mean - 0.2 * std
|
231 |
].index.tolist()
|
232 |
-
return filtered_diseases, df_diseases_similarities
|
233 |
|
234 |
|
235 |
def get_labels_of_diseases_from_uris(uris: List[str]) -> List[str]:
|
|
|
229 |
filtered_diseases = df_diseases_similarities.mean()[
|
230 |
df_diseases_similarities.mean() > mean - 0.2 * std
|
231 |
].index.tolist()
|
232 |
+
return [f'http://identifiers.org/medgen/{d}' for d in filtered_diseases], df_diseases_similarities
|
233 |
|
234 |
|
235 |
def get_labels_of_diseases_from_uris(uris: List[str]) -> List[str]:
|