Spaces:

Agents-MCP-Hackathon
/

biocynthia-demo

Sleeping

App Files Files Community

biocynthia-demo / app.py

latticetower

fix typos

ca699c2 19 days ago

raw

history blame contribute delete

4.4 kB

	from typing import List, Dict
	import numpy as np
	import gradio as gr

	import data_utils



	def smiles2monomers(smiles: str) -> list[str]:
	    """Split a molecule given as SMILES into its monomers (each also SMILES).

	    Placeholder: the decomposition is not implemented yet, so the input is
	    ignored and an empty list is always returned.
	    """
	    monomers: list[str] = []
	    return monomers


	def generate_monomers(num_monomers: int, monomers_vocab: List[str]) -> list[str]:
	    """Draw a random sequence of monomers from a vocabulary.

	    Sampling uses ``np.random.choice`` with its defaults (uniform, with
	    replacement), so the same monomer may appear more than once.

	    Args:
	        num_monomers: How many monomers to draw.
	        monomers_vocab: Candidate monomers to sample from.

	    Returns:
	        A plain Python list with ``num_monomers`` entries taken from
	        ``monomers_vocab``.
	    """
	    # np.random.choice returns an ndarray; convert it so the result matches the
	    # declared ``list[str]`` return type and holds built-in ``str`` objects.
	    return np.random.choice(monomers_vocab, num_monomers).tolist()


	def monomer2domains(monomer: str, is_start=False, is_final=False) -> List[Dict[str, str]]:
	    """Look up the NRPS module (a list of domains) suggested for one monomer.

	    Delegates to ``data_utils.module_generator.suggest_module``.

	    Args:
	        monomer: The monomer to build a module for.
	        is_start: Whether this monomer is the first one in the peptide.
	        is_final: Whether this monomer is the last one in the peptide.

	    Returns:
	        Whatever ``suggest_module`` returns. Callers in this file treat it as
	        a list of domain dicts with ``'name'`` and ``'sequence'`` keys --
	        TODO confirm against ``data_utils``.
	    """
	    return data_utils.module_generator.suggest_module(
	        monomer, is_start=is_start, is_final=is_final
	    )


	def monomers2modules(monomer_list: list[str], is_cyclic: bool=False) -> List[List[Dict[str, str]]]:
	    """Build one NRPS module per monomer, in peptide order.

	    The first monomer is passed to ``monomer2domains`` with ``is_start=True``
	    and the last one with ``is_final=True``; a single-monomer list gets both
	    flags at once.

	    Args:
	        monomer_list: Monomers of the target peptide, in order.
	        is_cyclic: Currently ignored -- it is not yet clear how cyclicity is
	            encoded in the NRPS domain sequence.

	    Returns:
	        A list with one entry per monomer, each being the result of
	        ``monomer2domains`` for that monomer. Empty input gives an empty list.
	    """
	    # The final position is len - 1; comparing against len itself never
	    # matches, which left the last module unflagged.
	    last_index = len(monomer_list) - 1
	    return [
	        monomer2domains(
	            monomer,
	            is_start=(index == 0),
	            is_final=(index == last_index),
	        )
	        for index, monomer in enumerate(monomer_list)
	    ]


	# def find_bacteria(monomers: list[str]) -> list[str]:
	# """
	# Finds bacteria which might produce the target peptide.
	# Input: sequence of possible domains. Each domain is represented as a protein sequence.
	# Output: possible hits from blastp search.
	# """
	# return []


	# def letter_counter(word, letter):
	# """Count the occurrences of a specific letter in a word.

	# Args:
	# word: The word or phrase to analyze
	# letter: The letter to count occurrences of

	# Returns:
	# The number of times the letter appears in the word
	# """
	# return word.lower().count(letter.lower())

	def convert_to_fasta(modules_list):
	    """Render every domain of every module as a FASTA record.

	    Each domain contributes two lines: a header of the form
	    ``>module_XX_domain_YY_<name>`` (zero-padded, zero-based indices) followed
	    by the domain's sequence. Lines are joined with newlines and there is no
	    trailing newline.

	    Args:
	        modules_list: Iterable of modules; each module is an iterable of
	            mappings with ``'name'`` and ``'sequence'`` keys.

	    Returns:
	        The FASTA text, or an empty string when there are no domains.
	    """
	    def _record_lines():
	        for module_idx, domains in enumerate(modules_list):
	            for domain_idx, domain in enumerate(domains):
	                label = domain['name']
	                residues = domain['sequence']
	                yield f">module_{module_idx:02d}_domain_{domain_idx:02d}_{label}"
	                yield residues

	    return '\n'.join(_record_lines())


	def generate_peptide_monomers(num_monomers: int):
	    """Produces the peptide constructed from specified number of monomer fragments.
	    Currently the sequence is picked randomly from predefined collection of monomers (aminoacids and their D- isomers).

	    Args:
	        num_monomers: The number of monomer fragments in the resulting 'peptide'

	    Returns:
	        The string which is constructed from specified number of monomer fragments separated by commas,
	        and the data for the corresponding domain sequences (for future searches with blastp, in .fasta format).

	    """
	    # NOTE(review): the docstring wording is left untouched on purpose -- the app
	    # is launched with mcp_server=True, and Gradio presumably surfaces this text
	    # as the tool description; confirm before rewording.
	    picked = generate_monomers(num_monomers, data_utils.load_monomers())
	    modules = monomers2modules(picked)

	    peptide = ",".join(picked)
	    return peptide, convert_to_fasta(modules)



	if __name__ == "__main__":
	    # Banner shown at the top of the page. The text lines stay unindented so
	    # the Markdown code fence renders exactly as before.
	    banner_markdown = """# BioCynthia
	```
	There are bacteria in soil and sea
	They have what is called a B-G-C
	These genes produce some complex peptides
	And they might save our lives!
	```
	"""

	    with gr.Blocks(title="NRPS domains 'generator'") as demo:
	        gr.Markdown(banner_markdown)
	        gr.Markdown("For more details on project goals and motivation, please refer to the README.md")

	        # Single input: how many monomers the generated peptide should have.
	        monomer_count_slider = gr.Slider(
	            minimum=2, maximum=10, step=1, value=3,
	            label="Number of monomers in the target peptide",
	        )
	        # Two text outputs: the comma-separated monomers and the FASTA text.
	        gr.Interface(
	            fn=generate_peptide_monomers,
	            inputs=[monomer_count_slider],
	            outputs=["text", "text"],
	        )

	    # mcp_server=True asks Gradio to also start its MCP server with the web UI.
	    demo.launch(mcp_server=True)