Model_Cards_Writing_Tool

Sleeping

File size: 16,243 Bytes

# from yaml import load
from persist import persist, load_widget_state
import streamlit as st
from io import StringIO 
import tempfile
from pathlib import Path
import requests
from huggingface_hub import hf_hub_download, upload_file
import pandas as pd
from huggingface_hub import create_repo
import os
from datetime import date
from middleMan import parse_into_jinja_markdown as pj

import requests

@st.cache
def get_icd():
    # Get ICD10 list
    token_endpoint = 'https://icdaccessmanagement.who.int/connect/token'
    client_id = '3bc9c811-7f2e-4dab-a2dc-940e47a38fef_a6108252-4503-4ff7-90ab-300fd27392aa'
    client_secret = 'xPj7mleWf1Bilu9f7P10UQmBPvL5F6Wgd8/rJhO1T04='
    scope = 'icdapi_access'
    grant_type = 'client_credentials'
    # set data to post
    payload = {'client_id': client_id, 
            'client_secret': client_secret, 
            'scope': scope, 
            'grant_type': grant_type}
    # make request
    r = requests.post(token_endpoint, data=payload, verify=False).json()
    token = r['access_token']
    # access ICD API
    uri = 'https://id.who.int/icd/release/10/2019/C00-C75'
    # HTTP header fields to set
    headers = {'Authorization':  'Bearer '+token, 
            'Accept': 'application/json', 
            'Accept-Language': 'en',
            'API-Version': 'v2'}        
    # make request           
    r = requests.get(uri, headers=headers, verify=False)
    print("icd",r.json())
    icd_map =[]
    for child in r.json()['child']: 
        r_child = requests.get(child, headers=headers,verify=False).json()
        icd_map.append(r_child["code"]+" "+r_child["title"]["@value"])
    return icd_map

@st.cache    
def get_treatment_mod():
    url = "https://clinicaltables.nlm.nih.gov/loinc_answers?loinc_num=21964-2"
    r = requests.get(url).json()
    treatment_mod = [treatment['DisplayText'] for treatment in r]
    return treatment_mod


@st.cache
def get_cached_data():
    languages_df = pd.read_html("https://hf.co/languages")[0]
    languages_map = pd.Series(languages_df["Language"].values, index=languages_df["ISO code"]).to_dict()

    license_df = pd.read_html("https://huggingface.co/docs/hub/repositories-licenses")[0]
    license_map = pd.Series(
        license_df["License identifier (to use in repo card)"].values, index=license_df.Fullname
    ).to_dict()

    available_metrics = [x['id'] for x in requests.get('https://huggingface.co/api/metrics').json()]

    r = requests.get('https://huggingface.co/api/models-tags-by-type')
    tags_data = r.json()
    libraries = [x['id'] for x in tags_data['library']]
    tasks = [x['id'] for x in tags_data['pipeline_tag']]

    icd_map = get_icd()
    treatment_mod = get_treatment_mod()
    return languages_map, license_map, available_metrics, libraries, tasks, icd_map, treatment_mod


def card_upload(card_info,repo_id,token):
    #commit_message=None,
    repo_type = "model"
    commit_description=None,
    revision=None
    create_pr=None
    with tempfile.TemporaryDirectory() as tmpdir:
        tmp_path = Path(tmpdir) / "README.md"
        tmp_path.write_text(str(card_info))
        url = upload_file(
                    path_or_fileobj=str(tmp_path),
                    path_in_repo="README.md",
                    repo_id=repo_id,
                    token=token,
                    repo_type=repo_type,
                    # identical_ok=True,
                    revision=revision
                )
    return url

def images_upload(images_list,repo_id,token):
    repo_type = "model"
    commit_description=None,
    revision=None
    create_pr=None
    for img in images_list:
        if img is not None:
            with tempfile.TemporaryDirectory() as tmpdir:
                tmp_path = Path(tmpdir) / "README.md"
                tmp_path.write_text(str(img))
                url = upload_file(
                            path_or_fileobj=str(tmp_path),
                            path_in_repo="README.md",
                            repo_id=repo_id,
                            token=token,
                            repo_type=repo_type,
                            # identical_ok=True,
                            revision=revision
                        )
    return url

def validate(self, repo_type="model"):
    """Validates card against Hugging Face Hub's model card validation logic.

    Using this function requires access to the internet, so it is only called

    internally by `modelcards.ModelCard.push_to_hub`.

    Args:

        repo_type (`str`, *optional*):

            The type of Hugging Face repo to push to. Defaults to None, which will use

            use "model". Other options are "dataset" and "space".

    """
    if repo_type is None:
        repo_type = "model"

    # TODO - compare against repo types constant in huggingface_hub if we move this object there.
    if repo_type not in ["model", "space", "dataset"]:
        raise RuntimeError(
            "Provided repo_type '{repo_type}' should be one of ['model', 'space',"
            " 'dataset']."
        )

    body = {
        "repoType": repo_type,
        "content": str(self),
    }
    headers = {"Accept": "text/plain"}

    try:
        r = requests.post(
            "https://huggingface.co/api/validate-yaml", body, headers=headers
        )
        r.raise_for_status()
    except requests.exceptions.HTTPError as exc:
        if r.status_code == 400:
            raise RuntimeError(r.text)
        else:
            raise exc


## Save uploaded [markdown] file to directory to be used by jinja parser function 
def save_uploadedfile(uploadedfile):
    with open(uploadedfile.name,"wb") as f:
        f.write(uploadedfile.getbuffer())
    st.success("Saved File:{} to temp_uploaded_filed_Dir".format(uploadedfile.name))
    return uploadedfile.name 


def main_page():
    today=date.today()
    		
    if "model_name" not in st.session_state:
        # Initialize session state.
        st.session_state.update({
            # Model Basic Information
            "model_version": 0,
            "icd10": [],
            "treatment_modality": [],
            "prescription_levels": [],
            "additional_information": "",
            "motivation": "",
            "model_class":"",
            "creation_date": today,
            "architecture": "",
            "model_developers": "",
            "funded_by":"",
            "shared_by":"",
            "license": "",
            "finetuned_from": "",
            "research_paper": "",
            "git_repo": "",
            # Technical Specifications
            "nb_parameters": 5,
            "input_channels": [],
            "loss_function": "",
            "batch_size": 1,
            "patch_dimension": [],
            "architecture_filename":None,
            "libraries":[],
            "hardware": "",
            "inference_time": 10,
            "get_started_code": "",
            # Training Details
            "training_set_size":10,
            "validation_set_size":10,
            "age_fig_filename":"",
            "sex_fig_filename":"",
            "dataset_source": "",
            "acquisition_from": today,
            "acquisition_to": today,
            "markdown_upload": ""
        })
    ## getting cache for each warnings 
    languages_map, license_map, available_metrics, libraries, tasks, icd_map, treatment_mod = get_cached_data()

    ## form UI setting 
    st.header("Model basic information (Dose prediction)")

    warning_placeholder = st.empty()

    st.text_input("Model Name", key=persist("model_name"))
    st.number_input("Version",key=persist("model_version"),step=0.1)
    st.text("Intended use:")
    left, right = st.columns([4,2])
    left.multiselect("Treatment site ICD10",list(icd_map), help="Reference ICD10 WHO: https://icd.who.int/icdapi",key=persist("icd10"))
    right.multiselect("Treatment modality", list(treatment_mod), help="Reference LOINC Modality Radiation treatment: https://loinc.org/21964-2", key=persist("treatment_modality"))
    left, right = st.columns(2)
    nlines = int(left.number_input("Number of prescription levels", 0, 20, 1))
    # cols = st.columns(ncol)
    for i in range(nlines):
        right.number_input(f"Prescription [Gy] # {i}", key=i)
    st.text_area("Additional information", placeholder = "Bilateral cases only", help="E.g. Bilateral cases only", key=persist('additional_information'))
    st.text_area("Motivation for development", key=persist('motivation'))
    st.text_area("Class", placeholder="RULE 11, FROM MDCG 2021-24", key=persist('model_class'))
    st.date_input("Creation date", key=persist('creation_date'))
    st.text_area("Type of architecture",value="UNet", key=persist('architecture'))

    st.text("Developed by:")
    left, middle, right = st.columns(3)
    left.text_input("Name", key=persist('dev_name'))
    middle.text_input("Institution", placeholder = "University/clinic/company", key=persist('dev_institution'))
    right.text_input("Email", key=persist('dev_email'))

    st.text_area("Funded by", key=persist('funded_by'))
    st.text_area("Shared by", key=persist('shared_by'))
    st.selectbox("License", [""] + list(license_map.values()), help="The license associated with this model.", key=persist("license"))
    st.text_area("Fine tuned from model", key=persist('finetuned_from'))
    st.text_area("Related Research Paper", help="Research paper related to this model.", key=persist("research_paper"))
    st.text_input("Related GitHub Repository", help="Link to a GitHub repository used in the development of this model", key=persist("git_repo"))
    # st.selectbox("Library Name", [""] + libraries, help="The name of the library this model came from (Ex. pytorch, timm, spacy, keras, etc.). This is usually automatically detected in model repos, so it is not required.", key=persist('library_name'))
    # st.text_input("Parent Model (URL)", help="If this model has another model as its base, please provide the URL link to the parent model", key=persist("Parent_Model_name"))
    # st.text_input("Datasets (comma separated)", help="The dataset(s) used to train this model. Use dataset id from https://hf.co/datasets.", key=persist("datasets"))
    # st.multiselect("Metrics", available_metrics, help="Metrics used in the training/evaluation of this model. Use metric id from https://hf.co/metrics.", key=persist("metrics"))
    # st.selectbox("Task", [""] + tasks, help="What task does this model aim to solve?", key=persist('task'))
    # st.text_input("Tags (comma separated)", help="Additional tags to add which will be filterable on https://hf.co/models. (Ex. image-classification, vision, resnet)", key=persist("tags"))
    # st.text_input("Author(s) (comma separated)", help="The authors who developed this model. If you trained this model, the author is you.", key=persist("the_authors"))
    # s
    # st.text_input("Carbon Emitted:", help="You can estimate carbon emissions using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700)", key=persist("Model_c02_emitted"))
   
    # st.header("Technical specifications")
    # st.header("Training data, methodology, and results")
    # st.header("Evaluation data, methodology, and results / commissioning")
    # st.header("Ethical use considerations")

    # warnings setting
    # languages=st.session_state.languages or None
    license=st.session_state.license or None
    task = None #st.session_state.task or None
    markdown_upload = st.session_state.markdown_upload
    #uploaded_model_card = st.session_state.uploaded_model 
    # Handle any warnings...
    do_warn = False
    warning_msg = "Warning: The following fields are required but have not been filled in: "
    if not license:
        warning_msg += "\n- License"
        do_warn = True
    if do_warn:
        warning_placeholder.error(warning_msg)

    with st.sidebar:

        ######################################################
        ### Uploading a model card from local drive
        ######################################################
        st.markdown("## Upload Model Card")
       
        st.markdown("#### Model Card must be in markdown (.md) format.") 

        # Read a single file 
        uploaded_file = st.file_uploader("Choose a file", type = ['md'], help = 'Please choose a markdown (.md) file type to upload')
        if uploaded_file is not None:
            name_of_uploaded_file = save_uploadedfile(uploaded_file)
           
            st.session_state.markdown_upload = name_of_uploaded_file ## uploaded model card

        # elif st.session_state.task =='fill-mask' or 'translation' or 'token-classification' or ' sentence-similarity' or 'summarization' or 'question-answering' or 'text2text-generation' or 'text-classification' or 'text-generation' or 'conversational':
        #     print("YO",st.session_state.task)
        #     st.session_state.markdown_upload = "language_model_template1.md" ## language model template
        
        else:#if st.session_state.task:
            
            st.session_state.markdown_upload =  "current_card.md" ## default non language model template
        print("st.session_state.markdown_upload",st.session_state.markdown_upload)
        #########################################
        ### Uploading model card to HUB
        #########################################
        out_markdown =open( st.session_state.markdown_upload, "r+"
            ).read() 
        print_out_final = f"{out_markdown}"
        st.markdown("## Export Loaded Model Card to Hub")
        with st.form("Upload to 🤗 Hub"):
            st.markdown("Use a token with write access from [here](https://hf.co/settings/tokens)")
            token = st.text_input("Token", type='password')
            repo_id = st.text_input("Repo ID")
            submit = st.form_submit_button('Upload to 🤗 Hub', help='The current model card will be uploaded to a branch in the supplied repo ')

        if submit:
            if len(repo_id.split('/')) == 2:
                repo_url = create_repo(repo_id, exist_ok=True, token=token)
                new_url = card_upload(pj(),repo_id, token=token)
                # images_upload([st.session_state['architecture_filename'], st.session_state["age_fig_filename"], st.session_state["sex_fig_filename"]],repo_id, token=token)
                st.success(f"Pushed the card to the repo [here]({new_url})!") # note: was repo_url
            else:
                st.error("Repo ID invalid. It should be username/repo-name. For example: nateraw/food")
        

        #########################################
        ### Download model card
        #########################################


        st.markdown("## Download current Model Card")

        if st.session_state.model_name is None or st.session_state.model_name== ' ':
            downloaded_file_name = 'current_model_card.md'
        else: 
            downloaded_file_name = st.session_state.model_name+'_'+'model_card.md'
        download_status = st.download_button(label = 'Download Model Card', data = pj(), file_name = downloaded_file_name, help = "The current model card will be downloaded as a markdown (.md) file")
        if download_status == True:
            st.success("Your current model card, successfully downloaded 🤗")


def page_switcher(page):
    st.session_state.runpage = page    

def main():
    
    st.header("About Model Cards")
    st.markdown(Path('about.md').read_text(), unsafe_allow_html=True)
    btn = st.button('Create a Model Card 📝',on_click=page_switcher,args=(main_page,))
    if btn:
        st.experimental_rerun() # rerun is needed to clear the page

if __name__ == '__main__':
    load_widget_state()
    if 'runpage' not in st.session_state :
        st.session_state.runpage = main
    st.session_state.runpage()