|
|
|
import re
|
|
from pathlib import Path
|
|
|
|
from utils import replace_link
|
|
|
|
# Directory that receives one generated Markdown page per dataset.
DATASETS_ROOT = Path('dataset_zoo')

# Markdown skeleton of the dataset summary page. ``str.format`` fills in
# ``{num_datasets}`` and ``{dataset_msg}``.
MODELZOO_TEMPLATE = (
    '# Dataset Zoo Summary\n'
    '\n'
    'In this page, we list [all datasets](#all-supported-datasets) we support. '
    'You can click the link to jump to the corresponding dataset pages.\n'
    '\n'
    '## All supported datasets\n'
    '\n'
    '* Number of datasets: {num_datasets}\n'
    '{dataset_msg}\n'
    '\n'
)
|
|
|
|
|
|
def generate_datasets_pages():
    """Build the per-dataset pages and the dataset summary page.

    Every ``../../tools/data/*/README.md`` (resolved against the current
    working directory) is read, passed through ``replace_link`` twice (once
    for inline ``[text](target)`` links, once for ``[label]: target``
    reference definitions) and written to ``DATASETS_ROOT/<dir name>.md``.
    A summary that lists every generated page is then written to
    ``datasetzoo_statistics.md`` in the current working directory.

    ``DATASETS_ROOT`` must already exist; this function does not create it.

    Raises:
        ValueError: If a README does not start with a
            ``# Preparing <dataset>`` heading, which supplies the page title.
    """
    # Sort the matches: glob order depends on the filesystem, and an unsorted
    # listing would make the generated summary differ between machines.
    readme_paths = sorted(Path('../../tools/data').glob('*/README.md'))
    dataset_msgs = []

    for file in readme_paths:
        page_path = DATASETS_ROOT / file.parent.with_suffix('.md').name

        # Pin the encoding so the result does not depend on the platform's
        # locale default (assumes the Markdown sources are UTF-8).
        with open(file, 'r', encoding='utf-8') as f:
            content = f.read()

        heading = re.match(r'^# Preparing (.*)', content)
        if heading is None:
            # Fail with the offending path instead of an opaque
            # "'NoneType' object has no attribute 'group'".
            raise ValueError(
                f'{file}: expected the README to start with a '
                '"# Preparing <dataset>" heading')
        title = heading.group(1)

        # NOTE(review): ``replace_link`` is a project helper; presumably it
        # rewrites link targets relative to ``file`` -- confirm in ``utils``.
        content = replace_link(r'\[([^\]]+)\]\(([^)]+)\)', '[{}]({})', content,
                               file)
        content = replace_link(r'\[([^\]]+)\]: (.*)', '[{}]: {}', content,
                               file)

        # ``as_posix`` keeps forward slashes in the Markdown link on Windows.
        dataset_msgs.append(f'\t - [{title}]({page_path.as_posix()})')

        with open(page_path, 'w', encoding='utf-8') as f:
            f.write(content)

    modelzoo = MODELZOO_TEMPLATE.format(
        num_datasets=len(dataset_msgs),
        dataset_msg='\n'.join(dataset_msgs),
    )

    with open('datasetzoo_statistics.md', 'w', encoding='utf-8') as f:
        f.write(modelzoo)
|
|
|
|
|
|
# Module-level driver: runs whenever this file is executed or imported.
# The output directory has to exist before pages are written into it;
# ``exist_ok=True`` keeps repeated runs from failing.
DATASETS_ROOT.mkdir(exist_ok=True)
generate_datasets_pages()
|
|
|