Spaces:
Sleeping
Sleeping
| import numpy as np | |
| import pandas as pd | |
def create_synthetic_data(job_skills, education, title_job, location, age_DS, num_rows=2000):
    """Build a random binary feature table with a 0-100 ``TARGET`` score.

    Every feature name becomes one column filled with random 0/1 values.
    ``TARGET`` is the per-row count of set features, min-max scaled to the
    range 0-100 across the generated rows.

    Args:
        job_skills: A feature name, or an iterable of feature names.
        education: A feature name, or an iterable of feature names.
        title_job: A feature name, or an iterable of feature names.
        location: A feature name, or an iterable of feature names.
        age_DS: A feature name, or an iterable of feature names.
        num_rows: Number of rows to generate.

    Returns:
        A ``pandas.DataFrame`` with one 0/1 column per feature (in argument
        order) followed by a float ``TARGET`` column.
    """
    features = []
    for group in (job_skills, education, title_job, location, age_DS):
        # A bare string is a single feature name, not a sequence of characters.
        features.extend([group] if isinstance(group, str) else group)

    data = np.random.randint(2, size=(num_rows, len(features)))
    df = pd.DataFrame(data, columns=features)

    # Row totals are taken from the raw array so the TARGET column can never
    # leak into them (summing the frame after TARGET was added made the
    # "all features set" check compare against features + score).
    row_sum = pd.Series(data.sum(axis=1), index=df.index)
    lowest = row_sum.min()
    highest = row_sum.max()
    spread = highest - lowest

    if spread > 0:
        target = (row_sum - lowest) * (100 / spread)
    else:
        # Every row has the same total (e.g. a single row), so min-max scaling
        # would divide by zero. Fall back to the plain share of set features.
        target = row_sum * (100 / max(len(features), 1))
    df['TARGET'] = target

    # Pin the two extremes to exact values regardless of float rounding.
    # With no features at all every row matches both rules and the later one wins.
    df.loc[row_sum == 0, 'TARGET'] = 0
    df.loc[row_sum == len(features), 'TARGET'] = 100
    return df