# Scraped-page header (not part of the program): file size 766 bytes, revision 0f3dbb0.
import os

import pandas as pd
from zeno import DistillReturn, distill

def _parse_land_area(value):
    """Return the leading number of a land-area cell as a float.

    String cells look like ``"23,180 (8,950)"``: the text before the first
    space is taken and its thousands separators are stripped. Non-string
    cells (the column may also hold floats) are converted with ``float()``,
    which leaves a float's value unchanged.
    """
    if isinstance(value, str):
        return float(value.split(" ")[0].replace(",", ""))
    return float(value)


# Read the CSV that sits in the same directory as this file. os.path.join is
# used instead of string concatenation so that an empty dirname (a bare
# relative __file__) resolves to the current directory rather than to the
# filesystem root ("/country-areas.csv").
area_df = pd.read_csv(
    os.path.join(os.path.dirname(__file__), "country-areas.csv")
)
# Index rows by country so they can be looked up by label.
area_df = area_df.set_index("country")
# Drop rows whose land area is unknown. The explicit copy makes the filtered
# frame independent, so adding the "area" column below does not assign onto a
# slice of the original frame.
area_df = area_df[area_df["land"] != "not determined"].copy()
area_df["area"] = area_df["land"].apply(_parse_land_area)


@distill
def area(df, ops):
    """Look up the area for each label in ``df``.

    Args:
        df: Table whose column named by ``ops.label_column`` is iterated;
            presumably a pandas DataFrame supplied by zeno (confirm against
            the caller).
        ops: Options object; only its ``label_column`` attribute is read.

    Returns:
        A ``DistillReturn`` whose ``distill_output`` is a list with one entry
        per label: the value of the ``area`` column of ``area_df`` for that
        label, or ``-1`` when the label is not in the ``area_df`` index.
    """
    # Select the column once instead of building a full row for every label.
    area_by_country = area_df["area"]
    known_countries = area_by_country.index
    areas = [
        area_by_country.loc[label] if label in known_countries else -1
        for label in df[ops.label_column]
    ]
    return DistillReturn(distill_output=areas)