# Page-extraction residue (not part of the module): file size 3,310 bytes, revision 6755a2d.
import numpy as np
import pickle
from os.path import join as pjoin
# Maps a part-of-speech tag to its slot in the one-hot POS vector.
# A tag's position in the tuple below IS its index, so the order of the
# entries must not be changed.
POS_enumerator = {
    tag: slot
    for slot, tag in enumerate((
        # Plain part-of-speech tags (slots 0-8).
        'VERB', 'NOUN', 'DET', 'ADP', 'NUM', 'AUX', 'PRON', 'ADJ', 'ADV',
        # Keyword categories (slots 9-13); the same names are the keys of
        # VIP_dict.
        'Loc_VIP', 'Body_VIP', 'Obj_VIP', 'Act_VIP', 'Desc_VIP',
        # Catch-all slot (14) for every tag not listed above.
        'OTHER',
    ))
}
# Keyword tuples: a word found in one of these is tagged with the matching
# *_VIP category (see VIP_dict) instead of its ordinary part-of-speech tag.

# Directions and path shapes.
Loc_list = (
    'left', 'right', 'clockwise', 'counterclockwise', 'anticlockwise',
    'forward', 'back', 'backward', 'up', 'down', 'straight', 'curve',
)

# Body parts.
Body_list = (
    'arm', 'chin', 'foot', 'feet', 'face', 'hand', 'mouth', 'leg', 'waist',
    'eye', 'knee', 'shoulder', 'thigh',
)

# Objects. (The capital "L" in the name is kept as-is so that existing
# references to Obj_List keep working.)
Obj_List = (
    'stair', 'dumbbell', 'chair', 'window', 'floor', 'car', 'ball',
    'handrail', 'baseball', 'basketball',
)

# Actions. NOTE: 'dance' is listed twice; this is harmless for membership
# tests and is preserved so the tuple contents stay unchanged.
Act_list = (
    'walk', 'run', 'swing', 'pick', 'bring', 'kick', 'put', 'squat',
    'throw', 'hop', 'dance', 'jump', 'turn', 'stumble', 'dance', 'stop',
    'sit', 'lift', 'lower', 'raise', 'wash', 'stand', 'kneel', 'stroll',
    'rub', 'bend', 'balance', 'flap', 'jog', 'shuffle', 'lean', 'rotate',
    'spin', 'spread', 'climb',
)

# Manner and mood descriptors.
Desc_list = (
    'slowly', 'carefully', 'fast', 'careful', 'slow', 'quickly', 'happy',
    'angry', 'sad', 'happily', 'angrily', 'sadly',
)

# Category name -> keyword tuple. The keys are also keys of POS_enumerator.
VIP_dict = {
    'Loc_VIP': Loc_list,
    'Body_VIP': Body_list,
    'Obj_VIP': Obj_List,
    'Act_VIP': Act_list,
    'Desc_VIP': Desc_list,
}
class WordVectorizer(object):
    """Look up a word vector and a one-hot POS vector for ``word/POS`` tokens.

    Attributes:
        word2idx: mapping loaded from ``<prefix>_idx.pkl``; its values are
            used to index the array loaded from ``<prefix>_data.npy``.
        word2vec: dict mapping every word in ``<prefix>_words.pkl`` to its
            entry of that array.
    """

    def __init__(self, meta_root, prefix):
        """Load the vector table and vocabulary files from ``meta_root``.

        Args:
            meta_root: directory containing the three metadata files.
            prefix: file-name prefix; the files read are
                ``<prefix>_data.npy``, ``<prefix>_words.pkl`` and
                ``<prefix>_idx.pkl``.
        """
        vectors = np.load(pjoin(meta_root, '%s_data.npy' % prefix))
        # NOTE: unpickling can execute arbitrary code, so meta_root must only
        # contain trusted files. The handles are closed deterministically
        # instead of being left to the garbage collector.
        with open(pjoin(meta_root, '%s_words.pkl' % prefix), 'rb') as words_file:
            words = pickle.load(words_file)
        with open(pjoin(meta_root, '%s_idx.pkl' % prefix), 'rb') as idx_file:
            self.word2idx = pickle.load(idx_file)
        self.word2vec = {w: vectors[self.word2idx[w]] for w in words}

    def _get_pos_ohot(self, pos):
        """Return a one-hot vector of length ``len(POS_enumerator)`` for ``pos``.

        Tags that are not keys of ``POS_enumerator`` use the ``'OTHER'`` slot.
        """
        pos_vec = np.zeros(len(POS_enumerator))
        pos_vec[POS_enumerator.get(pos, POS_enumerator['OTHER'])] = 1
        return pos_vec

    def __len__(self):
        """Return the number of words that have a vector."""
        return len(self.word2vec)

    def __getitem__(self, item):
        """Return ``(word_vec, pos_vec)`` for a ``'word/POS'`` token.

        A known word that appears in one of the ``VIP_dict`` keyword tuples
        gets that category's one-hot vector instead of the one for its own
        POS tag. A word without a vector gets the ``'unk'`` vector and the
        ``'OTHER'`` one-hot vector.

        Raises:
            ValueError: if ``item`` contains no ``'/'``.
            KeyError: if the word is unknown and ``'unk'`` has no vector.
        """
        # Split on the LAST slash only: the tag is the final field, so a word
        # that itself contains '/' no longer breaks the two-way unpacking.
        word, pos = item.rsplit('/', 1)
        if word not in self.word2vec:
            return self.word2vec['unk'], self._get_pos_ohot('OTHER')

        # The first category whose keyword tuple contains the word wins.
        vip_pos = next(
            (key for key, values in VIP_dict.items() if word in values),
            None,
        )
        pos_vec = self._get_pos_ohot(pos if vip_pos is None else vip_pos)
        return self.word2vec[word], pos_vec
class WordVectorizerV2(WordVectorizer):
    """WordVectorizer that also returns the word's index and maps indices back.

    Attributes:
        idx2word: inverse of ``word2idx`` (index -> word).
    """

    def __init__(self, meta_root, prefix):
        """Load the base tables, then build the index -> word mapping.

        Args:
            meta_root: directory containing the metadata files.
            prefix: file-name prefix of the metadata files.
        """
        super(WordVectorizerV2, self).__init__(meta_root, prefix)
        self.idx2word = {idx: word for word, idx in self.word2idx.items()}

    def __getitem__(self, item):
        """Return ``(word_vec, pos_vec, word_index)`` for a ``'word/POS'`` token.

        The first two elements come from ``WordVectorizer.__getitem__``. The
        index is ``word2idx[word]`` when the word has a vector, otherwise
        ``word2idx['unk']``.
        """
        word_vec, pos_vec = super(WordVectorizerV2, self).__getitem__(item)
        # Split on the last slash only; the tag itself is not needed here.
        word = item.rsplit('/', 1)[0]
        # Membership is tested against word2vec (not word2idx) so the index
        # falls back to 'unk' in exactly the cases where the vector does.
        key = word if word in self.word2vec else 'unk'
        return word_vec, pos_vec, self.word2idx[key]

    def itos(self, idx):
        """Return the word stored at ``idx``.

        The index equal to ``len(self.idx2word)`` is reported as ``"pad"``.

        Raises:
            KeyError: for any other index that is not in ``idx2word``.
        """
        if idx == len(self.idx2word):
            return "pad"
        return self.idx2word[idx]