Image-Text-to-Text
Transformers
English
qwen2_vl
conversational
VLM2Vec-Qwen2VL-2B / eval /A-OKVQA_pred.txt
memray's picture
Upload 84 files
2fbff2e verified
('teeth', '')
('thirty', '')
('water', '')
('frosting', '')
('phone', '')
('animals', '')
('drying hair', '')
('handle', '')
('waterboarding', '')
('denim', '')
('united states', '')
('cake', '')
('grapes', '')
('cough drops', '')
('united states', '')
('paper', '')
('sunset', '')
('i do', '')
('holiday dinner', '')
('fondant', '')
('serve', '')
('frisbee', '')
('animals', '')
('ski resort', '')
('airplane', '')
('bottom', '')
('passenger loading', '')
('nintendo wii', '')
('heat', '')
('bottom', '')
('farm', '')
('white bag', '')
('happy birthday', '')
('playing baseball', '')
('shampoo conditioner', '')
('wii boxing', '')
('home run', '')
('building', '')
('grassy', '')
('metal', '')
('animals', '')
('bathroom', '')
('kitchen', '')
('cutting food', '')
('tissues', '')
('tie dyed', '')
('japanese', '')
('call letters', '')
('safety', '')
('spend time', '')
('bottom', '')
('commercial', '')
('nothing', '')
('bedroom', '')
('pizza party', '')
('dirt bikes', '')
('new york', '')
('fruit', '')
('parking', '')
('left', '')
('france', '')
('christmas', '')
('sports car', '')
('tennis balls', '')
('falling', '')
('mushrooms', '')
('fruit', '')
('white car', '')
('safety', '')
('poles', '')
('microwave', '')
('stone', '')
('jessica biel', '')
('frosting', '')
('propulsion', '')
('girl man', '')
('to travel', '')
('kite', '')
('cell phone', '')
('mushrooms', '')
('to transport', '')
('striped', '')
('paris', '')
('africa', '')
('garage', '')
('window', '')
('dipping', '')
('sunlight', '')
('both', '')
('building', '')
('fireplace', '')
('rain', '')
('to eat', '')
('new york', '')
('skiing', '')
('city', '')
('pitcher', '')
('camera', '')
('parking', '')
('room', '')
('xena', '')
('art show', '')
('equestrianism', '')
('green', '')
('sisters', '')
('drinks', '')
('palm tree', '')
('sails', '')
('mexican', '')
('school', '')
('diagonal', '')
('frosting', '')
('twentieth', '')
('feline', '')
('dodge', '')
('grill', '')
('feline', '')
('traffic light', '')
('grazing', '')
('spectating', '')
('bow tie', '')
('mason jar', '')
('vegetarians', '')
('bananas', '')
('liquer', '')
('broccoli', '')
('disneyland', '')
('picture', '')
('middle', '')
('raining', '')
('picture', '')
('basement', '')
('canadian geese', '')
('britain', '')
('tennis', '')
('red stripe', '')
('cool', '')
('tusks', '')
('waiting room', '')
('red bus', '')
('airplane', '')
('suitcase', '')
('to transport', '')
('train tracks', '')
('taking selfie', '')
('speakers', '')
('seattle', '')
('smiling', '')
('nintendo wii', '')
('investments', '')
('oranges', '')
('tissues', '')
('none', '')
('ground', '')
('one', '')
('omaha', '')
('oncoming traffic', '')
('nothing', '')
('advertisement', '')
('nintendo wii', '')
('europe', '')
('margherita', '')
('skating', '')
('stone', '')
('running', '')
('eating leaves', '')
('commercial', '')
('stove/oven', '')
('meat', '')
('hooves', '')
('hp', '')
('fork', '')
('building', '')
('animals', '')
('animals', '')
('wax', '')
('vegetarian', '')
('girl man', '')
('stripes', '')
('four', '')
('liquer', '')
('classmates', '')
('liquer', '')
('red bus', '')
('plastic', '')
('brown', '')
('storage', '')
('toaster', '')
('to travel', '')
('japanese', '')
('flying', '')
('red wine', '')
('basement', '')
('thriller fiction', '')
('four', '')
('room', '')
('road', '')
('on beach', '')
('plowing', '')
('fridge', '')
('sunlight', '')
('protection', '')
('christmas', '')
('to transport', '')
('spiderman', '')
('throw ball', '')
('throw ball', '')
('mother', '')
('eat them', '')
('girl man', '')
('wimbledon', '')
('clothing', '')
('apple', '')
('ten', '')
('public transport', '')
('carrot', '')
('farm', '')
('plowing', '')
('storage', '')
('airport', '')
('truck', '')
('wire ties', '')
('skateboard', '')
('grandparents', '')
('trash', '')
('stop', '')
('athlete', '')
('missed', '')
('food', '')
('wire ties', '')
('oranges', '')
('forest', '')
('pacifier', '')
('parade', '')
('yeast', '')
('dinner', '')
('sisters', '')
('remote controls', '')
('four', '')
('vespa', '')
('disposable', '')
('raining', '')
('underground', '')
('flying', '')
('liquer', '')
('london', '')
('europe', '')
('bottom', '')
('girl man', '')
('hot sauce', '')
('tokyo', '')
('waterboarding', '')
('flower', '')
('ten', '')
('for bikes', '')
('21', '')
('room', '')
('riding them', '')
('talk show', '')
('horses', '')
('banana split', '')
('pushing off', '')
('eat', '')
('white car', '')
('station', '')
('snowboarding', '')
('middle', '')
('four', '')
('spiderman', '')
('little league', '')
('thirty', '')
('white bag', '')
('eating', '')
('living room', '')
('singing', '')
('cutting food', '')
('residential', '')
('laptop', '')
('candle', '')
('yellow', '')
('wok', '')
('clothing', '')
('candle', '')
('classmates', '')
('peel it', '')
('nothing', '')
('animals', '')
('bottom', '')
('beer', '')
('fence', '')
('fall season', '')
('girl man', '')
('striped', '')
('zero', '')
('coke', '')
('public transport', '')
('mouse', '')
('forehand', '')
('parallel', '')
('cement', '')
('four', '')
('suitcase', '')
('for shade', '')
('headphones', '')
('beer', '')
('four', '')
("don't walk", '')
('suitcase', '')
('room', '')
('shoes', '')
('orange', '')
('cotton', '')
('computer', '')
('herding', '')
('breakfast', '')
('two', '')
('yellow', '')
('zebra', '')
('lighting', '')
('children', '')
('skateboard', '')
('business', '')
('knife', '')
('handle', '')
('whale', '')
('new york', '')
('nike', '')
('raining', '')
('taking selfie', '')
('sports car', '')
('dirt', '')
('building', '')
('white van', '')
('cooling', '')
('to catch', '')
('four', '')
('cool', '')
('doorway', '')
('four', '')
('grassy', '')
('clay', '')
('morning', '')
('grassy', '')
('forehand', '')
('balls', '')
('green', '')
("don't walk", '')
('teapot', '')
('oncoming traffic', '')
('rain protection', '')
('black', '')
('stone', '')
('shoes', '')
('time lapse', '')
('grey', '')
('statues', '')
('four', '')
('red wine', '')
('heat', '')
('cruise', '')
('animals', '')
('nintendo wii', '')
('bottom', '')
('forehand', '')
('to transport', '')
('glass', '')
('imac', '')
('united states', '')
('omaha', '')
('holding pencils', '')
('cigarette', '')
('hp', '')
('academic', '')
('shoes', '')
('pickles', '')
('parallel', '')
('four', '')
('bottom', '')
('safety', '')
('one', '')
('nike', '')
('icing', '')
('home plate', '')
('meat', '')
('love us', '')
('drinks', '')
('striped', '')
('safety', '')
('four', '')
('helium', '')
('afternoon', '')
('to balance', '')
('home', '')
('weekend', '')
('black', '')
('fish eye', '')
('airplane', '')
('donut', '')
('broccoli', '')
('zero', '')
('herd sheep', '')
('playful', '')
('victorian', '')
('running', '')
('in shape', '')
('animals', '')
('equestrianism', '')
('asian', '')
('fireplace', '')
('cellular', '')
('cow', '')
('hotel', '')
('wire ties', '')
('peapod', '')
('playing baseball', '')
('ipod', '')
('sleep', '')
('little league', '')
('sadness', '')
('meat', '')
('canada', '')
('drinks', '')
('summer', '')
('three', '')
('mushrooms', '')
('shade', '')
('home run', '')
('drinks', '')
('building', '')
('hooves', '')
('hotel', '')
('white', '')
('africa', '')
('gloves', '')
('ground', '')
('to eat', '')
('fruit', '')
('same color', '')
('visibility', '')
('on beach', '')
('yellow', '')
('bedroom', '')
('chairlift', '')
('dirt', '')
('sunset', '')
('for shade', '')
('yellow', '')
('olive oil', '')
('kayaking', '')
('wine tasting', '')
('holding pencils', '')
('window', '')
('dirty', '')
('motorcycle', '')
('to eat', '')
('chariot racing', '')
('stir', '')
('grass', '')
('sitting', '')
('liquer', '')
('four', '')
('hp', '')
('safety', '')
('snow', '')
('wimbledon', '')
('britain', '')
('purse', '')
('taking selfie', '')
('love us', '')
('rain protection', '')
('to race', '')
('herding', '')
('travel', '')
('f', '')
('21', '')
('camera', '')
('vegetarians', '')
('elephant', '')
('airplane', '')
('to travel', '')
('crust', '')
('poles', '')
('grass', '')
('soda', '')
('electric', '')
('forehand', '')
('coke', '')
('body surf', '')
('tourists', '')
('to ride', '')
('us', '')
('rail grind', '')
('living room', '')
('cake', '')
('laughing', '')
('knight', '')
('office', '')
('for shade', '')
('getting married', '')
('sightseeing rides', '')
('crossing street', '')
('cologne germany', '')
('jockeys', '')
('coffee', '')
('usps', '')
('in shape', '')
('1369', '')
('wrinkles', '')
('smoking', '')
('eating', '')
('museum', '')
('pottery', '')
('farm', '')
('clean', '')
('ten', '')
('getting married', '')
('airport', '')
('bug shield', '')
('fighter jets', '')
('bartender', '')
('clean', '')
('four', '')
('watch tv', '')
('protect it', '')
('building', '')
('badminton', '')
('stretching', '')
('xena', '')
('sisters', '')
('one', '')
('girl man', '')
('coach', '')
('sails', '')
('fire hydrants', '')
('tokyo', '')
('cement', '')
('for bikes', '')
('hats', '')
('classmates', '')
('frosting', '')
('raining', '')
('glass', '')
('electric', '')
('sails', '')
('rail grind', '')
('wine', '')
('getting married', '')
('window', '')
('backyard', '')
('running', '')
('moving', '')
('sisters', '')
('chess', '')
('drainage', '')
('octopus', '')
('electric', '')
('tennis', '')
('giraffe', '')
('john mcenroe', '')
('pedestrian', '')
('pulling it', '')
('mousepad', '')
('exclamation', '')
('catcher', '')
('dock', '')
('kimberly elise', '')
('red velvet', '')
('eat food', '')
('illinois', '')
('protect it', '')
('airplane', '')
('building', '')
('return serve', '')
('left hand', '')
('sleeping bag', '')
('middle', '')
('i do', '')
('serve', '')
('zebra', '')
('donuts', '')
('buildings', '')
('picture', '')
('seventeen', '')
('camera', '')
('cow', '')
('building', '')
('buses', '')
('for shade', '')
('meat', '')
('cooler', '')
('utensils', '')
('clouds', '')
('michael', '')
('glass', '')
('groom', '')
('baseball bat', '')
('water', '')
('speakers', '')
('charcuterie', '')
('picture', '')
('white', '')
('fall season', '')
('domestic pet', '')
('black', '')
('north bergen', '')
('seniors', '')
('passenger loading', '')
('metal', '')
('ocean', '')
('east asia', '')
('watch tv', '')
('thirty', '')
('asian', '')
('eat food', '')
('phone', '')
('feline', '')
('police officer', '')
('churning water', '')
('large closet', '')
('yogurt', '')
('snow boarding', '')
('four', '')
('tusk', '')
('east asia', '')
('car wash', '')
('pelican', '')
('animals', '')
('both', '')
('four', '')
('i do', '')
('room', '')
('canadian geese', '')
('sisters', '')
('serve', '')
('sunlight', '')
('fun', '')
('squatting', '')
('pizzeria', '')
('liquer', '')
('eye protection', '')
('flag', '')
('fish', '')
('donald duck', '')
('pulling it', '')
('four', '')
('girl man', '')
('grilled', '')
('20-30 years', '')
('checkerboard', '')
('afternoon', '')
('balls', '')
('protect it', '')
('eat', '')
('eat food', '')
('match', '')
('mouse', '')
('fence', '')
('watch tv', '')
('jeep', '')
('train tracks', '')
('bunt', '')
('buses', '')
('liquer', '')
('catcher', '')
('camera', '')
('plowing', '')
('australia', '')
('doughnuts', '')
('running', '')
('red', '')
('frosting', '')
('islam', '')
('nintendo wii', '')
('park', '')
('europe', '')
('forest', '')
('pulling it', '')
('stir frying', '')
('snow', '')
('elephant', '')
('bunt', '')
('truck', '')
('compact fluorescent', '')
('sunlight', '')
('catcher', '')
('fall season', '')
('coke', '')
('thirty', '')
('pizzeria', '')
('striped', '')
('napping', '')
('parade', '')
('usps', '')
('6 feet', '')
('herding', '')
('liquer', '')
('june', '')
('exhibition', '')
('train tracks', '')
('speeding', '')
('awaiting greenlight', '')
('parking', '')
('plastic', '')
('jumping', '')
('clown fish', '')
('tusks', '')
('bow tie', '')
('film', '')
('motorcycle dealer', '')
('toy story', '')
('tomato', '')
('electric', '')
('fence', '')
('powdered sugar', '')
('cigarette', '')
('fence', '')
('breakfast', '')
('handle', '')
('glass', '')
('donuts', '')
('zero', '')
('sails', '')
('kitchen', '')
('hotel room', '')
('crt', '')
('cutting food', '')
('devil', '')
('top', '')
('legos', '')
('picture taking', '')
('ocean', '')
('hereford', '')
('fall season', '')
('seventeen', '')
('downhill', '')
('pelican', '')
('coke', '')
('farm', '')
('tropical', '')
('plaid', '')
('nothing', '')
('smoking', '')
('paper', '')
('headset', '')
('cake', '')
('running', '')
('sarong', '')
('gas', '')
('chess', '')
('bottom', '')
('eye protection', '')
('gloves', '')
('curiosity', '')
('japan', '')
('construction worker', '')
('heat', '')
('christmas', '')
('john mcenroe', '')
('thirty', '')
('fondant', '')
('zero', '')
('plate', '')
('fruit', '')
('to catch', '')
('onion', '')
('dirt', '')
('dinner', '')
('apple', '')
('mustang', '')
('oncoming traffic', '')
('forest', '')
('home run', '')
('squatting', '')
('resort', '')
('clean', '')
('britain', '')
('frisbee', '')
('roasted', '')
('fluffy', '')
('porcelain', '')
('margherita', '')
('utensils', '')
('breakfast', '')
('us', '')
('residential', '')
('laying', '')
('alcohol', '')
('grapes', '')
('egypt', '')
('animals', '')
('mountain', '')
('fashion', '')
('cars', '')
('metal', '')
('four', '')
('restroom', '')
('plastic', '')
('cross street', '')
('skateboard', '')
('serve', '')
('staring', '')
('sadness', '')
('to sell', '')
('drying hands', '')
('mouse', '')
('dodgers', '')
('printing', '')
('four', '')
('laptop', '')
('oncoming traffic', '')
('diagonal', '')
('bottom', '')
('lgbt', '')
('tennis competition', '')
('clown fish', '')
('love us', '')
('europe', '')
('red light', '')
('smiling', '')
('fridge', '')
('carrot', '')
('animals', '')
('propulsion', '')
('absolutely no', '')
('console', '')
('flying', '')
('tractor', '')
('ceiling', '')
('building', '')
('one', '')
('union jack', '')
('photographers', '')
('baseball cap', '')
('produce', '')
('forehand', '')
('downhill', '')
('pointsettia', '')
('oncoming traffic', '')
('smoke', '')
('cell phone', '')
('grip', '')
('pushing off', '')
('united states', '')
('christianity', '')
('ten', '')
('pizzeria', '')
('take picture', '')
('red wine', '')
('passenger transport', '')
('taking selfie', '')
('girl man', '')
('picture', '')
('remote controls', '')
('ranch', '')
('commercial', '')
('left hand', '')
('surfboard', '')
('baseball bat', '')
('volvo', '')
('for reception', '')
('love us', '')
('academic', '')
('grill', '')
('bottom', '')
('fireplace', '')
('bottom', '')
('jumping', '')
('cellular', '')
('raining', '')
('sisters', '')
('balls', '')
('back seat', '')
('laptop', '')
('to sell', '')
('drinking', '')
('icing', '')
('pepsi', '')
('skyline', '')
('teddy bear', '')
('parallel', '')
('rail grind', '')
('herding', '')
('formal', '')
('seattle', '')
('analog', '')
('pulling it', '')
('90', '')
('tennis', '')
('herd sheep', '')
('fruit', '')
('glasses', '')
('europe', '')
('bananas', '')
('snow', '')
('trophy', '')
('seniors', '')
('bottom', '')
('building', '')
('building', '')
('soda', '')
('cook', '')
('desserts', '')
('to transport', '')
('island', '')
('tennis balls', '')
('girl man', '')
('throw ball', '')
('cross street', '')
('london', '')
('united states', '')
('cocoa', '')
('four', '')
('volkswagen', '')
('fruit', '')
('heat', '')
('isuzu', '')
('top', '')
('in shape', '')
('picture', '')
('apple', '')
('snowboarding', '')
('grass', '')
('parallel', '')
('horses', '')
('public transport', '')
('building', '')
('commercial kitchen', '')
('truck', '')
('four', '')
('frosting', '')
('sugar', '')
('laying', '')
('white bag', '')
('orange tractor', '')
('formal', '')
('for bikes', '')
('frisbee', '')
('malaysia', '')
('potato', '')
('office', '')
('tv', '')
('white', '')
('hooves', '')
('fish', '')
('fruit', '')
('girl man', '')
('eating', '')
('broken bones', '')
('tin foil', '')
('crashing', '')
('john mcenroe', '')
('reading', '')
('sailing vessel', '')
('safety', '')
('sneakers', '')
('us', '')
('imac', '')
('under armour', '')
('africa', '')
('nothing', '')
('return serve', '')
('asian', '')
('union jack', '')
('skateboard', '')
('grocery store', '')
('on beach', '')
('sitting', '')
('hp', '')
('staring', '')
('watch tv', '')
('africa', '')
('fall season', '')
('left hand', '')
('gloves', '')
('for bikes', '')
('gymnasium', '')
('tank car', '')
('holding pencils', '')
('body surf', '')
('living room', '')
('metal', '')
('sails', '')
('playing baseball', '')
('two', '')
('wake up', '')
('tours', '')
('one', '')
('rice', '')
('microwave', '')
('wine tasting', '')
('one', '')
('stella artois', '')
('picture taking', '')
('forehand', '')
('microwave', '')
('clock', '')
('east asia', '')
('evening', '')
('t-hsirt', '')
('crust', '')