# Page-scrape residue (not program code), kept as comments so the module parses:
# jackyliang42's picture
# working video
# 9a40e4f
# raw
# history blame
# 5.02 kB
import openai
import numpy as np
from tempfile import NamedTemporaryFile
import copy
import shapely
from shapely.geometry import *
from shapely.affinity import *
from omegaconf import OmegaConf
from moviepy.editor import ImageSequenceClip
import gradio as gr
from lmp import LMP, LMPFGen
from sim import PickPlaceEnv, LMP_wrapper
from consts import ALL_BLOCKS, ALL_BOWLS
class DemoRunner:
    """Backend for the Gradio demo: owns the simulated env and the top-level LMP.

    Typical use is ``setup(...)`` once (or again to reset), then ``run(...)``
    for each instruction. Both methods return tuples shaped for the Gradio
    outputs they are wired to.
    """

    def __init__(self):
        """Load ``cfg.yaml`` (interpolations resolved) and start with no env."""
        self._cfg = OmegaConf.to_container(OmegaConf.load('cfg.yaml'), resolve=True)
        self._env = None
        self._model_name = ''

    def make_LMP(self, env):
        """Build and return the high-level ``tabletop_ui`` LMP bound to ``env``.

        Args:
            env: environment exposing ``obj_name_to_id``; it is wrapped with
                ``LMP_wrapper`` and its methods become callable by the LMPs.

        Returns:
            The ``LMP`` instance named ``'tabletop_ui'``.
        """
        # Work on a copy so per-setup edits never leak into the loaded config.
        cfg = copy.deepcopy(self._cfg)
        cfg['env'] = {
            'init_objs': list(env.obj_name_to_id.keys()),
            'coords': cfg['tabletop_coords'],
        }
        # Every LMP uses the model currently selected in the UI.
        for lmp_cfg in cfg['lmps'].values():
            lmp_cfg['engine'] = self._model_name

        # LMP env wrapper
        LMP_env = LMP_wrapper(env, cfg)

        # creating APIs that the LMPs can interact with
        fixed_vars = {'np': np}
        # Look the public shapely names up on their modules directly instead of
        # eval()-ing them out of this module's globals. Affinity is applied
        # second, matching the original precedence on any name clash.
        for module in (shapely.geometry, shapely.affinity):
            fixed_vars.update(
                {name: getattr(module, name) for name in module.__all__}
            )

        env_api_names = [
            'get_bbox', 'get_obj_pos', 'get_color', 'is_obj_visible', 'denormalize_xy',
            'put_first_on_second', 'get_obj_names',
            'get_corner_name', 'get_side_name',
        ]
        variable_vars = {k: getattr(LMP_env, k) for k in env_api_names}
        variable_vars['say'] = lambda msg: print(f'robot says: {msg}')

        # creating the function-generating LMP
        lmp_fgen = LMPFGen(cfg['lmps']['fgen'], fixed_vars, variable_vars)

        # creating other low-level LMPs
        variable_vars.update({
            k: LMP(k, cfg['lmps'][k], lmp_fgen, fixed_vars, variable_vars)
            for k in ['parse_obj_name', 'parse_position', 'parse_question', 'transform_shape_pts']
        })

        # creating the LMP that deals w/ high-level language commands
        lmp_tabletop_ui = LMP(
            'tabletop_ui', cfg['lmps']['tabletop_ui'], lmp_fgen, fixed_vars, variable_vars
        )
        return lmp_tabletop_ui

    def setup(self, api_key, model_name, n_blocks, n_bowls):
        """Create a fresh environment with randomly chosen objects.

        Args:
            api_key: OpenAI API key; stored on the ``openai`` module.
            model_name: engine name written into every LMP config.
            n_blocks: number of blocks to sample from ``ALL_BLOCKS``.
            n_bowls: number of bowls to sample from ``ALL_BOWLS``.

        Returns:
            ``(info, img)``: a markdown list of the objects in the scene and
            the camera image of the freshly reset environment.
        """
        openai.api_key = api_key
        self._model_name = model_name
        self._env = PickPlaceEnv(render=True, high_res=False, high_frame_rate=False)

        # UI sliders may deliver float-typed values; numpy requires an int size.
        block_list = np.random.choice(ALL_BLOCKS, size=int(n_blocks), replace=False).tolist()
        bowl_list = np.random.choice(ALL_BOWLS, size=int(n_bowls), replace=False).tolist()
        obj_list = block_list + bowl_list

        self._env.reset(obj_list)
        self._lmp_tabletop_ui = self.make_LMP(self._env)

        info = '## Available objects: \n- ' + '\n- '.join(obj_list)
        img = self._env.get_camera_image()
        return info, img

    def run(self, instruction):
        """Execute one instruction and render any captured frames to a video.

        Args:
            instruction: natural-language command passed to the top-level LMP.

        Returns:
            ``(status, video_path)``. ``video_path`` is the path of a temporary
            ``.mp4`` file, or ``None`` when setup has not been run or no frames
            were captured. Always a 2-tuple, because the handler is bound to
            two output components.
        """
        if self._env is None:
            return 'Please run setup first', None

        self._env.cache_video = []
        self._lmp_tabletop_ui(instruction, f'objects = {self._env.object_list}')

        video_file_name = None
        if self._env.cache_video:
            rendered_clip = ImageSequenceClip(self._env.cache_video, fps=25)
            # Reserve a path, then close our handle before the encoder writes
            # to it; delete=False keeps the file around for the UI to serve.
            with NamedTemporaryFile(suffix='.mp4', delete=False) as tmp_file:
                video_file_name = tmp_file.name
            rendered_clip.write_videofile(video_file_name, fps=25)

        return 'Done', video_file_name
if __name__ == '__main__':
    # Local import: only the script entry point needs it.
    import os

    demo_runner = DemoRunner()
    demo = gr.Blocks()

    with demo:
        # Top row: setup controls on the left, scene image on the right.
        with gr.Row():
            with gr.Column():
                with gr.Row():
                    # SECURITY: never ship a real API key as the default value.
                    # Pre-fill from the environment if available, else leave
                    # the field empty for the user to paste their own key.
                    inp_api_key = gr.Textbox(
                        label='OpenAI API Key',
                        lines=1,
                        value=os.environ.get('OPENAI_API_KEY', ''),
                    )
                    inp_model_name = gr.Dropdown(label='Model Name', choices=['code-davinci-002', 'text-davinci-002'], value='code-davinci-002')
                with gr.Row():
                    inp_n_blocks = gr.Slider(label='Num Blocks', minimum=0, maximum=3, value=3, step=1)
                    inp_n_bowls = gr.Slider(label='Num Bowls', minimum=0, maximum=3, value=3, step=1)
                btn_setup = gr.Button("1) Setup/Reset Env")
                info_setup = gr.Markdown(label='Setup Info')
            with gr.Column():
                img_setup = gr.Image(label='Setup Image')

        # Bottom row: instruction entry on the left, resulting video on the right.
        with gr.Row():
            with gr.Column():
                inp_instruction = gr.Textbox(label='Instruction', lines=1)
                btn_run = gr.Button("2) Run Instruction")
                info_run = gr.Label(label='Run Info')
            with gr.Column():
                video_run = gr.Video(label='Run Video')

        # Event wiring must happen inside the Blocks context.
        btn_setup.click(
            demo_runner.setup,
            inputs=[inp_api_key, inp_model_name, inp_n_blocks, inp_n_bowls],
            outputs=[info_setup, img_setup]
        )
        btn_run.click(
            demo_runner.run,
            inputs=[inp_instruction],
            outputs=[info_run, video_run]
        )

    demo.launch()