<!DOCTYPE html>
<html lang="en">
<head>
  <!-- Encoding and viewport are declared first so they are seen before any other head content. -->
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>MMAudio</title>
  <link rel="icon" type="image/png" href="images/icon.png">

  <!-- Google Analytics (gtag.js): async loader plus the inline bootstrap that queues calls on dataLayer. -->
  <script async src="https://www.googletagmanager.com/gtag/js?id=G-0JKBJ3WRJZ"></script>
  <script>
    window.dataLayer = window.dataLayer || [];
    function gtag(){dataLayer.push(arguments);}
    gtag('js', new Date());
    gtag('config', 'G-0JKBJ3WRJZ');
  </script>

  <!-- Web font: Source Sans 3 from Google Fonts, with preconnects to both font origins. -->
  <link rel="preconnect" href="https://fonts.googleapis.com">
  <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
  <link href="https://fonts.googleapis.com/css2?family=Source+Sans+3&display=swap" rel="stylesheet">

  <!-- Bootstrap CSS from jsDelivr. The npm spec must read "bootstrap@<version>".
       NOTE(review): 5.0.1 is the release believed to match the integrity hash below; confirm against the Bootstrap 5.0 docs. -->
  <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.0.1/dist/css/bootstrap.min.css" rel="stylesheet"
        integrity="sha384-+0n0xVW2eSR5OomGNYDnhzAbDsOXxcvSN1TPprVMTNDbiYZCxYbOOl7+AMvyTG2x" crossorigin="anonymous">
  <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>

  <!-- Site stylesheet; it follows Bootstrap, so its rules win ties in the cascade. -->
  <link rel="stylesheet" href="style.css">
</head>
					
						
						<body>
						<!-- Vertical spacer above the title. -->
						<br><br><br><br>
						<!-- Page wrapper: Bootstrap container holding every row of the page; closed just before </body>. -->
						<div class="container">
					
						
						<!-- Paper title. Exposed as the level-1 heading via ARIA so the existing div/class styling is left untouched. -->
						<div class="row text-center" style="font-size:38px">
						  <div class="col strong" role="heading" aria-level="1">
						    Taming Multimodal Joint Training for High-Quality <br>Video-to-Audio Synthesis
						  </div>
						</div>
					
						
						<br>
						<!-- Venue line shown under the title. -->
						<div class="row text-center" style="font-size: 28px;">
						  <div class="col">arXiv 2024</div>
						</div>
						<br>
					
						
						<!-- Author list. Each superscript refers to a numbered affiliation in the row that follows.
						     text-nowrap (Bootstrap utility) keeps a name and its marker on one line. -->
						<div class="h-100 row text-center heavy justify-content-md-center" style="font-size:22px;">
						  <div class="col-sm-auto px-lg-2 text-nowrap">
						    <a href="https://hkchengrex.github.io/">Ho Kei Cheng<sup>1</sup></a>
						  </div>
						  <div class="col-sm-auto px-lg-2 text-nowrap">
						    <a href="https://scholar.google.co.jp/citations?user=RRIO1CcAAAAJ">Masato Ishii<sup>2</sup></a>
						  </div>
						  <div class="col-sm-auto px-lg-2 text-nowrap">
						    <a href="https://scholar.google.com/citations?user=sXAjHFIAAAAJ">Akio Hayakawa<sup>2</sup></a>
						  </div>
						  <div class="col-sm-auto px-lg-2 text-nowrap">
						    <a href="https://scholar.google.com/citations?user=XCRO260AAAAJ">Takashi Shibuya<sup>2</sup></a>
						  </div>
						  <div class="col-sm-auto px-lg-2 text-nowrap">
						    <a href="https://www.alexander-schwing.de/">Alexander Schwing<sup>1</sup></a>
						  </div>
						  <div class="col-sm-auto px-lg-2 text-nowrap">
						    <a href="https://www.yukimitsufuji.com/">Yuki Mitsufuji<sup>2,3</sup></a>
						  </div>
						</div>
					
						
						<!-- Affiliations, numbered to match the author superscripts. -->
						<div class="row h-100 heavy text-center justify-content-md-center" style="font-size: 22px;">
						  <div class="col-sm-auto px-lg-2"><sup>1</sup>University of Illinois Urbana-Champaign</div>
						  <div class="col-sm-auto px-lg-2"><sup>2</sup>Sony AI</div>
						  <div class="col-sm-auto px-lg-2"><sup>3</sup>Sony Group Corporation</div>
						</div>
					
						
						<br>
						<br>
						<!-- Resource links. -->
						<div class="h-100 row text-center justify-content-md-center" style="font-size:20px;">
						  <div class="col-sm-3">
						    <!-- No URL exists yet. An href-less <a> is an inert placeholder; href="" would
						         resolve to this page and reload it on click. Add the href once the paper is out. -->
						    <a>[Paper (being prepared)]</a>
						  </div>
						  <div class="col-sm-3">
						    <a href="https://github.com/hkchengrex/MMAudio">[Code]</a>
						  </div>
						</div>
					
						
						<br>
						<hr>
						<!-- "TL;DR" section: title row (exposed as a level-2 heading via ARIA) followed by a one-sentence summary. -->
						<div class="row" style="font-size:32px">
						  <div class="col strong" role="heading" aria-level="2">
						    TL;DR
						  </div>
						</div>
						<br>
						<div class="row">
						  <div class="col">
						    <p class="light" style="text-align: left;">
						      MMAudio generates synchronized audio given video and/or text inputs.
						    </p>
						  </div>
						</div>
					
						
						<br>
						<hr>
						<br>
						<!-- "Demo" section: title row (level-2 heading via ARIA), link to the results page, embedded video. -->
						<div class="row" style="font-size:32px">
						  <div class="col strong" role="heading" aria-level="2">
						    Demo
						  </div>
						</div>
						<br>
						<div class="row" style="font-size:48px">
						  <div class="col strong text-center">
						    <!-- The angle brackets are literal link text, so they are written as entities;
						         written raw, the parser reads them as an unknown tag and the link renders empty. -->
						    <a href="video_main.html" style="text-decoration: underline;">&lt;More results&gt;</a>
						  </div>
						</div>
						<br>
						<div class="video-container" style="text-align: center;">
						  <!-- title gives the frame an accessible name; allowfullscreen lets the embedded player go full screen. -->
						  <iframe src="https://youtube.com/embed/YElewUT2M4M" title="MMAudio demo video" allowfullscreen></iframe>
						</div>
					
						
						<!-- Bottom padding below the video: five line breaks. -->
						<br>
						<br>
						<br>
						<br>
						<br>
						</div><!-- /.container -->
						</body>
						</html>