Spaces:
				
			
			
	
			
			
		Running
		
			on 
			
			A100
	
	
	
			
			
	
	
	
	
		
		
		Running
		
			on 
			
			A100
	| # Copyright (c) 2025 NVIDIA CORPORATION. | |
| # Licensed under the MIT license. | |
| # Adapted from https://github.com/NVlabs/VILA/tree/main under the Apache 2.0 license. | |
| # LICENSE is in incl_licenses directory. | |
| from .language_model.llava_llama import LlavaLlamaConfig, LlavaLlamaModel | |
| # FP8 related comments, development in progress (PI: ligeng zhu, haochen xi) | |
| # NOTE: VLM + LLM | |
| # from .language_model.qllava_qllama import QLlavaLlamaConfig, QLlavaLlamaModel | |
| # NOTE: Linear -> fp8, similar to transformer engine | |
| # from .language_model.qllama import QLlamaConfig, QLlamaForCausalLM, QLlamaModel | |
| # NOTE: Linear + Activation -> fp8, haochen's iclr version | |
| # from .language_model.qmemllama import QMemLlamaConfig, QMemLlamaForCausalLM, QMemLlamaModel | |
| """ | |
| TODO: | |
| linear(weights): | |
| simulated fp8: done | |
| real fp8: in-progress (code already implemented) | |
| activation: | |
| simulated fp8: done | |
| real fp8: in-progress (still coding) | |
| optimizers: | |
| current VILA: bf16 | |
| simulated fp8: done | |
| real fp8 + fsdp (single node): done | |
| real fp8 + fsdp (multiple node): in-progress | |
| 1. linear fp8 | |
| 2. activation fp8 | |
| 3. fp8 inference example (load directly from a fp8 and fwd) | |
| 4. bind fp8 related configs to QLlamaConfig {"coat_fp8_args": {}} | |
| """ | |
| from .language_model.fp8linearqwen2 import FP8LinearQwen2Config, FP8LinearQwen2Model | |
| from .language_model.qllava_qllama import QLlavaLlamaConfig, QLlavaLlamaModel | |
