Adding Neuron-optimized model files

#274
by badaoui (HF Staff) · opened
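
This PR adds AWS Neuron (NeuronX) compiled artifacts for the text encoder, UNet, VAE encoder, and VAE decoder, so the model can run on AWS Inferentia/Trainium instances via `optimum-neuron`. A minimal loading sketch is below, assuming `optimum-neuron` is installed on a Neuron-capable instance (the repo id is inferred from the config paths in this diff, and `refs/pr/274` is the standard Hub ref for this PR before it is merged):

```python
from optimum.neuron import NeuronStableDiffusionPipeline

# Load the precompiled Neuron artifacts added by this PR.
# Drop the revision argument once the PR is merged.
pipe = NeuronStableDiffusionPipeline.from_pretrained(
    "CompVis/stable-diffusion-v1-4",
    revision="refs/pr/274",
)

image = pipe("A high tech solarpunk utopia in the Amazon rainforest").images[0]
image.save("solarpunk.png")
```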
.gitattributes CHANGED
@@ -29,4 +29,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
-*.safetensors filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+vae_decoder/model.neuron filter=lfs diff=lfs merge=lfs -text
+text_encoder/model.neuron filter=lfs diff=lfs merge=lfs -text
+unet/model.neuron filter=lfs diff=lfs merge=lfs -text
+vae_encoder/model.neuron filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -4,26 +4,29 @@ tags:
 - stable-diffusion
 - stable-diffusion-diffusers
 - text-to-image
+- neuron
 widget:
-- text: "A high tech solarpunk utopia in the Amazon rainforest"
+- text: A high tech solarpunk utopia in the Amazon rainforest
   example_title: Amazon rainforest
-- text: "A pikachu fine dining with a view to the Eiffel Tower"
+- text: A pikachu fine dining with a view to the Eiffel Tower
   example_title: Pikachu in Paris
-- text: "A mecha robot in a favela in expressionist style"
+- text: A mecha robot in a favela in expressionist style
   example_title: Expressionist robot
-- text: "an insect robot preparing a delicious meal"
+- text: an insect robot preparing a delicious meal
   example_title: Insect robot
-- text: "A small cabin on top of a snowy mountain in the style of Disney, artstation"
+- text: A small cabin on top of a snowy mountain in the style of Disney, artstation
   example_title: Snowy disney cabin
-extra_gated_prompt: |-
-  This model is open access and available to all, with a CreativeML OpenRAIL-M license further specifying rights and usage.
-  The CreativeML OpenRAIL License specifies:
-
-  1. You can't use the model to deliberately produce nor share illegal or harmful outputs or content
-  2. The authors claim no rights on the outputs you generate, you are free to use them and are accountable for their use which must not go against the provisions set in the license
-  3. You may re-distribute the weights and use the model commercially and/or as a service. If you do, please be aware you have to include the same use restrictions as the ones in the license and share a copy of the CreativeML OpenRAIL-M to all your users (please read the license entirely and carefully)
-  Please read the full license carefully here: https://huggingface.co/spaces/CompVis/stable-diffusion-license
-
+extra_gated_prompt: "This model is open access and available to all, with a CreativeML\
+  \ OpenRAIL-M license further specifying rights and usage.\nThe CreativeML OpenRAIL\
+  \ License specifies: \n\n1. You can't use the model to deliberately produce nor\
+  \ share illegal or harmful outputs or content \n2. The authors claim no rights on\
+  \ the outputs you generate, you are free to use them and are accountable for their\
+  \ use which must not go against the provisions set in the license\n3. You may re-distribute\
+  \ the weights and use the model commercially and/or as a service. If you do, please\
+  \ be aware you have to include the same use restrictions as the ones in the license\
+  \ and share a copy of the CreativeML OpenRAIL-M to all your users (please read the\
+  \ license entirely and carefully)\nPlease read the full license carefully here:\
+  \ https://huggingface.co/spaces/CompVis/stable-diffusion-license\n "
 extra_gated_heading: Please read the LICENSE to access this model
 ---
feature_extractor/preprocessor_config.json CHANGED
@@ -1,20 +1,27 @@
 {
-  "crop_size": 224,
+  "crop_size": {
+    "height": 224,
+    "width": 224
+  },
   "do_center_crop": true,
   "do_convert_rgb": true,
   "do_normalize": true,
+  "do_rescale": true,
   "do_resize": true,
-  "feature_extractor_type": "CLIPFeatureExtractor",
   "image_mean": [
     0.48145466,
     0.4578275,
     0.40821073
   ],
+  "image_processor_type": "CLIPImageProcessor",
   "image_std": [
     0.26862954,
     0.26130258,
     0.27577711
   ],
   "resample": 3,
-  "size": 224
+  "rescale_factor": 0.00392156862745098,
+  "size": {
+    "shortest_edge": 224
+  }
 }
model_index.json CHANGED
@@ -1,32 +1,39 @@
 {
-  "_class_name": "StableDiffusionPipeline",
-  "_diffusers_version": "0.2.2",
+  "_class_name": "NeuronStableDiffusionPipeline",
+  "_diffusers_version": "0.34.0",
   "feature_extractor": [
     "transformers",
     "CLIPImageProcessor"
   ],
+  "force_zeros_for_empty_prompt": true,
+  "image_encoder": [
+    null,
+    null
+  ],
+  "requires_aesthetics_score": false,
+  "requires_safety_checker": true,
   "safety_checker": [
-    "stable_diffusion",
-    "StableDiffusionSafetyChecker"
+    null,
+    null
   ],
   "scheduler": [
     "diffusers",
     "PNDMScheduler"
   ],
   "text_encoder": [
-    "transformers",
-    "CLIPTextModel"
+    "optimum.neuron.modeling_diffusion",
+    "NeuronModelTextEncoder"
   ],
   "tokenizer": [
     "transformers",
     "CLIPTokenizer"
   ],
   "unet": [
-    "diffusers",
-    "UNet2DConditionModel"
+    "optimum.neuron.modeling_diffusion",
+    "NeuronModelUnet"
   ],
   "vae": [
-    "diffusers",
-    "AutoencoderKL"
+    "optimum.neuron.modeling_diffusion",
+    "NeuronModelVae"
   ]
 }
scheduler/scheduler_config.json CHANGED
@@ -1,13 +1,15 @@
 {
   "_class_name": "PNDMScheduler",
-  "_diffusers_version": "0.7.0.dev0",
+  "_diffusers_version": "0.34.0",
   "beta_end": 0.012,
   "beta_schedule": "scaled_linear",
   "beta_start": 0.00085,
+  "clip_sample": false,
   "num_train_timesteps": 1000,
+  "prediction_type": "epsilon",
   "set_alpha_to_one": false,
   "skip_prk_steps": true,
   "steps_offset": 1,
-  "trained_betas": null,
-  "clip_sample": false
+  "timestep_spacing": "leading",
+  "trained_betas": null
 }
text_encoder/config.json CHANGED
@@ -1,5 +1,6 @@
 {
-  "_name_or_path": "openai/clip-vit-large-patch14",
+  "_attn_implementation_autoset": true,
+  "_name_or_path": "/home/user/.cache/huggingface/hub/models--CompVis--stable-diffusion-v1-4/snapshots/133a221b8aa7292a167afc5127cb63fb5005638b/text_encoder",
   "architectures": [
     "CLIPTextModel"
   ],
@@ -15,10 +16,35 @@
   "layer_norm_eps": 1e-05,
   "max_position_embeddings": 77,
   "model_type": "clip_text_model",
+  "neuron": {
+    "auto_cast": "matmul",
+    "auto_cast_type": "bf16",
+    "compiler_type": "neuronx-cc",
+    "compiler_version": "2.15.143.0+e39249ad",
+    "dynamic_batch_size": false,
+    "inline_weights_to_neff": true,
+    "input_names": [
+      "input_ids"
+    ],
+    "model_type": "clip-text-model",
+    "optlevel": "2",
+    "output_attentions": false,
+    "output_hidden_states": false,
+    "output_names": [
+      "last_hidden_state",
+      "pooler_output"
+    ],
+    "static_batch_size": 1,
+    "static_sequence_length": 77,
+    "tensor_parallel_size": 1
+  },
   "num_attention_heads": 12,
   "num_hidden_layers": 12,
   "pad_token_id": 1,
+  "projection_dim": 512,
+  "task": "feature-extraction",
   "torch_dtype": "float32",
+  "torchscript": true,
-  "transformers_version": "4.21.0.dev0",
+  "transformers_version": "4.48.3",
   "vocab_size": 49408
 }
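
The `neuron` block above records the compilation settings baked into the exported graph: bf16 autocast for matmuls, static batch size 1, sequence length 77, compiled with `neuronx-cc` 2.15.143.0. For reference, a sketch of how such artifacts are typically produced with `optimum-neuron`; the argument names follow its documented export API, but treat the exact flags as assumptions:

```python
from optimum.neuron import NeuronStableDiffusionPipeline

# Re-export the original checkpoint with settings matching the
# "neuron" blocks in this PR: bf16 matmul autocast, static batch
# size 1, and 512x512 images (64x64 latents at VAE scale factor 8).
pipe = NeuronStableDiffusionPipeline.from_pretrained(
    "CompVis/stable-diffusion-v1-4",
    export=True,
    auto_cast="matmul",
    auto_cast_type="bf16",
    batch_size=1,
    height=512,
    width=512,
)
pipe.save_pretrained("sd_neuron/")  # writes the model.neuron files and configs
```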
text_encoder/model.neuron ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d8a5032ffebf9b052ab36054209ecd3d360f2646ae7ea6527c6a2f063b1822f0
+size 375660151
tokenizer/special_tokens_map.json CHANGED
@@ -13,7 +13,13 @@
     "rstrip": false,
     "single_word": false
   },
-  "pad_token": "<|endoftext|>",
+  "pad_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
   "unk_token": {
     "content": "<|endoftext|>",
     "lstrip": false,
tokenizer/tokenizer_config.json CHANGED
@@ -1,34 +1,31 @@
 {
   "add_prefix_space": false,
-  "bos_token": {
-    "__type": "AddedToken",
-    "content": "<|startoftext|>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
+  "added_tokens_decoder": {
+    "49406": {
+      "content": "<|startoftext|>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49407": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<|startoftext|>",
+  "clean_up_tokenization_spaces": false,
   "do_lower_case": true,
-  "eos_token": {
-    "__type": "AddedToken",
-    "content": "<|endoftext|>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
+  "eos_token": "<|endoftext|>",
   "errors": "replace",
+  "extra_special_tokens": {},
   "model_max_length": 77,
-  "name_or_path": "openai/clip-vit-large-patch14",
   "pad_token": "<|endoftext|>",
-  "special_tokens_map_file": "./special_tokens_map.json",
   "tokenizer_class": "CLIPTokenizer",
-  "unk_token": {
-    "__type": "AddedToken",
-    "content": "<|endoftext|>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  }
+  "unk_token": "<|endoftext|>"
 }
unet/config.json CHANGED
@@ -1,8 +1,47 @@
 {
   "_class_name": "UNet2DConditionModel",
-  "_diffusers_version": "0.2.2",
+  "_commit_hash": null,
+  "_diffusers_version": "0.34.0",
+  "_name_or_path": "/home/user/.cache/huggingface/hub/models--CompVis--stable-diffusion-v1-4/snapshots/133a221b8aa7292a167afc5127cb63fb5005638b/unet",
+  "_use_default_values": [
+    "cross_attention_norm",
+    "dropout",
+    "timestep_post_act",
+    "time_cond_proj_dim",
+    "mid_block_type",
+    "resnet_time_scale_shift",
+    "class_embeddings_concat",
+    "time_embedding_act_fn",
+    "mid_block_only_cross_attention",
+    "conv_in_kernel",
+    "conv_out_kernel",
+    "reverse_transformer_layers_per_block",
+    "time_embedding_type",
+    "num_attention_heads",
+    "addition_time_embed_dim",
+    "upcast_attention",
+    "time_embedding_dim",
+    "addition_embed_type",
+    "addition_embed_type_num_heads",
+    "resnet_skip_time_act",
+    "only_cross_attention",
+    "dual_cross_attention",
+    "attention_type",
+    "transformer_layers_per_block",
+    "encoder_hid_dim_type",
+    "encoder_hid_dim",
+    "class_embed_type",
+    "projection_class_embeddings_input_dim",
+    "use_linear_projection",
+    "num_class_embeds",
+    "resnet_out_scale_factor"
+  ],
   "act_fn": "silu",
+  "addition_embed_type": null,
+  "addition_embed_type_num_heads": 64,
+  "addition_time_embed_dim": null,
   "attention_head_dim": 8,
+  "attention_type": "default",
   "block_out_channels": [
     320,
     640,
@@ -10,7 +49,12 @@
     1280
   ],
   "center_input_sample": false,
+  "class_embed_type": null,
+  "class_embeddings_concat": false,
+  "conv_in_kernel": 3,
+  "conv_out_kernel": 3,
   "cross_attention_dim": 768,
+  "cross_attention_norm": null,
   "down_block_types": [
     "CrossAttnDownBlock2D",
     "CrossAttnDownBlock2D",
@@ -18,19 +62,70 @@
     "DownBlock2D"
   ],
   "downsample_padding": 1,
+  "dropout": 0.0,
+  "dual_cross_attention": false,
+  "encoder_hid_dim": null,
+  "encoder_hid_dim_type": null,
   "flip_sin_to_cos": true,
   "freq_shift": 0,
   "in_channels": 4,
   "layers_per_block": 2,
+  "mid_block_only_cross_attention": null,
   "mid_block_scale_factor": 1,
+  "mid_block_type": "UNetMidBlock2DCrossAttn",
+  "neuron": {
+    "auto_cast": "matmul",
+    "auto_cast_type": "bf16",
+    "compiler_type": "neuronx-cc",
+    "compiler_version": "2.15.143.0+e39249ad",
+    "dynamic_batch_size": false,
+    "inline_weights_to_neff": true,
+    "input_names": [
+      "sample",
+      "timestep",
+      "encoder_hidden_states"
+    ],
+    "model_type": "unet",
+    "optlevel": "2",
+    "output_attentions": false,
+    "output_hidden_states": false,
+    "output_names": [
+      "sample"
+    ],
+    "static_batch_size": 1,
+    "static_height": 64,
+    "static_num_channels": 4,
+    "static_sequence_length": 77,
+    "static_vae_scale_factor": 8,
+    "static_width": 64,
+    "tensor_parallel_size": 1
+  },
   "norm_eps": 1e-05,
   "norm_num_groups": 32,
+  "num_attention_heads": null,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
   "out_channels": 4,
+  "projection_class_embeddings_input_dim": null,
+  "resnet_out_scale_factor": 1.0,
+  "resnet_skip_time_act": false,
+  "resnet_time_scale_shift": "default",
+  "reverse_transformer_layers_per_block": null,
   "sample_size": 64,
+  "task": "semantic-segmentation",
+  "time_cond_proj_dim": null,
+  "time_embedding_act_fn": null,
+  "time_embedding_dim": null,
+  "time_embedding_type": "positional",
+  "timestep_post_act": null,
+  "transformer_layers_per_block": 1,
+  "transformers_version": null,
   "up_block_types": [
     "UpBlock2D",
     "CrossAttnUpBlock2D",
     "CrossAttnUpBlock2D",
     "CrossAttnUpBlock2D"
-  ]
+  ],
+  "upcast_attention": false,
+  "use_linear_projection": false
 }
unet/model.neuron ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9d3c33c39c4dbf4fd193e6bb4c8a09da946bc299b5061d30437c086ef0bd9569
+size 1900028915
vae_decoder/config.json ADDED
@@ -0,0 +1,74 @@
+{
+  "_class_name": "AutoencoderKL",
+  "_commit_hash": null,
+  "_diffusers_version": "0.34.0",
+  "_name_or_path": "/home/user/.cache/huggingface/hub/models--CompVis--stable-diffusion-v1-4/snapshots/133a221b8aa7292a167afc5127cb63fb5005638b/vae",
+  "_use_default_values": [
+    "norm_num_groups",
+    "force_upcast",
+    "use_post_quant_conv",
+    "latents_std",
+    "use_quant_conv",
+    "mid_block_add_attention",
+    "latents_mean",
+    "shift_factor"
+  ],
+  "act_fn": "silu",
+  "block_out_channels": [
+    128,
+    256,
+    512,
+    512
+  ],
+  "down_block_types": [
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D"
+  ],
+  "force_upcast": true,
+  "in_channels": 3,
+  "latent_channels": 4,
+  "latents_mean": null,
+  "latents_std": null,
+  "layers_per_block": 2,
+  "mid_block_add_attention": true,
+  "neuron": {
+    "auto_cast": "matmul",
+    "auto_cast_type": "bf16",
+    "compiler_type": "neuronx-cc",
+    "compiler_version": "2.15.143.0+e39249ad",
+    "dynamic_batch_size": false,
+    "inline_weights_to_neff": true,
+    "input_names": [
+      "latent_sample"
+    ],
+    "model_type": "vae-decoder",
+    "optlevel": "2",
+    "output_attentions": false,
+    "output_hidden_states": false,
+    "output_names": [
+      "sample"
+    ],
+    "static_batch_size": 1,
+    "static_height": 64,
+    "static_num_channels": 4,
+    "static_width": 64,
+    "tensor_parallel_size": 1
+  },
+  "norm_num_groups": 32,
+  "out_channels": 3,
+  "sample_size": 512,
+  "scaling_factor": 0.18215,
+  "shift_factor": null,
+  "task": "semantic-segmentation",
+  "transformers_version": null,
+  "up_block_types": [
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D"
+  ],
+  "use_post_quant_conv": true,
+  "use_quant_conv": true
+}
vae_decoder/model.neuron ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4157adfc4fb887890bcc46dce7b0a6cb352f3cf96f3fd528d4fb48c75605c5c1
+size 247896819
vae_encoder/config.json ADDED
@@ -0,0 +1,74 @@
+{
+  "_class_name": "AutoencoderKL",
+  "_commit_hash": null,
+  "_diffusers_version": "0.34.0",
+  "_name_or_path": "/home/user/.cache/huggingface/hub/models--CompVis--stable-diffusion-v1-4/snapshots/133a221b8aa7292a167afc5127cb63fb5005638b/vae",
+  "_use_default_values": [
+    "norm_num_groups",
+    "force_upcast",
+    "use_post_quant_conv",
+    "latents_std",
+    "use_quant_conv",
+    "mid_block_add_attention",
+    "latents_mean",
+    "shift_factor"
+  ],
+  "act_fn": "silu",
+  "block_out_channels": [
+    128,
+    256,
+    512,
+    512
+  ],
+  "down_block_types": [
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D"
+  ],
+  "force_upcast": true,
+  "in_channels": 3,
+  "latent_channels": 4,
+  "latents_mean": null,
+  "latents_std": null,
+  "layers_per_block": 2,
+  "mid_block_add_attention": true,
+  "neuron": {
+    "auto_cast": "matmul",
+    "auto_cast_type": "bf16",
+    "compiler_type": "neuronx-cc",
+    "compiler_version": "2.15.143.0+e39249ad",
+    "dynamic_batch_size": false,
+    "inline_weights_to_neff": true,
+    "input_names": [
+      "sample"
+    ],
+    "model_type": "vae-encoder",
+    "optlevel": "2",
+    "output_attentions": false,
+    "output_hidden_states": false,
+    "output_names": [
+      "latent_parameters"
+    ],
+    "static_batch_size": 1,
+    "static_height": 512,
+    "static_num_channels": 3,
+    "static_width": 512,
+    "tensor_parallel_size": 1
+  },
+  "norm_num_groups": 32,
+  "out_channels": 3,
+  "sample_size": 512,
+  "scaling_factor": 0.18215,
+  "shift_factor": null,
+  "task": "semantic-segmentation",
+  "transformers_version": null,
+  "up_block_types": [
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D"
+  ],
+  "use_post_quant_conv": true,
+  "use_quant_conv": true
+}
vae_encoder/model.neuron ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:702c82996a50c462cdbe251483af14f8fa55541c2e89f1735b3387773c162892
+size 155910897