Cannot see the ["Wqkv", "out_proj"] modules in Phi-2 that are to be used in Universal-NER. How is it working?

#1
by sbakhtyar - opened

phi-2 Model :
PhiForCausalLM(
(model): PhiModel(
(embed_tokens): Embedding(51200, 2560)
(embed_dropout): Dropout(p=0.0, inplace=False)
(layers): ModuleList(
(0-31): 32 x PhiDecoderLayer(
(self_attn): PhiAttention(
(q_proj): Linear4bit(in_features=2560, out_features=2560, bias=True)
(k_proj): Linear4bit(in_features=2560, out_features=2560, bias=True)
(v_proj): Linear4bit(in_features=2560, out_features=2560, bias=True)
(dense): Linear4bit(in_features=2560, out_features=2560, bias=True)
(rotary_emb): PhiRotaryEmbedding()
)
(mlp): PhiMLP(
(activation_fn): NewGELUActivation()
(fc1): Linear4bit(in_features=2560, out_features=10240, bias=True)
(fc2): Linear4bit(in_features=10240, out_features=2560, bias=True)
)
(input_layernorm): LayerNorm((2560,), eps=1e-05, elementwise_affine=True)
(resid_dropout): Dropout(p=0.1, inplace=False)
)
)
(final_layernorm): LayerNorm((2560,), eps=1e-05, elementwise_affine=True)
)
(lm_head): Linear(in_features=2560, out_features=51200, bias=True)
)

I cannot see the ["Wqkv", "out_proj"] modules that are to be used in Universal-NER. How is it working?

Hi @sbakhtyar, if you check my GitHub fine-tuning code, you can see that the model has the ["Wqkv", "out_proj"] modules in it. I am not sure why your code doesn't show them. It may be possible that the owner of the model updated the naming conventions. The model was under a research license when I created the tutorial, but now it's under the MIT license. I checked the model; you can replace ["Wqkv", "out_proj"] with ["q_proj", "k_proj", "v_proj", "dense"].

Mit1208 changed discussion status to closed
Mit1208 changed discussion status to open

@sbakhtyar , I have updated the config file. The issue is resolved.

Mit1208 changed discussion status to closed

I had already tried changing the target modules, but even then I was getting an error on
trainer.train()
I have pasted the error here; please let me know what you think:

TypeError Traceback (most recent call last)
/tmp/ipykernel_2985/3070468114.py in <cell line: 8>()
6 )
7
----> 8 trainer.train()

/opt/conda/lib/python3.9/site-packages/transformers/trainer.py in train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
1537 hf_hub_utils.enable_progress_bars()
1538 else:
-> 1539 return inner_training_loop(
1540 args=args,
1541 resume_from_checkpoint=resume_from_checkpoint,

/opt/conda/lib/python3.9/site-packages/transformers/trainer.py in _inner_training_loop(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)
1785 model.zero_grad()
1786
-> 1787 self.control = self.callback_handler.on_train_begin(args, self.state, self.control)
1788
1789 # Skip the first epochs_trained epochs to get the random state of the dataloader at the right point.

/opt/conda/lib/python3.9/site-packages/transformers/trainer_callback.py in on_train_begin(self, args, state, control)
368 def on_train_begin(self, args: TrainingArguments, state: TrainerState, control: TrainerControl):
369 control.should_training_stop = False
--> 370 return self.call_event("on_train_begin", args, state, control)
371
372 def on_train_end(self, args: TrainingArguments, state: TrainerState, control: TrainerControl):

/opt/conda/lib/python3.9/site-packages/transformers/trainer_callback.py in call_event(self, event, args, state, control, **kwargs)
412 def call_event(self, event, args, state, control, **kwargs):
413 for callback in self.callbacks:
--> 414 result = getattr(callback, event)(
415 args,
416 state,

/opt/conda/lib/python3.9/site-packages/transformers/integrations/integration_utils.py in on_train_begin(self, args, state, control, model, **kwargs)
1022 def on_train_begin(self, args, state, control, model=None, **kwargs):
1023 if not self._initialized:
-> 1024 self.setup(args, state, model)
1025
1026 def on_log(self, args, state, control, logs, model=None, **kwargs):

/opt/conda/lib/python3.9/site-packages/transformers/integrations/integration_utils.py in setup(self, args, state, model)
991 # Use of set_experiment() ensure that Experiment is created if not exists
992 self._ml_flow.set_experiment(self._experiment_name)
--> 993 self._ml_flow.start_run(run_name=args.run_name, nested=self._nested_run)
994 logger.debug(f"MLflow run started with run_id={self._ml_flow.active_run().info.run_id}")
995 self._auto_end_run = True

/opt/conda/lib/python3.9/site-packages/mlflow/tracking/fluent.py in start_run(run_id, experiment_id, run_name, nested, tags)
258 tags = context_registry.resolve_tags(user_specified_tags)
259
--> 260 active_run_obj = MlflowClient().create_run(experiment_id=exp_id_for_run, tags=tags)
261
262 _active_run_stack.append(ActiveRun(active_run_obj))

/opt/conda/lib/python3.9/site-packages/mlflow/tracking/client.py in create_run(self, experiment_id, start_time, tags)
254 status: RUNNING
255 """
--> 256 return self._tracking_client.create_run(experiment_id, start_time, tags)
257
258 def list_run_infos(

/opt/conda/lib/python3.9/site-packages/mlflow/tracking/_tracking_service/client.py in create_run(self, experiment_id, start_time, tags)
99 user_id = tags.get(MLFLOW_USER, "unknown")
100
--> 101 return self.store.create_run(
102 experiment_id=experiment_id,
103 user_id=user_id,

/opt/conda/lib/python3.9/site-packages/mlflow/store/tracking/file_store.py in create_run(self, experiment_id, user_id, start_time, tags)
470 run_uuid = uuid.uuid4().hex
471 artifact_uri = self._get_artifact_dir(experiment_id, run_uuid)
--> 472 run_info = RunInfo(
473 run_uuid=run_uuid,
474 run_id=run_uuid,

TypeError: __init__() missing 1 required positional argument: 'experiment_name'

sbakhtyar changed discussion status to open

@sbakhtyar, can you provide me with more details? Which code are you trying to execute? It would be great if you could share your Colab notebook.

It was a version issue. It's resolved. Thank you. We can close this.

sbakhtyar changed discussion status to closed
This comment has been hidden

Sign up or log in to comment