MDM-1.7B / generate.py

maomaocun

git init

347e409 28 days ago

6.77 kB

	import torch
	import numpy as np
	import torch.nn.functional as F
	from transformers import AutoTokenizer, AutoModel

	def add_gumbel_noise(logits, temperature):
	    """Perturb ``logits`` with random noise whose strength is set by ``temperature``.

	    When ``temperature`` is 0 the input tensor is returned as-is (same object,
	    same dtype). Otherwise the logits are cast to float64, a uniform random
	    tensor ``u`` of the same shape is drawn, and the function returns
	    ``exp(logits) / (-log(u)) ** temperature`` in float64.
	    """
	    if temperature != 0:
	        # float64 keeps the exp / division below as precise as possible.
	        scores = logits.to(torch.float64)
	        uniform = torch.rand_like(scores, dtype=torch.float64)
	        scale = (-torch.log(uniform)) ** temperature
	        return scores.exp() / scale
	    # No noise requested: hand the logits back untouched.
	    return logits

	def get_num_transfer_tokens(mask_index, steps):
	    """Split each row's count of masked positions into ``steps`` per-step quotas.

	    ``mask_index`` is reduced along dim 1 to get one count per row. Every step
	    receives ``count // steps`` and the first ``count % steps`` steps receive
	    one more, so each row of the result sums to that row's count. The result
	    has shape ``(mask_index.size(0), steps)`` and lives on ``mask_index``'s
	    device; for a boolean mask it is int64.
	    """
	    masked_per_row = mask_index.sum(dim=1, keepdim=True)
	    quotient = masked_per_row // steps
	    leftover = masked_per_row % steps
	    # Column s gets the extra token exactly when s < leftover for that row.
	    step_ids = torch.arange(steps, device=mask_index.device).unsqueeze(0)
	    extra = (step_ids < leftover).to(torch.int64)
	    quotas = torch.zeros(masked_per_row.size(0), steps, device=mask_index.device, dtype=torch.int64)
	    return quotas + quotient + extra

	@torch.no_grad()
	def generate(model, prompt, steps=128, gen_length=128, block_length=128, temperature=0.,
	             cfg_scale=0., remasking='low_confidence', mask_id=128108):
	    """Fill ``gen_length`` masked positions after ``prompt`` by iterative unmasking.

	    A canvas of ``prompt`` followed by ``gen_length`` copies of ``mask_id`` is
	    built, then processed block by block (left to right). In every step the
	    model is run on the whole canvas, a candidate token is chosen for each
	    position, and the highest-scoring still-masked positions of the current
	    block are committed.

	    Args:
	        model: Callable whose result exposes ``.logits``; ``model.device`` is
	            used to place the canvas.
	        prompt: 2-D tensor of token ids, one row per sequence. All rows share
	            one length because they are copied into the canvas as a single
	            rectangle.
	        steps: Total number of unmasking steps; split evenly across blocks.
	        gen_length: Number of positions to generate.
	        block_length: Size of each block; must divide ``gen_length``.
	        temperature: Forwarded to ``add_gumbel_noise`` before the argmax.
	        cfg_scale: If > 0, the model is also run on a copy of the canvas whose
	            prompt positions are masked, and the two logit sets are combined
	            as ``un + (cfg_scale + 1) * (cond - un)``.
	        remasking: ``'low_confidence'`` scores a candidate by its softmax
	            probability; ``'random'`` scores it with a uniform random number.
	        mask_id: Token id that marks a still-masked position.

	    Returns:
	        The canvas without the prompt columns: shape
	        ``(prompt.shape[0], gen_length)``.

	    Raises:
	        AssertionError: If ``gen_length`` is not a multiple of
	            ``block_length`` or ``steps`` is not a multiple of the block count.
	        NotImplementedError: If ``remasking`` is not a supported strategy.
	    """
	    batch_size, prompt_len = prompt.shape[0], prompt.shape[1]

	    # Size the canvas from the prompt's batch dimension. A hard-coded batch of
	    # 1 makes the prompt copy below fail for any prompt with more than one row,
	    # even though the rest of the function already iterates over rows.
	    x = torch.full((batch_size, prompt_len + gen_length), mask_id, dtype=torch.long, device=model.device)
	    x[:, :prompt_len] = prompt.clone()
	    # Positions that were filled from the prompt (used for the unconditional CFG pass).
	    prompt_index = (x != mask_id)

	    assert gen_length % block_length == 0, "gen_length must be a multiple of block_length"
	    num_blocks = gen_length // block_length
	    assert steps % num_blocks == 0, "steps must be a multiple of the number of blocks"
	    steps = steps // num_blocks  # steps spent on each block

	    for num_block in range(num_blocks):
	        block_start = prompt_len + num_block * block_length
	        block_end = block_start + block_length
	        block_mask_index = (x[:, block_start:block_end] == mask_id)
	        # Per-row, per-step count of tokens to commit within this block.
	        num_transfer_tokens = get_num_transfer_tokens(block_mask_index, steps)

	        for i in range(steps):
	            mask_index = (x == mask_id)
	            if cfg_scale > 0.:
	                # Conditional and unconditional canvases share one forward pass.
	                un_x = x.clone()
	                un_x[prompt_index] = mask_id
	                x_ = torch.cat([x, un_x], dim=0)
	                logits = model(x_).logits
	                logits, un_logits = torch.chunk(logits, 2, dim=0)
	                logits = un_logits + (cfg_scale + 1) * (logits - un_logits)
	            else:
	                logits = model(x).logits

	            logits_with_noise = add_gumbel_noise(logits, temperature=temperature)
	            x0 = torch.argmax(logits_with_noise, dim=-1)

	            if remasking == 'low_confidence':
	                p = F.softmax(logits, dim=-1)
	                x0_p = torch.squeeze(
	                    torch.gather(p, dim=-1, index=torch.unsqueeze(x0, -1)), -1)
	            elif remasking == 'random':
	                x0_p = torch.rand((x0.shape[0], x0.shape[1]), device=x0.device)
	            else:
	                raise NotImplementedError(remasking)

	            # Positions beyond the current block must not be selected yet.
	            x0_p[:, block_end:] = -np.inf

	            # Already-committed positions keep their token and can never be picked.
	            x0 = torch.where(mask_index, x0, x)
	            confidence = torch.where(mask_index, x0_p, -np.inf)

	            transfer_index = torch.zeros_like(x0, dtype=torch.bool, device=x0.device)
	            # k differs per row, so top-k is taken one row at a time.
	            for j in range(confidence.shape[0]):
	                k = int(num_transfer_tokens[j, i].item())
	                _, select_index = torch.topk(confidence[j], k=k)
	                transfer_index[j, select_index] = True
	            x[transfer_index] = x0[transfer_index]

	    return x[:, prompt_len:]

	if __name__ == "__main__":
	    # Demo: load a checkpoint, build a few-shot math prompt, run `generate`
	    # with its default settings and print the decoded continuation.

	    # NOTE(review): machine-specific absolute path; point this at a local
	    # checkpoint before running elsewhere.
	    model_path = "/cpfs02/shared/llmit6/liudawei/xpuyu_work_dirs/internlm2-1_8b-myds-llada-sft-v3/pretrain-310000-yhc-padto2power/20250430200836/release"
	    device = torch.device("cuda")
	    # NOTE(review): trust_remote_code=True presumably executes Python shipped
	    # with the checkpoint - only use it with a trusted model_path.
	    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
	    model = AutoModel.from_pretrained(
	        model_path,
	        torch_dtype=torch.bfloat16,
	        device_map=None,
	        trust_remote_code=True
	    ).to(device)

	    # Few-shot prompt, written as adjacent string literals (concatenated at
	    # compile time). The previous single literal contained a raw line break
	    # after "acorns. ", which is a syntax error in a double-quoted string; the
	    # text itself is unchanged apart from removing that break.
	    # NOTE(review): several "*" operators in the worked answers look like they
	    # were lost (e.g. "<<163=48>>", "180 / 15 20") - verify against the
	    # intended prompt before relying on the output.
	    input_text = (
	        "Question: Jen and Tyler are gymnasts practicing flips. Jen is practicing the triple-flip while Tyler is practicing the double-flip. Jen did sixteen triple-flips during practice. Tyler flipped in the air half the number of times Jen did. How many double-flips did Tyler do?\n"
	        "Answer: Jen did 16 triple-flips, so she did 16 * 3 = <<163=48>>48 flips.\n"
	        "Tyler did half the number of flips, so he did 48 / 2 = <<48/2=24>>24 flips.\n"
	        "A double flip has two flips, so Tyler did 24 / 2 = <<24/2=12>>12 double-flips.\n"
	        "#### 12\n\n"
	        "Question: Four people in a law firm are planning a party. Mary will buy a platter of pasta for $20 and a loaf of bread for $2. Elle and Andrea will split the cost for buying 4 cans of soda which cost $1.50 each, and chicken wings for $10. Joe will buy a cake that costs $5. How much more will Mary spend than the rest of the firm put together?\n"
	        "Answer: Mary will spend $20 + $2 = $<<20+2=22>>22.\n"
	        "Elle and Andrea will spend $1.5 x 4 = $<<1.54=6>>6 for the soda.\n"
	        "Elle and Andrea will spend $6 + $10 = $<<6+10=16>>16 for the soda and chicken wings.\n"
	        "Elle, Andrea, and Joe together will spend $16 + $5 = $<<16+5=21>>21.\n"
	        "So, Mary will spend $22 - $21 = $<<22-21=1>>1 more than all of them combined.\n"
	        "#### 1\n\n"
	        "Question: A charcoal grill burns fifteen coals to ash every twenty minutes of grilling. The grill ran for long enough to burn three bags of coals. Each bag of coal contains 60 coals. How long did the grill run?\n"
	        "Answer: The grill burned 3 * 60 = <<360=180>>180 coals.\n"
	        "It takes 20 minutes to burn 15 coals, so the grill ran for 180 / 15 20 = <<180/1520=240>>240 minutes.\n"
	        "#### 240\n\n"
	        "Question: A bear is preparing to hibernate for the winter and needs to gain 1000 pounds. At the end of summer, the bear feasts on berries and small woodland animals. During autumn, it devours acorns and salmon. It gained a fifth of the weight it needed from berries during summer, and during autumn, it gained twice that amount from acorns. "
	        "Salmon made up half of the remaining weight it had needed to gain. How many pounds did it gain eating small animals?\n"
	        "Answer: The bear gained 1 / 5 1000 = <<1/51000=200>>200 pounds from berries.\n"
	        "It gained 2 200 = <<2*200=400>>400 pounds from acorns.\n"
	        "It still needed 1000 - 200 - 400 = <<1000-200-400=400>>400 pounds.\n"
	        "Thus, it gained 400 / 2 = <<400/2=200>>200 pounds from salmon.\n"
	        "Therefore, the bear gained 400 - 200 = <<400-200=200>>200 pounds from small animals.\n"
	        "#### 200\n\n"
	        "Question: Janet’s ducks lay 16 eggs per day. She eats three for breakfast every morning and bakes muffins for her friends every day with four. She sells the remainder at the farmers' market daily for $2 per fresh duck egg. How much in dollars does she make every day at the farmers' market?\n"
	        "Answer:"
	    )

	    # Wrap the text as a single user turn and tokenize it to a tensor on `device`.
	    prompt = tokenizer.apply_chat_template(
	        [{"role": "user", "content": input_text}],
	        add_generation_prompt=True,
	        return_tensors="pt"
	    ).to(device)
	    print(f"输入文本: {input_text}")
	    result_ids = generate(model, prompt)
	    # Only the first row of the result is decoded.
	    result_text = tokenizer.decode(result_ids[0], skip_special_tokens=True)
	    print(f"最终输出: {result_text}")