Upload folder using huggingface_hub

032e687 verified 10 months ago

1.1 kB

	import json

	def load_jsonl(json_file):
	    """Load a JSON Lines file into a list of decoded objects.

	    Each non-blank line of the file is parsed as one JSON document.
	    Blank or whitespace-only lines (for example a trailing empty line)
	    are skipped instead of raising a decode error.

	    Args:
	        json_file: Path of the ``.jsonl`` file to read.

	    Returns:
	        A list with one decoded JSON value per non-blank line, in file
	        order. An empty file yields an empty list.

	    Raises:
	        OSError: If the file cannot be opened.
	        json.JSONDecodeError: If a non-blank line is not valid JSON.
	    """
	    # Explicit UTF-8 keeps decoding independent of the machine's locale.
	    with open(json_file, encoding="utf-8") as f:
	        # Iterate the handle directly instead of materialising every line
	        # with readlines(); json.loads ignores the trailing newline.
	        return [json.loads(line) for line in f if line.strip()]


	# Location of the SFT annotation file (one JSON record per line).
	json_data = "/mnt/bn/xiangtai-training-data/project/VLM/data/SOLO_SFT/all_data.jsonl"

	# Image directory that goes with the annotations; not referenced by the
	# scan below.
	image_data = "/mnt/bn/xiangtai-training-data/project/VLM/data/SOLO_SFT/images"

	a = load_jsonl(json_data)

	# Scan every conversation message and classify it by the keys it carries:
	#   * a message with a "role" key -> print the whole record and its index,
	#     then stop the script at this first hit;
	#   * a message with a 'from' or 'value' key -> accepted, nothing printed;
	#   * anything else -> print its keys so the unknown layout can be inspected.
	# NOTE(review): speaker tags are presumably human/user/gpt/model under
	# 'from' and user/assistant under "role" - confirm against the dataset.
	for index, sample in enumerate(a):
	    for msg in sample['conversations']:
	        if "role" in msg:
	            print(sample)
	            print(index)
	            # SystemExit with no argument exits with status 0, like exit(),
	            # without relying on the interactive-only `site` helper.
	            raise SystemExit
	        if 'from' in msg or 'value' in msg:
	            continue
	        # Print the actual key names (the bare `msg.keys` attribute would
	        # only show the bound-method repr).
	        print(list(msg))