"""Print summary statistics for the preprocessed dialog data.

Walks every entry of ``dialog_data`` and reports:

* the mean number of pairs per conversation,
* the largest and second-largest (distinct) number of pairs,
* the largest per-sentence token count.

The labels in the printed output are kept exactly as they were; they do
not all describe the value they carry (see the comments at the prints).
"""
from preprocessed_dialogs import dialog_data


def _token_count(sentence):
    """Return the number of single-space-separated pieces in *sentence*, plus one."""
    # NOTE(review): the extra 1 presumably reserves a slot for a marker
    # token (e.g. end-of-sequence) -- confirm against the consumer of these
    # statistics. Splitting on ' ' (not on arbitrary whitespace) is kept so
    # the reported numbers do not change.
    return len(sentence.split(' ')) + 1


top_count = 0       # largest number of pairs seen in one conversation
second_count = 0    # second-largest distinct number of pairs
top_list = 0        # longest first-element list; always equals top_count
average_list = []   # token count of every non-empty sentence
list_average = []   # number of pairs in every conversation

for pairs in dialog_data.values():
    pair_count = len(pairs)
    list_average.append(pair_count)

    # Each entry is indexed as item[0] / item[1]; all first elements are
    # counted before all second elements, per conversation.
    firsts = [item[0] for item in pairs]
    seconds = [item[1] for item in pairs]
    for sentence in firsts + seconds:
        if sentence:  # skip empty / missing sentences
            average_list.append(_token_count(sentence))

    # Track the two largest distinct pair counts. The previous maximum must
    # be demoted to second place when a new maximum arrives; otherwise the
    # runner-up is lost whenever the lengths arrive in increasing order.
    if pair_count > top_count:
        second_count = top_count
        top_count = pair_count
    elif second_count < pair_count < top_count:
        second_count = pair_count

    # len(firsts) == len(pairs), so this mirrors top_count.
    top_list = max(top_list, len(firsts))

# Guard against an empty data set instead of dividing by zero.
average_total = sum(list_average) / len(list_average) if list_average else 0.0

print(f"Average Length of Pairs: {average_total}")
# "Conversation 0" carries the maximum pair count and "Embedding Dimension
# Min" the second-largest one; labels are preserved as-is for any consumer
# that parses this output.
print(f"Conversation 0: {top_count} \nEmbedding Dimension Min: {second_count}")
print(f"Max Sequence Length: {top_list}")
# default=0 keeps the script from raising when no sentence was counted.
print(f"Max Word in Sentence Count: {max(average_list, default=0)}")