Spaces:
Sleeping
Sleeping
| from document_qa.document_qa_engine import TextMerger | |
def test_merge_passages_small_chunk():
    """Check that ``merge_passages`` returns the passages unmerged for chunk_size=10.

    Four passages are passed in with ``chunk_size=10`` and ``tolerance=0``.
    The test expects four passages back, each with the same ``text`` and
    ``coordinates`` as the corresponding input.
    """
    # (coordinates, text) pairs; kept immutable so the expectations cannot be
    # affected by anything merge_passages does to the dicts it receives.
    samples = (
        ("1", "The quick brown fox jumps over the tree"),
        ("2", "and went straight into the mouth of a bear."),
        ("3", "The color of the colors is a color with colors"),
        ("4", "the main colors are not the colorw we show"),
    )
    passages = [
        {'text': text, 'coordinates': coords}
        for coords, text in samples
    ]

    merger = TextMerger()
    new_passages = merger.merge_passages(passages, chunk_size=10, tolerance=0)

    assert len(new_passages) == 4
    for index, (coords, text) in enumerate(samples):
        assert new_passages[index]['coordinates'] == coords
        assert new_passages[index]['text'] == text
def test_merge_passages_big_chunk():
    """Check that ``merge_passages`` merges the passages pairwise for chunk_size=20.

    Four passages are passed in with ``chunk_size=20`` and ``tolerance=0``.
    The test expects two passages back: the texts of each consecutive pair
    joined by a single space, and their coordinates joined by ``;``.
    """
    texts = (
        "The quick brown fox jumps over the tree",
        "and went straight into the mouth of a bear.",
        "The color of the colors is a color with colors",
        "the main colors are not the colorw we show",
    )
    # Coordinates are the 1-based position of each passage, as strings.
    passages = [
        {'text': text, 'coordinates': str(position)}
        for position, text in enumerate(texts, start=1)
    ]

    # Expected (coordinates, text) of each merged passage, in output order.
    expected = (
        (
            "1;2",
            "The quick brown fox jumps over the tree and went straight into the mouth of a bear.",
        ),
        (
            "3;4",
            "The color of the colors is a color with colors the main colors are not the colorw we show",
        ),
    )

    merger = TextMerger()
    new_passages = merger.merge_passages(passages, chunk_size=20, tolerance=0)

    assert len(new_passages) == 2
    for index, (coords, text) in enumerate(expected):
        assert new_passages[index]['coordinates'] == coords
        assert new_passages[index]['text'] == text