profoz committed on
Commit
046385a
·
1 Parent(s): a00fe0a

added tokenization

Browse files
Files changed (1) hide show
  1. app.py +1 -1
app.py CHANGED
@@ -84,7 +84,7 @@ def get_documents(document_text, crawl=crawl_urls):
84
 
85
  if tokenizing == "Don't (use entire body as document)":
86
  document_paragraphs = [body]
87
- elif tokenizing == 'Newline (split by \n)':
88
  document_paragraphs = [n for n in body.split('\n') if len(n) > 50]
89
 
90
  for document_paragraph in document_paragraphs:
 
84
 
85
  if tokenizing == "Don't (use entire body as document)":
86
  document_paragraphs = [body]
87
+ elif tokenizing == 'Newline (split by newline character)':
88
  document_paragraphs = [n for n in body.split('\n') if len(n) > 50]
89
 
90
  for document_paragraph in document_paragraphs: