getGO007 committed on
Commit
8fe0ddf
·
verified ·
1 Parent(s): 30b283c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -0
app.py CHANGED
@@ -22,6 +22,7 @@ class Tokenizer:
22
  ('NUMBER', r'\d+'),
23
  ('WORD', r'[A-Za-z]+'),
24
  ('SPACE', r'\s+'),
 
25
  ]
26
 
27
  combined_pattern = '|'.join(
 
22
  ('NUMBER', r'\d+'),
23
  ('WORD', r'[A-Za-z]+'),
24
  ('SPACE', r'\s+'),
25
+ ('PUNCT', r'[^\w\s]'), # <--- Added punctuation pattern
26
  ]
27
 
28
  combined_pattern = '|'.join(