MasteredUltraInstinct committed on
Commit
326ef6a
·
verified ·
1 Parent(s): 9c73391

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -2
app.py CHANGED
@@ -9,12 +9,13 @@ processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
9
  def preprocess(ex):
10
  img = ex["image"].convert("RGB")
11
  inputs = processor(images=img, return_tensors="pt")
12
- labels = processor.tokenizer(ex["text"], truncation=True, padding="max_length", max_length=128).input_ids
13
  ex["pixel_values"] = inputs.pixel_values[0]
14
  ex["labels"] = labels
15
  return ex
16
 
17
- ds = ds.map(preprocess, remove_columns=["image", "text"])
 
18
 
19
  model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten")
20
  model.config.decoder_start_token_id = processor.tokenizer.cls_token_id
 
9
  def preprocess(ex):
10
  img = ex["image"].convert("RGB")
11
  inputs = processor(images=img, return_tensors="pt")
12
+ labels = processor.tokenizer(ex["label"], truncation=True, padding="max_length", max_length=128).input_ids
13
  ex["pixel_values"] = inputs.pixel_values[0]
14
  ex["labels"] = labels
15
  return ex
16
 
17
+ ds = ds.map(preprocess, remove_columns=["image", "label"])
18
+
19
 
20
  model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten")
21
  model.config.decoder_start_token_id = processor.tokenizer.cls_token_id