liamcripwell committed on
Commit
b558add
·
verified ·
1 Parent(s): 19b0615

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +76 -0
README.md CHANGED
@@ -536,4 +536,80 @@ for y in result:
536
  # {"store_name": "Trader Joe's"}
537
  # {"names": ["John", "Mary", "James"]}
538
  # {"names": ["JOHN", "MARY", "JAMES"], "female_names": ["MARY"]}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
539
  ```
 
536
  # {"store_name": "Trader Joe's"}
537
  # {"names": ["John", "Mary", "James"]}
538
  # {"names": ["JOHN", "MARY", "JAMES"], "female_names": ["MARY"]}
539
+ ```
540
+
541
+ ## Template Generation
542
+ If you want to convert existing schema files from other formats (e.g. XML or YAML) or start from an example, NuExtract 2 models can automatically generate a template for you.
543
+
544
+ E.g. convert XML into a NuExtract template:
545
+ ```python
546
+ def generate_template(description):
547
+ input_messages = [description]
548
+ input_content = prepare_inputs(
549
+ messages=input_messages,
550
+ image_paths=[],
551
+ tokenizer=tokenizer,
552
+ )
553
+ generation_config = {"do_sample": True, "temperature": 0.4, "max_new_tokens": 256}
554
+ with torch.no_grad():
555
+ result = nuextract_generate(
556
+ model=model,
557
+ tokenizer=tokenizer,
558
+ prompts=input_content['prompts'],
559
+ pixel_values_list=input_content['pixel_values_list'],
560
+ num_patches_list=input_content['num_patches_list'],
561
+ generation_config=generation_config
562
+ )
563
+ return result[0]
564
+ xml_template = """<SportResult>
565
+ <Date></Date>
566
+ <Sport></Sport>
567
+ <Venue></Venue>
568
+ <HomeTeam></HomeTeam>
569
+ <AwayTeam></AwayTeam>
570
+ <HomeScore></HomeScore>
571
+ <AwayScore></AwayScore>
572
+ <TopScorer></TopScorer>
573
+ </SportResult>"""
574
+ result = generate_template(xml_template)
575
+
576
+ print(result)
577
+ # {
578
+ # "SportResult": {
579
+ # "Date": "date-time",
580
+ # "Sport": "verbatim-string",
581
+ # "Venue": "verbatim-string",
582
+ # "HomeTeam": "verbatim-string",
583
+ # "AwayTeam": "verbatim-string",
584
+ # "HomeScore": "integer",
585
+ # "AwayScore": "integer",
586
+ # "TopScorer": "verbatim-string"
587
+ # }
588
+ # }
589
+ ```
590
+
591
+ E.g. generate a template from a natural language description:
592
+ ```python
593
+ text = """Give me relevant info about startup companies mentioned."""
594
+ result = generate_template(text)
595
+
596
+ print(result)
597
+ # {
598
+ # "Startup_Companies": [
599
+ # {
600
+ # "Name": "verbatim-string",
601
+ # "Products": [
602
+ # "string"
603
+ # ],
604
+ # "Location": "verbatim-string",
605
+ # "Company_Type": [
606
+ # "Technology",
607
+ # "Finance",
608
+ # "Health",
609
+ # "Education",
610
+ # "Other"
611
+ # ]
612
+ # }
613
+ # ]
614
+ # }
615
  ```