# data_extraction_demo / dataSchema.py
# Amamrnaf
# "Added bank_details model to dataSchema.py and modified service model."
# daaaba0
from pydantic import BaseModel, Field
from typing import Optional,List
from langchain_core.output_parsers import JsonOutputParser # type: ignore
class Noc_Residential_TimeSheetInformation(BaseModel):
    """Details of a timesheet entry."""

    # Schema for a residential NOC timesheet.
    # pydantic includes the class docstring and every field ``description`` in
    # the generated JSON schema, so that text is left verbatim on purpose.

    # --- Header / identification fields (all required) ---
    full_name: str = Field(..., description="Full name of the person.")
    position_title: str = Field(..., description="Position title of the person.")
    work_location: str = Field(..., description="Work location of the person.")
    contractor: str = Field(..., description="Contractor's name.")
    noc_id: str = Field(..., description="NOC ID of the person.")
    month_year: str = Field(..., description="Month and year in MM/YYYY format.")

    # --- Day counters (default to 0 when absent) ---
    service_days_onshore: int = Field(0, description="Number of service days onshore.")
    standby_days_onshore: int = Field(0, description="Number of standby days onshore in Doha.")
    service_days_offshore: int = Field(0, description="Number of service days offshore.")
    service_days_weekend_public_holiday: int = Field(
        0, description="Number of service days during weekends or public holidays."
    )
    standby_extended_hitch_days_offshore: int = Field(
        0, description="Number of standby and extended hitch days offshore."
    )
    extended_hitch_days_onshore: int = Field(
        0, description="Number of extended hitch days onshore for rotational personnel."
    )

    # NOTE(review): the field names say "Hourly_Rate" but the descriptions ask
    # for a number of overtime hours -- confirm which one is intended.
    overtime_Hourly_Rate_ONSHORE: int = Field(
        0, description="number of over time hours onshore (Over 8 hours) "
    )
    overtime_Hourly_Rate_OFFSHORE: int = Field(
        0, description="number of over time hours offshore (Over 12 hours) "
    )

    per_diem_days: int = Field(
        0, description="Number of Per Diem days for onshore/offshore rotational personnel."
    )
    training_days: int = Field(0, description="Number of training days.")
    travel_days: int = Field(0, description="Number of travel days.")

    # --- Approval block ---
    approval_status: str = Field(
        "not approved",
        description="Indicates whether the NOC representative's name and date are present on the document (e.g., 'approved', 'not approved').",
    )
    # A sheet that is "not approved" (the default above) carries no stamp date
    # and no approver, so these two cannot be mandatory strings: they are
    # optional and default to None instead of forcing an invented value.
    approved_on: Optional[str] = Field(None, description="DD/MM/YY of the stamp")
    approved_by: Optional[str] = Field(
        None, description="Name of the person who approved the document"
    )
class Noc_Rotational_TimeSheetInformation(BaseModel):
    """Details of a timesheet entry."""

    # Schema for a rotational NOC timesheet; it additionally carries an
    # optional purchase-order number.
    # pydantic includes the class docstring and every field ``description`` in
    # the generated JSON schema, so that text is left verbatim on purpose.

    # --- Header / identification fields ---
    full_name: str = Field(..., description="Full name of the person.")
    position_title: str = Field(..., description="Position title of the person.")
    work_location: str = Field(..., description="Work location of the person.")
    contractor: str = Field(..., description="Contractor's name.")
    PO_number: Optional[str] = Field(None, description="Purchase order")
    noc_id: str = Field(..., description="NOC ID of the person.")
    month_year: str = Field(..., description="Month and year in MM/YYYY format.")

    # --- Day counters (default to 0 when absent) ---
    service_days_onshore: int = Field(0, description="Number of service days onshore.")
    standby_days_onshore: int = Field(0, description="Number of standby days onshore in Doha.")
    service_days_offshore: int = Field(0, description="Number of service days offshore.")
    service_days_weekend_public_holiday: int = Field(
        0, description="Number of service days during weekends or public holidays."
    )
    standby_extended_hitch_days_offshore: int = Field(
        0, description="Number of standby and extended hitch days offshore."
    )
    extended_hitch_days_onshore: int = Field(
        0, description="Number of extended hitch days onshore for rotational personnel."
    )

    # NOTE(review): the field names say "Hourly_Rate" but the descriptions ask
    # for a number of overtime hours -- confirm which one is intended.
    overtime_Hourly_Rate_ONSHORE: int = Field(
        0, description="number of over time hours onshore (Over 8 hours) "
    )
    overtime_Hourly_Rate_OFFSHORE: int = Field(
        0, description="number of over time hours offshore (Over 12 hours) "
    )

    per_diem_days: int = Field(
        0, description="Number of Per Diem days for onshore/offshore rotational personnel."
    )
    training_days: int = Field(0, description="Number of training days.")
    travel_days: int = Field(0, description="Number of travel days.")

    # --- Approval block ---
    approval_status: str = Field(
        "not approved",
        description="Indicates whether the NOC representative's name and date are present on the document (e.g., 'approved', 'not approved').",
    )
    # A sheet that is "not approved" (the default above) carries no stamp date
    # and no approver, so these two cannot be mandatory strings: they are
    # optional and default to None instead of forcing an invented value.
    approved_on: Optional[str] = Field(None, description="DD/MM/YY of the stamp")
    approved_by: Optional[str] = Field(
        None, description="Name of the person who approved the document"
    )
class Noc_TimeSheetInformation(BaseModel):
    """Details of a timesheet entry."""

    # Generic NOC timesheet schema (agency-based header, no month/contractor).
    # pydantic includes the class docstring and every field ``description`` in
    # the generated JSON schema, so that text is left verbatim on purpose.

    # --- Header / identification fields (all required) ---
    position_title: str = Field(..., description="Position title of the person.")
    work_location: str = Field(..., description="Work location ")
    agency: str = Field(..., description="the agency")
    noc_id: str = Field(..., description="NOC ID of the person.")
    full_name: str = Field(..., description="Full name of the person.")

    # --- Approval block ---
    approval_status: str = Field(
        "not approved", description="(e.g., 'approved', 'not approved')."
    )
    # A sheet that is "not approved" (the default above) carries no stamp date
    # and no approver, so these two cannot be mandatory strings: they are
    # optional and default to None instead of forcing an invented value.
    approved_on: Optional[str] = Field(None, description="DD/MM/YY of the stamp")
    approved_by: Optional[str] = Field(
        None, description="Name of the person who approved the document"
    )

    # --- Day counters (default to 0 when absent) ---
    service_days_onshore: int = Field(0, description="Number of service days onshore.")
    standby_days_onshore: int = Field(0, description="Number of standby days onshore in Doha.")
    service_days_offshore: int = Field(0, description="Number of service days offshore.")
    service_days_weekend_public_holiday: int = Field(
        0, description="Number of service days during weekends or public holidays."
    )
    standby_extended_hitch_days_offshore: int = Field(
        0, description="Number of standby and extended hitch days offshore."
    )
    extended_hitch_days_onshore: int = Field(
        0, description="Number of extended hitch days onshore for rotational personnel."
    )

    # NOTE(review): the field names say "Hourly_Rate" but the descriptions ask
    # for a number of overtime hours -- confirm which one is intended.
    overtime_Hourly_Rate_ONSHORE: int = Field(
        0, description="number of over time hours onshore (Over 8 hours) "
    )
    overtime_Hourly_Rate_OFFSHORE: int = Field(
        0, description="number of over time hours offshore (Over 12 hours) "
    )

    per_diem_days: int = Field(
        0, description="Number of Per Diem days for onshore/offshore rotational personnel."
    )
    training_days: int = Field(0, description="Number of training days.")
    travel_days: int = Field(0, description="Number of travel days.")
class Noc_Item_Information(BaseModel):
    """Details of each item in the document."""

    # One line item of a purchase-order document; every field is required.
    # The docstring and descriptions end up in the JSON schema, so their text
    # is reproduced exactly.

    # Position of the line and its service code.
    item_number: int = Field(
        ...,
        description="the number of the item",
    )
    service_description_code: str = Field(
        ...,
        description="Service description code for the item.",
    )

    # Dates are carried as text, not as date objects.
    completion_date: str = Field(
        ...,
        description="Completion date of the service in DD/MM/YYYY format.",
    )

    # Quantity * unit price -> net amount (stated by the description only;
    # nothing in this model enforces the arithmetic).
    quantity: float = Field(
        ...,
        description="Quantity of the service or item provided.",
    )
    unit: str = Field(
        ...,
        description="Unit of measurement, e.g., Days.",
    )
    unit_price: float = Field(
        ...,
        description="Unit price.",
    )
    net_amount: float = Field(
        ...,
        description="Total net amount for this item, calculated as quantity * unit price.",
    )

    service_details: str = Field(
        ...,
        description="Detailed description of the service provided.",
    )
class Noc_Document_Information(BaseModel):
    """Details of the entire document."""

    # Document-level header plus the list of line items found with it.
    # The docstring and descriptions end up in the JSON schema, so their text
    # is reproduced exactly.

    position_title: str = Field(
        ...,
        description="Position title of the person.",
    )
    # Optional: defaults to None when the document gives no location.
    location: Optional[str] = Field(
        None,
        description="Location where the service is rendered.",
    )

    # Contract period, both ends required and carried as text.
    mobilization_date: str = Field(
        ...,
        description="Mobilization date in DD/MM/YYYY format.",
    )
    end_date: str = Field(
        ...,
        description="End date of the contract in DD/MM/YYYY format.",
    )

    notice_period: Optional[str] = Field(
        None,
        description="Notice period for resignation, if applicable",
    )

    # Required list of nested Noc_Item_Information entries.
    items: List[Noc_Item_Information] = Field(
        ...,
        description="List of items or services provided in the document.",
    )
class Noc_items(BaseModel):
    # Wrapper holding only a list of line items.
    # No class docstring is added: pydantic would copy it into the JSON schema
    # as a description, which would change the generated schema text.

    # The default is None, so the annotation must admit None as well; a bare
    # ``List[...]`` contradicts its own default and rejects an explicit null.
    items: Optional[List[Noc_Item_Information]] = Field(
        None, description="List of items or services provided in the document."
    )
class Noc_total(BaseModel):
    """totals of the invoice"""

    # Three required monetary totals, all floats.
    # The docstring and descriptions end up in the JSON schema, so their text
    # is reproduced exactly.

    total_amount_wo_taxes: float = Field(
        ...,
        description="total amount without taxes",
    )
    total_net_amount_of_order: float = Field(
        ...,
        description="total net amount of order.",
    )
    total_amount_of_order: float = Field(
        ...,
        description="total amount of order.",
    )
class Noc_PurchaseOrderInformation(BaseModel):
    """Details of a purchase order entry."""

    # Header information of a purchase order.
    # pydantic includes the class docstring and every field ``description`` in
    # the generated JSON schema, so that text is left verbatim on purpose.

    # --- Order identity and issuing company ---
    purchase_order_number: str = Field(
        ..., description="The unique identifier for the purchase order."
    )
    date: str = Field(..., description="Date of the purchase order in DD/MM/YYYY format.")
    company_name: str = Field(
        ..., description="Name of the company issuing the purchase order."
    )
    address: str = Field(
        ..., description="Address of the company issuing the purchase order."
    )
    tel: Optional[str] = Field(None, description="Telephone number of the company.")
    email: Optional[str] = Field(None, description="Email address of the company.")
    final_shipping_address: Optional[str] = Field(
        None, description="Final shipping address for the order."
    )

    # --- Buyer contact ---
    buyer_contact_name: str = Field(..., description="Full name of the buyer contact.")
    buyer_contact_company: str = Field(
        ..., description="Company name of the buyer contact."
    )
    buyer_contact_tel: Optional[str] = Field(
        None, description="Telephone number of the buyer contact."
    )
    buyer_contact_email: Optional[str] = Field(
        None, description="Email address of the buyer contact."
    )

    # --- References and terms ---
    our_reference: Optional[str] = Field(None, description="under Our reference title.")
    your_reference: Optional[str] = Field(None, description="under Your reference title.")
    incoterms: Optional[str] = Field(None, description="Incoterms applicable to the order.")
    # Kept as text (not float), unlike the totals in Noc_total.
    total_value_of_order: str = Field(..., description="Total value of the purchase order.")

    # --- Signature ---
    signed: bool = Field(..., description="Whether the document has been signed or not.")
    # Defaults to None, so the annotation must be Optional like its sibling
    # ``signature_date``; an unsigned order has nobody who released it.
    signature_released_by: Optional[str] = Field(
        None, description="Name of the person who released the purchase order."
    )
    signature_date: Optional[str] = Field(None, description="Date the order was signed.")
class Noc_Clauses(BaseModel):
    # Single required text field carrying the contract clauses.
    # No class docstring on purpose: pydantic would surface it in the schema.
    Clauses: str = Field(
        ...,
        description="the contract clauses.",
    )
class service(BaseModel):
    # One row of the services table of an invoice. All values are carried as
    # text, including the numeric-looking ones (rate, total, number of days).
    # No class docstring is added: pydantic would copy it into the JSON schema
    # as a description, which would change the generated schema text.

    service: str = Field(..., description="the service name.")

    # These three default to None, so they are annotated Optional: a plain
    # ``str`` annotation contradicts the default and rejects an explicit null.
    from_date: Optional[str] = Field(
        None, description="starting date in DD/MM/YYYY format.."
    )
    to_date: Optional[str] = Field(None, description="ending date in DD/MM/YYYY format.")

    currency: str = Field(..., description="currency of the rate.")
    fx: Optional[str] = Field(None, description="foreign exchange.")

    Number_of_days_hours: str = Field(
        ..., description="number of hours or days for the service."
    )
    rate: str = Field(..., description="the rate of the service.")
    total: str = Field(
        ..., description="total which is the rate* No of days/hours ."
    )
class bank_details(BaseModel):
    # Bank account block of an invoice; all values are carried as text.
    # No class docstring is added: pydantic would copy it into the JSON schema
    # as a description, which would change the generated schema text.

    bank_name: str = Field(..., description="Name of the bank.")
    # Defaults to None, so the annotation is Optional rather than a bare
    # ``str`` that contradicts the default and rejects an explicit null.
    swift_bic_code: Optional[str] = Field(None, description=" SWIFT/BIC CODE.")
    iban_number: str = Field(..., description="IBAN Number.")
    beneficiary_name: str = Field(..., description="full name")
    account_currency: str = Field(..., description="Account currency.")
    expected_amount: str = Field(..., description="the amount.")
class Noc_Invoice(BaseModel):
    """Details of an invoice."""

    # Full invoice: header, table of services, totals and two bank blocks.
    # pydantic includes the class docstring and every field ``description`` in
    # the generated JSON schema, so that text is left verbatim on purpose.

    # --- Header ---
    invoice_date: str = Field(..., description="Date of the invoice in DD/MM/YYYY format")
    invoice_number: str = Field(..., description="Unique identifier for the invoice.")
    full_name: str = Field(..., description="Full name of the person.")
    invoice_to: str = Field(..., description="email to send the invoice forward to.")
    company_name: str = Field(..., description="company name")
    address: str = Field(..., description="Address of the company.")

    # --- Services and totals ---
    # ``services``, ``vat`` and ``Withholding_tax`` default to None, so they
    # are annotated Optional: a bare ``List``/``float`` annotation contradicts
    # the default and rejects an explicit null for a tax that does not apply.
    services: Optional[List[service]] = Field(
        None, description="list of services in the table."
    )
    sub_total: float = Field(..., description="the sub total.")
    vat: Optional[float] = Field(None, description="the vat.")
    Withholding_tax: Optional[float] = Field(None, description="Withholding Tax.")
    total_due: float = Field(..., description="the total due.")

    # --- Bank details (both required, each a list of bank_details) ---
    first_bank: List[bank_details] = Field(..., description="first bank informations.")
    second_details: List[bank_details] = Field(..., description="second bank details")
# Instruction text for the generic NOC timesheet extraction. The bullets are
# listed in the same order as the fields of Noc_TimeSheetInformation.
# Written as adjacent literals (one per prompt line); the resulting string
# starts with a newline and ends with a newline after the last bullet.
Noc_timesheet_prompt = (
    "\n"
    "Based on the provided timesheet details, extract the following information:\n"
    "- Position title of the person.\n"
    "- Work location .\n"
    "- the agency.\n"
    "- NOC ID of the person.\n"
    "- Name of the person.\n"
    "- approval status\n"
    "- date of the approval\n"
    "- approved by\n"
    "- Number of service days onshore\n"
    "- Number of standby days onshore in Doha\n"
    "- Number of service days offshore\n"
    "- Number of service days during weekends or public holidays\n"
    "- Number of standby and extended hitch days offshore\n"
    "- Number of extended hitch days onshore for rotational personnel\n"
    "- Number of over time hours onshore (Over 8 hours)\n"
    "- Number of over time hours offshore (Over 12 hours)\n"
    "- Number of Per Diem days for onshore/offshore rotational personnel\n"
    "- Number of training days\n"
    "- Number of travel days\n"
)
# Instruction text for invoice extraction. The bullets mirror the fields of
# Noc_Invoice, with the nested ``service`` / ``bank_details`` fields listed as
# indented sub-bullets under their parent so they cannot be mistaken for
# top-level fields (e.g. the beneficiary "full name" vs. the invoice's
# "Full name of the person").
# The opening sentence says "invoice": the previous wording ("timesheet
# details") was carried over from the timesheet prompts.
# NOTE(review): presumably sent together with Noc_invoice_parser_v1 -- the
# pairing is not visible in this file; confirm at the call site.
Noc_invoice_prompt = """
Based on the provided invoice details, extract the following information:
- Invoice date
- Invoice number
- Full name of the person
- Email to send the invoice forward to.
- Company name
- Address of the company
- List of services,for each existing service provide:
  - the service name.
  - starting date in DD/MM/YYYY format.
  - ending date in DD/MM/YYYY format.
  - currency of the rate.
  - foreign exchange.
  - number of hours or days for the service.
  - the rate of the service.
  - total which is the rate* No of days/hours .
- Sub total
- VAT or Withholding Tax.
- Total due
- First bank informations, for each existing bank provide, do not mess up the iban:
  - Name of the bank.
  - SWIFT/BIC CODE.
  - IBAN Number.
  - full name.
  - Account currency.
  - the amount.
- Second bank informations, for each existing bank provide,do not mess up the iban:
  - Name of the bank.
  - SWIFT/BIC CODE.
  - IBAN Number.
  - full name.
  - Account currency.
  - the amount.
"""
# Instruction text for the residential NOC timesheet. The bullets are listed
# in the same order as the fields of Noc_Residential_TimeSheetInformation.
# Written as adjacent literals (one per prompt line); the resulting string
# starts with a newline and ends with a newline after the last bullet.
Noc_Res_timesheet_prompt = (
    "\n"
    "Based on the provided timesheet details, extract the following information:\n"
    "- Full name of the person\n"
    "- Position title of the person\n"
    "- Work location\n"
    "- Contractor's name\n"
    "- NOC ID\n"
    "- Month and year (in MM/YYYY format)\n"
    "And from the bottom table :\n"
    "- Number of service days onshore\n"
    "- Number of standby days onshore in Doha\n"
    "- Number of service days offshore\n"
    "- Number of service days during weekends or public holidays\n"
    "- Number of standby and extended hitch days offshore\n"
    "- Number of extended hitch days onshore for rotational personnel\n"
    "- Number of over time hours onshore (Over 8 hours)\n"
    "- Number of over time hours offshore (Over 12 hours)\n"
    "- Number of Per Diem days for onshore/offshore rotational personnel\n"
    "- Number of training days\n"
    "- Number of travel days\n"
    "- Indicates whether the NOC representative's name and date are present on the document (e.g., 'approved', 'not approved').\n"
    "- DD/MM/YY of the stamp\n"
    "- Name of the person who approved the document\n"
)
# Instruction text for the rotational NOC timesheet. The bullets are listed
# in the same order as the fields of Noc_Rotational_TimeSheetInformation.
# NOTE(review): the two overtime bullets are worded as "Overtime Hourly Rate"
# here, whereas the schema descriptions ask for a number of overtime hours --
# confirm which wording matches the form.
# Written as adjacent literals (one per prompt line); the resulting string
# starts with a newline and ends with a newline after the last bullet.
Noc_Rot_timesheet_prompt = (
    "\n"
    "Based on the provided timesheet details, extract the following information:\n"
    "- Full name of the person\n"
    "- Position title of the person\n"
    "- Work location\n"
    "- Contractor's name\n"
    "- PO number which is the Purchase order\n"
    "- NOC ID\n"
    "- Month and year (in MM/YYYY format)\n"
    "And from the bottom table :\n"
    "- Number of service days onshore\n"
    "- Number of standby days onshore in Doha\n"
    "- Number of service days offshore\n"
    "- Number of service days during weekends or public holidays\n"
    "- Number of standby and extended hitch days offshore\n"
    "- Number of extended hitch days onshore for rotational personnel\n"
    "- ONSHORE Overtime Hourly Rate (Over 8 hours)\n"
    "- OFFSHORE Overtime Hourly Rate (Over 12 hours)\n"
    "- Number of Per Diem days for onshore/offshore rotational personnel\n"
    "- Number of training days\n"
    "- Number of travel days\n"
    "- Indicates whether the NOC representative's name and date are present on the document (e.g., 'approved', 'not approved').\n"
    "- DD/MM/YY of the stamp\n"
    "- Name of the person who approved the document\n"
)
# Instruction text for the first page of a purchase order (despite the
# "invoice_" prefix in the name, the text itself speaks of a purchase order).
# The bullets are listed in the same order as the fields of
# Noc_PurchaseOrderInformation.
# Written as adjacent literals (one per prompt line); the resulting string
# starts with a newline and ends with a newline after the last bullet.
invoice_first_page_prompt = (
    "\n"
    "Extract the following details from the provided purchase order document:\n"
    "- Purchase Order Number: The unique identifier for the purchase order.\n"
    "- Date: The date the purchase order was issued (format: DD/MM/YYYY).\n"
    "- Company Name: The name of the company issuing the purchase order.\n"
    "- Address: The address of the company issuing the purchase order.\n"
    "- Telephone Number: The company's telephone number (if provided).\n"
    "- Email: The company's email address (if provided).\n"
    "- Final Shipping Address: The destination shipping address (if specified).\n"
    "- Buyer Contact Name: The full name of the buyer's contact person.\n"
    "- Buyer Contact Company: The company name of the buyer contact.\n"
    "- Buyer Contact Telephone Number: The buyer contact's telephone number (if provided).\n"
    "- Buyer Contact Email: The buyer contact's email address (if provided).\n"
    '- Our Reference: Reference specified under the "Our Reference" section (if present).\n'
    '- Your Reference: Reference specified under the "Your Reference" section (if present).\n'
    "- Incoterms: Any applicable incoterms mentioned in the document (e.g., FOB, CIF).\n"
    "- Total Value of the Order: The total monetary value of the purchase order (include currency).\n"
    "- signed: Whether the document has been signed or not.\n"
    "- Signature Released By: The name of the person who authorized or released the purchase order.\n"
    "- Signature Date: The date when the order was signed (format: DD/MM/YYYY).\n"
)
# Instruction text for the first item page of a purchase order. The bullets
# mirror Noc_Document_Information and, for each item, Noc_Item_Information.
# Changes relative to the earlier wording:
#   * dates are requested as DD/MM/YYYY, the format stated by the schema
#     field descriptions (the prompt used to ask for DD-MM-YYYY, giving the
#     model two conflicting formats);
#   * "Item Number" is listed, because ``item_number`` is a required field of
#     Noc_Item_Information;
#   * per-item bullets are indented under "Items" so they read as nested.
# NOTE(review): presumably sent together with Noc_PurchaseOrder_item1_parser
# -- the pairing is not visible in this file; confirm at the call site.
invoice_item_page1_prompt = """
Given the document, extract the following information:
- Position Title: The role or title mentioned in the document.
- Location: The place where the service is being provided.
- Mobilization Date: The date work begins in DD/MM/YYYY format.
- End Date: The date the work ends in DD/MM/YYYY format.
- Notice Period: The required notice period for resignation or termination.
- Items: For each item in the document, provide:
  - Item Number: The number of the item.
  - Service Description Code: A code identifying the service.
  - Completion Date: The date the service was completed in DD/MM/YYYY format.
  - Quantity: The amount of the item/service provided.
  - Unit: The unit of measurement (e.g., Days, Hours).
  - Unit Price: The price per unit.
  - Net Amount: The total value for the item.
  - Service Details:A description of the service, which follows the corresponding row for the item.
"""
# Instruction text for the follow-up item pages of a purchase order (items
# only, no document header). The per-item bullets mirror Noc_Item_Information.
# Changes relative to the earlier wording:
#   * the completion date is requested as DD/MM/YYYY, the format stated by
#     the schema field description (the prompt used to ask for DD-MM-YYYY);
#   * "Item Number" is listed, because ``item_number`` is a required field of
#     Noc_Item_Information;
#   * per-item bullets are indented under "Items" so they read as nested.
# NOTE(review): presumably sent together with Noc_PurchaseOrder_items_parser
# -- the pairing is not visible in this file; confirm at the call site.
invoice_item_pages_prompt = """
Given the document, extract the following information:
- Items:
  - Item Number: The number of the item.
  - Service Description Code: A code identifying the service.
  - Completion Date: The date the service was completed in DD/MM/YYYY format.
  - Quantity: The amount of the item/service provided.
  - Unit: The unit of measurement (e.g., Days, Hours).
  - Unit Price: The price per unit.
  - Net Amount: The total value for the item.
  - Service Details:A description of the service, which follows the corresponding row for the item.
"""
# Instruction text for the totals page; the three bullets correspond to the
# three fields of Noc_total, in the same order.
# Written as adjacent literals (one per prompt line); the resulting string
# starts with a newline and ends with a newline after the last bullet.
invoice_total_page_prompt = (
    "\n"
    "extract from the document:\n"
    "- Total Amount without taxes.\n"
    "- Total net amount of order.\n"
    "- Total amount of order.\n"
)
# Instruction text for the clauses page. Single-line form of the same value:
# a leading newline, the sentence, and one trailing space (no final newline).
invoice_clauses_page_prompt = "\nextract from the document the clauses "
# CHOOSING PARSER DEPENDING ON THE TYPE OF DOCUMENT
# One module-level JsonOutputParser per schema defined in this file; each is
# built once at import time with the schema passed as ``pydantic_object``.

# Residential and rotational timesheets.
Noc_Res_timeSheet_parser = JsonOutputParser(
    pydantic_object=Noc_Residential_TimeSheetInformation,
)
Noc_Rot_timeSheet_parser = JsonOutputParser(
    pydantic_object=Noc_Rotational_TimeSheetInformation,
)

# Purchase order: header, first item page, further item pages, totals, clauses.
Noc_PurchaseOrder_information_parser = JsonOutputParser(
    pydantic_object=Noc_PurchaseOrderInformation,
)
Noc_PurchaseOrder_item1_parser = JsonOutputParser(
    pydantic_object=Noc_Document_Information,
)
Noc_PurchaseOrder_items_parser = JsonOutputParser(
    pydantic_object=Noc_items,
)
Noc_PurchaseOrder_total_parser = JsonOutputParser(
    pydantic_object=Noc_total,
)
Noc_PurchaseOrder_clauses_parser = JsonOutputParser(
    pydantic_object=Noc_Clauses,
)

# "v1" invoice and generic timesheet parsers.
Noc_invoice_parser_v1 = JsonOutputParser(
    pydantic_object=Noc_Invoice,
)
Noc_timesheet_parser_v1 = JsonOutputParser(
    pydantic_object=Noc_TimeSheetInformation,
)