Add new SentenceTransformer model
- 1_Pooling/config.json +10 -0
- README.md +971 -0
- config.json +28 -0
- config_sentence_transformers.json +10 -0
- merges.txt +0 -0
- model.safetensors +3 -0
- modules.json +20 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +51 -0
- tokenizer.json +0 -0
- tokenizer_config.json +65 -0
- vocab.json +0 -0
1_Pooling/config.json
ADDED
@@ -0,0 +1,10 @@
{
  "word_embedding_dimension": 768,
  "pooling_mode_cls_token": false,
  "pooling_mode_mean_tokens": true,
  "pooling_mode_max_tokens": false,
  "pooling_mode_mean_sqrt_len_tokens": false,
  "pooling_mode_weightedmean_tokens": false,
  "pooling_mode_lasttoken": false,
  "include_prompt": true
}
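This file configures the model's pooling module: mask-aware mean pooling over 768-dimensional token embeddings, with every other mode disabled. As a minimal sketch (an illustration, not how the Hub itself loads the file), the equivalent module can be built programmatically:

```python
from sentence_transformers.models import Pooling

# Mirrors the config above: mean pooling over 768-dim token embeddings.
# pooling_mode="mean" sets pooling_mode_mean_tokens=True and leaves the
# other pooling modes at their False defaults.
pooling = Pooling(word_embedding_dimension=768, pooling_mode="mean")
print(pooling.get_pooling_mode_str())  # -> "mean"
```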
README.md
ADDED
@@ -0,0 +1,971 @@
---
tags:
- sentence-transformers
- sentence-similarity
- feature-extraction
- generated_from_trainer
- dataset_size:809
- loss:MultipleNegativesRankingLoss
base_model: sentence-transformers/all-distilroberta-v1
widget:
- source_sentence: Data pipeline architecture, Azure Data Factory, Apache Spark
  sentences:
  - 'Experience »


    Prior experience working on a SAP ECC to SAP S4 Hana Migration Project.4+ years
    in an ETL or Data Engineering roles; building and implementing data pipelines
    and modeling data.Experience with SAP data and data structures.Experience managing
    Snowflake instances, including data ingestion and modeling.Experience with IBM
    DataStage is a plus.Very strong skills with SQL with the ability to write efficient
    queries.Familiarity with Fivetran for replication.


    What You’ll Do


    Job requirements are met.Perform data analysis required to troubleshoot data related
    issues and assist in the resolution of data issues.


    Interested?


    Qualified candidates should send their resumes to [email protected]


    V-Soft Consulting Group is recognized among the top 100 fastest growing staffing
    companies in North America, V-Soft Consulting Group is headquartered in Louisville,
    KY with strategic locations in India, Canada and the U.S. V-Soft is known as an
    agile, innovative technology services company holding several awards and distinctions
    and has a wide variety of partnerships across diverse technology stacks.


    As a valued V-Soft Consultant, you’re eligible for full benefits (Medical, Dental,
    Vision), a 401(k) plan, competitive compensation and more. V-Soft is partnered
    with numerous Fortune 500 companies, exceptionally positioned to advance your
    career growth.


    V-Soft Consulting provides equal employment opportunities to all employees and
    applicants for employment and prohibits discrimination and harassment of any type
    without regard to race, color, religion, age, sex, national origin, disability
    status, genetics, protected veteran status, sexual orientation, gender identity
    or expression, or any other characteristic protected by federal, state or local
    laws.


    For more information or to view all our open jobs, please visit www.vsoftconsulting.com
    or call (844) 425-8425.'
  - "experiences that leverage the latest technologies in open source and the Cloud.\
    \ Digital Information Management (DIM) is a team of engineers committed to championing\
    \ a data-driven decision-making culture and meets the business demand for timely\
    \ insight-focused analytics and information delivery.\n\nYou will be working with\
    \ all levels of technology from backend data processing technologies (Databricks/Apache\
    \ Spark) to other Cloud computing technologies / Azure Data Platform. You should\
    \ be a strong analytical thinker, detail-oriented and love working with data with\
    \ a strong background in data engineering and application development. Must be\
    \ a hand-on technologist passionate about learning new technologies and help improve\
    \ the ways we can better leverage Advanced Analytics and Machine Learning.\n\n\
    Responsibilities\n\nBuild end-to-end direct capabilities.Create and maintain optimal\
    \ data pipeline architecture.Build the infrastructure required for optimal extraction,\
    \ transformation, and loading of data from a wide variety of data sources.Use\
    \ analytics for capitalizing on the data for making decisions and achieving better\
    \ outcomes for the business.Derive insights to differentiate member and team member\
    \ experiences. Collaborate with cross-functional teams.Analyze and define with\
    \ product teams the data migration and data integration strategies.Apply experience\
    \ in analytics, data visualization and modeling to find solutions for a variety\
    \ of business and technical problems.Querying and analyzing small and large data\
    \ sets to discover patterns and deliver meaningful insights. Integrate source\
    \ systems with information management solutions and target systems for automated\
    \ migration processes.Create proof-of-concepts to demonstrate viability of solutions\
    \ under consideration.\n\n\nQualifications\n\nBachelor’s degree in computer science,\
    \ information systems, or other technology-related field or equivalent number\
    \ of years of experience.Advanced hands-on experience implementing and supporting\
    \ large scale data processing pipelines and migrations using technologies (eg.\
    \ Azure Services, Python programming).Significant hands-on experience with Azure\
    \ services such as Azure Data Factory (ADF), Azure Databricks, Azure Data Lake\
    \ Storage (ADLS Gen2), Azure SQL, and other data sources. Significant hands-on\
    \ experience designing and implementing reusable frameworks using Apache Spark\
    \ (PySpark preferred or Java/Scala).Solid foundation in data structures, algorithms,\
    \ design patterns and strong analytical and problem-solving skills.Strong hands-on\
    \ experience leading design thinking as well as the ability to translate ideas\
    \ to clearly articulate technical solutions. Experience with any of the following\
    \ Analytics and Information Management competencies: Data Management and Architecture,\
    \ Performance Management, Information Delivery and Advanced Analytics.\n\n\nDesired\
    \ Qualifications\n\nProficiency in collaborative coding practices, such as pair\
    \ programming, and ability to thrive in a team-oriented environment.The following\
    \ certifications:Microsoft Certified Azure Data EngineerMicrosoft Certified Azure\
    \ Solutions ArchitectDatabricks Certified Associate Developer for Apache 2.4/3.0\n\
    Hours: Monday - Friday, 8:00AM - 4:30PM\n\nLocation: 820 Follin Lane, Vienna,\
    \ VA 22180 | 5510 Heritage Oaks Drive Pensacola, FL 32526 | 141 Security Drive\
    \ Winchester, VA 22602\n\nAbout Us\n\nYou have goals, dreams, hobbies, and things\
    \ you're passionate about—what's important to you is important to us. We're looking\
    \ for people who not only want to do meaningful, challenging work, keep their\
    \ skills sharp and move ahead, but who also take time for the things that matter\
    \ to them—friends, family, and passions. And we're looking for team members who\
    \ are passionate about our mission—making a difference in military members' and\
    \ their families' lives. Together, we can make it happen. Don't take our word\
    \ for it:\n\n Military Times 2022 Best for Vets Employers WayUp Top 100 Internship\
    \ Programs Forbes® 2022 The Best Employers for New Grads Fortune Best Workplaces\
    \ for Women Fortune 100 Best Companies to Work For® Computerworld® Best Places\
    \ to Work in IT Ripplematch Campus Forward Award - Excellence in Early Career\
    \ Hiring Fortune Best Place to Work for Financial and Insurance Services\n\n\n\
    \n\nDisclaimers: Navy Federal reserves the right to fill this role at a higher/lower\
    \ grade level based on business need. An assessment may be required to compete\
    \ for this position. Job postings are subject to close early or extend out longer\
    \ than the anticipated closing date at the hiring team’s discretion based on qualified\
    \ applicant volume. Navy Federal Credit Union assesses market data to establish\
    \ salary ranges that enable us to remain competitive. You are paid within the\
    \ salary range, based on your experience, location and market position\n\nBank\
    \ Secrecy Act: Remains cognizant of and adheres to Navy Federal policies and procedures,\
    \ and regulations pertaining to the Bank Secrecy Act."
  - "Data AnalystDakota Dunes, SD\nEntry Level SQL, Run SQL The queries. Client is\
    \ using ThoughtspotUnderstanding of Dashbord and Proficient in Microsoft Office\
    \ and excel \nPlease share your profile to [email protected] or reach\
    \ me on 619 771 1188."
- source_sentence: Customer data management, regulatory compliance, advanced Excel
    and Access proficiency
  sentences:
  - 'skills, attention to detail, and experience working with data in Excel. The candidate
    must enjoy collaborative work, actively participate in the development of team
    presentations, and engage in review of other analyst findings. ResponsibilitiesThe
    Junior Analyst will be responsible for examining data from different sources with
    the goal of providing insights into NHLBI, its mission, business processes, and
    information systems. Responsibilities for this position include:Develop a strong
    understanding of the organization, functions, and data sources to be able to ensure
    analytical sources and methodologies are appropriately applied for the data need.Develop
    clear and well-structured analytical plans.Ensure data sources, assumptions, methodologies,
    and visualization approaches are consistent with prior work by the OPAE.Assess
    the validity of source data and subsequent findings.Produce high quality, reliable
    data analysis on a variety of functional areas.Explain the outcome/results by
    identifying trends and creating visualizations.Use best practices in data analysis
    and visualization.Exhibit results, conclusions, and recommendations to leadership,
    and customize presentations to align with various audiences.Document and communicate
    analysis results (briefings, reports, and/or backup analysis files) in a manner
    that clearly articulates the approach, results, and data-driven recommendations.Continually
    assess all current activities and proactively communicate potential issues and/or
    challenges.May support data scientists on various projects. Qualifications Minimum
    qualifications:Bachelor’s degree in data science or related fields.Minimum of
    2 years of demonstrable experience in data analysis.Must have 2 years of experience
    in using Excel for data analysis and visualization andWillingness to learn basic
    data science tools and methodologies.Intermediate to advanced proficiency with
    industry-standard word processing, spreadsheet, and presentation software programs.Excellent
    verbal and written communication skills.Strong attention to detail.Collaborative
    team player.Proven problem solving and critical thinking skills.Must be able to
    obtain Public Trust Clearance.US work authorization (we participate in E-Verify).
    Preferred qualifications:Proficient in the use of basic data science tools and
    methodologies (python, SQL, machine learning).MS in data science or related fields.

    Salary and benefitsWe offer a competitive salary and a generous benefits package,
    including full health and dental, HSA and retirement accounts, short- and long-term
    disability insurance, life insurance, paid time off and 11 federal holidays. Location:
    Washington DC, Hybrid'
  - SKILLS – Very Strong, Microsoft Excel (Pivot Tables, Sumifs, Vlookups etc), Data
    manipulation, Logistics and operations terminology Job SummaryApple AMR Ops Logistics
    is looking for an experienced Data Analyst to support its Business Analytics team.
    This position will be responsible for ensuring maintenance and frequent updates
    to Apple’s internal Shipping Exceptions Management System. The position will work
    closely with AMR Logistics stakeholders to ensure timely execution of daily jobs
    by transforming data in Excel into Apple’s internal tools. Key Responsibilities•
    Review multiple Excel reports and ensure timely uploads into the Shipping Exceptions
    Management System• Develop robust data visualizations that will help to answer
    commonly asked questions quickly and thoroughly about Shipping Exceptions• Identify
    data anomalies, work to root cause and remediate issues in data collection, storage,
    transformation, or reporting Key Qualifications1 – 2 years of work experience
    preferredSkilled in Excel and data manipulation (mandatory)Familiarity with Logistics
    and Operations terminologyFamiliarity with Business Objects a plusAbility to create
    cross-platform reportsAbility to turn data into information and insightsHigh-level
    attention to detail, including the ability to spot data errors and potential issues
    in Apple’s internal systems Hard Skills:Microsoft Excel (Pivot Tables, Sumifs,
    Vlookups etc)Good Verbal and Communication skills
  - 'Qualifications:0-2 years relevant experienceAdvanced knowledge of MS Office Suite,
    including proficiency in Excel and Access.Consistently demonstrates clear and
    concise written and verbal communication skills.Demonstrated organization skills
    with an excellent attention to detail.Ability to focus on high quality work.

    Education:Bachelor’s/University degree or equivalent experiencePlease share with
    me your updated resume if you are interested in applying for this role.

    Dexian is a leading provider of staffing, IT, and workforce solutions with over
    12,000 employees and 70 locations worldwide. As one of the largest IT staffing
    companies and the 2nd largest minority-owned staffing company in the U.S., Dexian
    was formed in 2023 through the merger of DISYS and Signature Consultants. Combining
    the best elements of its core companies, Dexian''s platform connects talent, technology,
    and organizations to produce game-changing results that help everyone achieve
    their ambitions and goals.Dexian''s brands include Dexian DISYS, Dexian Signature
    Consultants, Dexian Government Solutions, Dexian Talent Development and Dexian
    IT Solutions. Visit https://dexian.com/ to learn more.Dexian is'
- source_sentence: Clarity PPM reporting, data dashboard customization, performance
    quality assurance
  sentences:
  - "skills and the ability to connect and communicate across multiple departments.Adept\
    \ at report writing and presenting findings.Ability to work under pressure and\
    \ meet tight deadlines.Be able to read and update project and program level resource\
    \ forecasts.Identify recurring process issues and work with managers to find solutions\
    \ and initiate improvements to mitigate future recurrence. \nSkills and Qualifications:5+\
    \ years in a Data Analyst and/or Data Scientist capacity.5 years of experience\
    \ with Clarity PPM reporting, developing data dashboards, charts and datasets\
    \ in Clarity.Strong knowledge of and experience with reporting packages (Business\
    \ Objects, Tableau, Power BI, etc.), databases (SQL), programming (XML, JavaScript,\
    \ etc.).Knowledge of statistics and experience using statistical packages for\
    \ analyzing datasets (Excel, SAS, R, SPSS, etc.)High understanding of PPM disciplines\
    \ has worked in a team and covered strategic projects. Experience with Dashboard\
    \ customization, configuration, user interface personalization and infrastructure\
    \ management will be helpful.Strong analytical skills with the ability to collect,\
    \ organize, analyze, and disseminate significant amounts of information with attention\
    \ to detail, accuracy, and actionable insights.Excellent communicator, adjusting\
    \ communication styles based on your audience.Quick learner, adaptable and can\
    \ thrive in new environments.Proactive, confident, and engaging; especially when\
    \ it comes to large stakeholder groups.Capable of critically evaluating data to\
    \ derive meaningful, actionable insights.Demonstrate superior communication and\
    \ presentation capabilities, adept at simplifying complex data insights for audiences\
    \ without a technical background."
  - "skills and current Lubrizol needs):\n\nCreate predictive models by mining complex\
    \ data for critical formulating or testing insights Implement and assess algorithms\
    \ in R, Python, SAS, JMP or C#/C++ Research and implement new statistical, machine\
    \ learning and/or optimization approaches (PhD level)Collaborate with data science\
    \ team, as well as, scientists and engineers, to understand their needs, and find\
    \ creative solutions to meet those needs \n\nPrevious Intern Projects Include\n\
    \nPredictive modeling using Bayesian and machine learning methods R/Shiny tool\
    \ development to enable model predictions and formulation optimization Creation\
    \ of an interactive visualization tool for monitoring predictive models Multitask\
    \ learning (transfer learning) using co-regionalized Gaussian Processes (PhD level)Multi-objective\
    \ optimization using genetic algorithms (PhD level)Survival modeling using bagged\
    \ Cox proportional hazards regression trees (PhD level)Bootstrap variance estimation\
    \ for complex nonlinear models (PhD level)\n\nWhat tools do you need for success?\n\
    \nEnrolled in a Masters or PhD program such as statistics, data analytics, machine\
    \ learningExcellent programming skills with the ability to learn new methods quicklyExposure\
    \ to database systems and the ability to efficiently manipulate complex data Interest\
    \ and experience in advanced statistical modeling/machine learning methods (PhD\
    \ level)Coursework in statistical modeling and data mining methodsCuriosity and\
    \ creativity\n\nBenefits Of Lubrizol’s Chemistry Internship Programs\n\nRewarding\
    \ your hard work!Competitive payHoliday pay for holidays that fall within your\
    \ work periodFUN! We host a variety of events and activities for our students.\
    \ Past events include a Cleveland Cavaliers game, paid volunteering days, professional\
    \ development and networking events, and even a picnic hosted by our CEO!\nWhile\
    \ headquartered in the United States, Lubrizol is truly a global specialty chemical\
    \ company. We have a major presence in five global regions and do business in\
    \ more than 100 countries. Our corporate culture ensures that Lubrizol is one\
    \ company throughout the world, but you will find each region is a unique place\
    \ to work, live and play.\n\nLubrizol is"
  - 'experience with agile engineering and problem-solving creativity. United by our
    core values and our purpose of helping people thrive in the brave pursuit of next,
    our 20,000+ people in 53 offices around the world combine experience across technology,
    data sciences, consulting and customer obsession to accelerate our clients’ businesses
    through designing the products and services their customers truly value.

    Job Description

    This position requires in-depth knowledge and expertise in GCP services, architecture,
    and best practices. Will work closely with clients to understand their business
    objectives and develop strategies to leverage GCP to meet their needs. They will
    collaborate with cross-functional teams to design, implement, and manage scalable
    and reliable cloud solutions. They will also be responsible for driving innovation
    and staying up-to-date with the latest GCP technologies and trends to provide
    industry-leading solutions.

    Your Impact:

    Collaborate with clients to understand their business requirements and design
    GCP architecture to meet their needs.Develop and implement cloud strategies, best
    practices, and standards to ensure efficient and effective cloud utilization.Work
    with cross-functional teams to design, implement, and manage scalable and reliable
    cloud solutions on GCP.Provide technical guidance and mentorship to the team to
    develop their skills and expertise in GCP.Stay up-to-date with the latest GCP
    technologies, trends, and best practices and assess their applicability to client
    solutions.Drive innovation and continuous improvement in GCP offerings and services
    to provide industry-leading solutions.Collaborate with sales and business development
    teams to identify and pursue new business opportunities related to GCP.Ensure
    compliance with security, compliance, and governance requirements in GCP solutions.Develop
    and maintain strong relationships with clients, vendors, and internal stakeholders
    to promote the adoption and success of GCP solutions.

    Qualifications

    Must have good implementationexperience onvariousGCP’s Data Storage and Processing
    services such as BigQuery, Dataflow, Bigtable, Dataform, Data fusion, cloud spanner,
    Cloud SQLMust have programmatic experience with tools like Javascript, Python,
    Apache Spark.Experience in building advance Bigquery SQL and Bigquery modelling
    is requiredExperience in orchestrating end-end data pipelines with tools like
    cloud composer, Dataform is highly desired.Experience in managing complex and
    reusable dataflow pipelines is highly desired.

    What sets you apart:

    Experience in complex migrations from legacy data warehousing solutions or on-prem
    datalakes to GCPExperience in maneuvering resources in delivering tight projectsExperience
    in building real-time ingestion and processing frameworks on GCP.Adaptability
    to learn new technologies and products as the job demands.Experience in implementing
    Data-governance solutionsKnowledge in AI, ML and GEN-AI use casesMulti-cloud &
    hybrid cloud experienceAny cloud certification

    Additional Information

    Flexible vacation policy; Time is not limited, allocated, or accrued16 paid holidays
    throughout the yearGenerous parental leave and new parent transition programTuition
    reimbursementCorporate gift matching program

    Career Level: Senior Associate

    Base Salary Range for the Role: 115,000-150,000 (varies depending on experience)
    The range shown represents a grouping of relevant ranges currently in use at Publicis
    Sapient. Actual range for this position may differ, depending on location and
    specific skillset required for the work itself.'
- source_sentence: Go-to-Market strategy, Salesforce dashboard development, SQL data
    analysis
  sentences:
  - "experience: from patients finding clinics and making appointments, to checking\
    \ in, to clinical documentation, and to the final bill paid by the patient. Our\
    \ team is committed to changing healthcare for the better by innovating and revolutionizing\
    \ on-demand healthcare for millions of patients across the country.\n\nExperity\
    \ offers the following:\n\nBenefits – Comprehensive coverage starts first day\
    \ of employment and includes Medical, Dental/Orthodontia, and Vision.Ownership\
    \ - All Team Members are eligible for synthetic ownership in Experity upon one\
    \ year of employment with real financial rewards when the company is successful!Employee\
    \ Assistance Program - This robust program includes counseling, legal resolution,\
    \ financial education, pet adoption assistance, identity theft and fraud resolution,\
    \ and so much more.Flexibility – Experity is committed to helping team members\
    \ face the demands of juggling work, family and life-related issues by offering\
    \ flexible work scheduling to manage your work-life balance.Paid Time Off (PTO)\
    \ - Experity offers a generous PTO plan and increases with milestones to ensure\
    \ our Team Members have time to recharge, relax, and spend time with loved ones.Career\
    \ Development – Experity maintains a learning program foundation for the company\
    \ that allows Team Members to explore their potential and achieve their career\
    \ goals.Team Building – We bring our Team Members together when we can to strengthen\
    \ the team, build relationships, and have fun! We even have a family company picnic\
    \ and a holiday party.Total Compensation - Competitive pay, quarterly bonuses\
    \ and a 401(k) retirement plan with an employer match to help you save for your\
    \ future and ensure that you can retire with financial security.\n\nHybrid workforce:\n\
    \nExperity offers Team Members the opportunity to work remotely or in an office.\
    \ While this position allows remote work, we require Team Members to live within\
    \ a commutable distance from one of our locations to ensure you are available\
    \ to come into the office as needed.\n\nJob Summary: \n\nWe are seeking a highly\
    \ skilled and data-driven Go-to-Market (GTM) Data Analyst to join our team. The\
    \ ideal candidate will be adept at aggregating and analyzing data from diverse\
    \ sources, extracting valuable insights to inform strategic decisions, and proficient\
    \ in building dynamic dashboards in Salesforce and other BI tools. Your expertise\
    \ in SQL and data analytics will support our go-to-market strategy, optimize our\
    \ sales funnel, and contribute to our overall success.\n\nExperience: \n\nBachelor’s\
    \ or Master’s degree in Data Science, Computer Science, Information Technology,\
    \ or a related field.Proven experience as a Data Analyst or similar role, with\
    \ a strong focus on go-to-market strategies.Expertise in SQL and experience with\
    \ database management.Proficiency in Salesforce and other BI tools (e.g., Tableau,\
    \ Power BI).Strong analytical skills with the ability to collect, organize, analyze,\
    \ and disseminate significant amounts of information with attention to detail\
    \ and accuracy.Excellent communication and presentation skills, capable of conveying\
    \ complex data insights in a clear and persuasive manner.Adept at working in fast-paced\
    \ environments and managing multiple projects simultaneously.Familiarity with\
    \ sales and marketing metrics, and how they impact business decisions.\n\nBudgeted\
    \ salary range:\n\n$66,900 to $91,000\n\nTeam Member Competencies:\n\nUnderstands\
    \ role on the team and works to achieve goals to the best of your ability.Working\
    \ within a team means there will be varying opinions and ideas. Active listening\
    \ and thoughtfully responding to what your team member says.Take responsibility\
    \ for your mistakes and look for solutions. Understand how your actions impact\
    \ team.Provides assistance, information, or other support to others to build or\
    \ maintain relationships.Maintaining a positive attitude. Tackle challenges as\
    \ they come, and don’t let setbacks get you down.Gives honest and constructive\
    \ feedback to other team members.When recognizing a problem, take action to solve\
    \ it.Demonstrates and supports the organization's core values.\n\nEvery team member\
    \ exhibits our core values:\n\nTeam FirstLift Others UpShare OpenlySet and Crush\
    \ GoalsDelight the Client\n\nOur urgent care solutions include:\n\nElectronic\
    \ Medical Records (EMR): Software that healthcare providers use to input patient\
    \ data, such as medical history, diagnoses, treatment plans, medications, and\
    \ test results.Patient Engagement (PE): Software that shows patients the wait\
    \ times at various clinics, allows patients to reserve a spot in line if there's\
    \ a wait, and book the appointment.Practice Management (PM): Software that the\
    \ clinic front desk staff uses to register the patient once they arrive for their\
    \ appointment.Billing and Revenue Cycle Management (RCM): Software that manages\
    \ coding, billing and payer contracts for clinics so they don’t have to.Teleradiology:\
    \ Board certified radiologist providing accurate and timely reads of results from\
    \ X-rays, CT scans, MRIs, and ultrasounds, for our urgent care clients.Consulting:\
    \ Consulting services for urgent care clinics to assist with opening, expanding\
    \ and enhancing client's businesses"
  - 'experience with Cloud Engineering / Services.3+ years of work experience as a
    backend software engineer in Python with exceptional software engineering knowledge.
    Experience with ML workflow orchestration tools: Airflow, Kubeflow etc. Advanced
    working knowledge of object-oriented/object function programming languages: Python,
    C/C++, JuliaExperience in DevOps: Jenkins/Tekton etc. Experience with cloud services,
    preferably GCP Services like Vertex AI, Cloud Function, BigQuery etc. Experience
    in container management solution: Kubernetes, Docker.Experience in scripting language:
    Bash, PowerShell etc. Experience with Infrastructure as code: Terraform etc.

    Skills Preferred:Master focused on Computer Science / Machine Learning or related
    field. Experience working with Google Cloud platform (GCP) - specifically Google
    Kubernetes engine, Terraform, and infrastructure.Experience in delivering cloud
    engineering products.Experience in programming concepts such as Paired Programming,
    Test Driven Development, etc. Understanding of MLOPs/Machine Learning Life Cycle
    and common machine learning frameworks: sklearn, TensorFlow, pytorch etc. is a
    big plus.Must be a quick learner and open to learning new technology. Experience
    applying agile practices to solution delivery. Experience in all phases of the
    development lifecycle. Must be team-oriented and have excellent oral and written
    communication skills. Good organizational and time-management skills. Must be
    a self-starter to understand existing bottlenecks and come up with innovative
    solutions. Knowledge of coding and software craftsmanship practices.Experience
    and good understanding of GCP processing /DevOPs/ Machine Learning'
  - "Skills\n\n Good banking domain background with Advanced SQL knowledge is\
    \ a MUST \n\n Expert in Advanced Excel functions used for data analysis Ability\
    \ to Understand Physical and Logical Data Models and understanding of Data Quality\
    \ Concepts. Write SQL Queries to pull/fetch data from systems/DWH Understanding\
    \ of Data WareHousing concepts Understanding the Data Movement between Source\
    \ and Target applications and perform data quality checks to maintain the data\
    \ integrity, accuracy and consistency Experience in analysis/reconciliation of\
    \ data as per the business requirements Conduct research and Analysis in order\
    \ to come up with solution to business problems Understanding requirements directly\
    \ from clients/ client stakeholders and writing code to extract relevant data\
    \ and produce report\n\nExperience Required\n\n10-12 Years\n\nRoles & Responsibilities\n\
    \nInterpret data, analyze results using Data Analysis techniques and provide ongoing\
    \ reports\n\n Develop and implement databases, data repositories for performing\
    \ analysis Acquire data from primary or secondary data sources and maintain databases/data\
    \ repositories Identify, analyze, and interpret trends or patterns in complex\
    \ data sets Filter and “clean” data by reviewing computer reports, printouts,\
    \ and performance indicators to locate and correct code problems ; Work with management\
    \ to prioritize business and information needs Locate and define new process improvement\
    \ opportunities Good exposure and hands on exp with Excel features used for data\
    \ analysis & reporting"
- source_sentence: Senior Data Scientist, Statistical Analysis, Data Interpretation,
    TS/SCI Clearance
  sentences:
  - Skills :8+ years of relevant experienceExperience with big data technology(s)
    or ecosystem in Hadoop, HDFS (also an understanding of HDFS Architecture), Hive,
    Map Reduce, Base - this is considering all of AMP datasets are in HDFS/S3Advanced
    SQL and SQL performance tuningStrong experience in Spark and Scala
  - 'experience, regulatory compliance & operational efficiencies, enabled by Google
    Cloud.


    This position will lead integration of core data from New North America Lending
    platforms into Data Factory (GCP BQ), and build upon the existing analytical data,
    including merging historical data from legacy platforms with data ingested from
    new platforms. To enable critical regulatory reporting, operational analytics,
    risk analytics and modeling


    Will provide overall technical guidance to implementation teams and oversee adherence
    to engineering patterns and data quality and compliance standards, across all
    data factory workstreams. Support business adoption of data from new platform
    and sunset of legacy platforms & technology stack.


    This position will collaborate with technical program manager, data platform enablement
    manager, analytical data domain leaders, subject matter experts, supplier partners,
    business partner and IT operations teams to deliver the Data integration workstream
    plan following agile framework.


    Responsibilities


    We are looking for dynamic, technical leader with prior experience of leading
    data warehouse as part of complex business & tech transformation. Has strong experience
    in Data Engineering, GCP Big Query, Data ETL pipelines, Data architecture, Data
    Governance, Data protection, security & compliance, and user access enablement.


    Key responsibilities -


    This role will focus on implementing data integration of new lending platform
    into Google Cloud Data Platform (Data factory), existing analytical domains and
    building new data marts, while ensuring new data is integrated seamlessly with
    historical data. Will lead a dedicated team of data engineers & analysts to understand
    and assess new data model and attributes, in upstream systems, and build an approach
    to integrate this data into factory.Will lead the data integration architecture
    (in collaboration with core mod platform & data factory architects) and designs,
    and solution approach for Data FactoryWill understand the scope of reporting for
    MMP (Minimal Marketable Product) launch & build the data marts required to enable
    agreed use cases for regulatory, analytical & operational reporting, and data
    required for Risk modeling. Will collaborate with Data Factory Analytical domain
    teams, to build new pipelines & expansion of analytical domains. Will lead data
    integration testing strategy & its execution within Data Factory (end-to-end,
    from ingestion, to analytical domains, to marts) to support use cases.Will be
    Data Factory SPOC for all Core Modernization program and help facilitate & prioritize
    backlogs of data workstreams.Ensure the data solutions are aligned to overall
    program goals, timing and are delivered with qualityCollaborate with program managers
    to plan iterations, backlogs and dependencies across all workstream to progress
    workstreams at required pace.Drive adoption of standardized architecture, design
    and quality assurance approaches across all workstreams and ensure solutions adheres
    to established standards.People leader for a team of 5+ data engineers and analysts.
    Additionally manage supplier partner team who will execute the migration planLead
    communication of status, issues & risks to key stakeholders



    Qualifications


    You''ll have…..


    Bachelor’s degree in computer science or equivalent5+ years of experience delivering
    complex Data warehousing projects and leading teams of 10+ engineers and suppliers
    to build Big Data/Datawarehouse solutions.10+ years of experience in technical
    delivery of Data Warehouse Cloud Solutions for large companies, and business adoption
    of these platforms to build analytics , insights & modelsPrior experience with
    cloud data architecture, data modelling principles, DevOps, security and controls
    Google Cloud certified - Cloud Data Engineer preferred.Hands on experience of
    the following:Orchestration of data pipelines (e.g. Airflow, DBT, Dataform, Astronomer).Batch
    data pipelines (e.g. BQ SQL, Dataflow, DTS).Streaming data pipelines (e.g. Kafka,
    Pub/Sub, gsutil)Data warehousing techniques (e.g. data modelling, ETL/ELT).



    Even better, you may have….


    Master’s degree in- Computer science, Computer engineering, Data science or related
    fieldKnowledge of Ford credit business functional, core systems, data knowledge
    Experience in technical program management & delivering complex migration projects.Building
    high performance teamsManaging/or working with globally distributed teamsPrior
    experience in leveraging offshore development service providers.Experience in
    a Fintech or large manufacturing company.Very strong leadership, communication,
    organizing and problem-solving skills.Ability to negotiate with and influence
    stakeholders & drive forward strategic data transformation.Quick learner, self-starter,
    energetic leaders with drive to deliver results. Empathy and care for customers
    and teams, as a leader guide teams on advancement of skills, objective setting
    and performance assessments



    You may not check every box, or your experience may look a little different from
    what we''ve outlined, but if you think you can bring value to Ford Motor Company,
    we encourage you to apply!


    As an established global company, we offer the benefit of choice. You can choose
    what your Ford future will look like: will your story span the globe, or keep
    you close to home? Will your career be a deep dive into what you love, or a series
    of new teams and new skills? Will you be a leader, a changemaker, a technical
    expert, a culture builder...or all of the above? No matter what you choose, we
    offer a work life that works for you, including:


    Immediate medical, dental, and prescription drug coverageFlexible family care,
    parental leave, new parent ramp-up programs, subsidized back-up childcare and
    moreVehicle discount program for employees and family members, and management
    leasesTuition assistanceEstablished and active employee resource groupsPaid time
    off for individual and team community serviceA generous schedule of paid holidays,
    including the week between Christmas and New Year''s DayPaid time off and the
    option to purchase additional vacation time



    For a detailed look at our benefits, click here:


    2024 New Hire Benefits Summary


    Visa sponsorship is not available for this position.


    Candidates for positions with Ford Motor Company must be legally authorized to
    work in the United States. Verification of employment eligibility will be required
    at the time of hire.


    We are'
  - "experience to solve some of the most challenging intelligence issues around data.\n\
    \nJob Responsibilities & Duties\n\nDevise strategies for extracting meaning and\
    \ value from large datasets. Make and communicate principled conclusions from\
    \ data using elements of mathematics, statistics, computer science, and application\
    \ specific knowledge. Through analytic modeling, statistical analysis, programming,\
    \ and/or another appropriate scientific method, develop and implement qualitative\
    \ and quantitative methods for characterizing, exploring, and assessing large\
    \ datasets in various states of organization, cleanliness, and structure that\
    \ account for the unique features and limitations inherent in data holdings. Translate\
    \ practical needs and analytic questions related to large datasets into technical\
    \ requirements and, conversely, assist others with drawing appropriate conclusions\
    \ from the analysis of such data. Effectively communicate complex technical information\
    \ to non-technical audiences.\n\nMinimum Qualifications\n\n10 years relevant experience\
    \ with Bachelors in related field; or 8 years experience with Masters in related\
    \ field; or 6 years experience with a Doctoral degree in a related field; or 12\
    \ years of relevant experience and an Associates may be considered for individuals\
    \ with in-depth experienceDegree in an Mathematics, Applied Mathematics, Statistics,\
    \ Applied Statistics, Machine Learning, Data Science, Operations Research, or\
    \ Computer Science, or related field of technical rigorAbility/willingness to\
    \ work full-time onsite in secure government workspacesNote: A broader range of\
    \ degrees will be considered if accompanied by a Certificate in Data Science from\
    \ an accredited college/university.\n\nClearance Requirements\n\nThis position\
    \ requires a TS/SCI with Poly\n\nLooking for other great opportunities? Check\
    \ out Two Six Technologies Opportunities for all our Company’s current openings!\n\
    \nReady to make the first move towards growing your career? If so, check out the\
    \ Two Six Technologies Candidate Journey! This will give you step-by-step directions\
    \ on applying, what to expect during the application process, information about\
    \ our rich benefits and perks along with our most frequently asked questions.\
    \ If you are undecided and would like to learn more about us and how we are contributing\
    \ to essential missions, check out our Two Six Technologies News page! We share\
    \ information about the tech world around us and how we are making an impact!\
    \ Still have questions, no worries! You can reach us at Contact Two Six Technologies.\
    \ We are happy to connect and cover the information needed to assist you in reaching\
    \ your next career milestone.\n\nTwo Six Technologies is \n\nIf you are an individual\
    \ with a disability and would like to request reasonable workplace accommodation\
    \ for any part of our employment process, please send an email to [email protected].\
    \ Information provided will be kept confidential and used only to the extent required\
    \ to provide needed reasonable accommodations.\n\nAdditionally, please be advised\
    \ that this business uses E-Verify in its hiring practices.\n\n\n\nBy submitting\
    \ the following application, I hereby certify that to the best of my knowledge,\
    \ the information provided is true and accurate."
pipeline_tag: sentence-similarity
library_name: sentence-transformers
metrics:
- cosine_accuracy
model-index:
- name: SentenceTransformer based on sentence-transformers/all-distilroberta-v1
  results:
  - task:
      type: triplet
      name: Triplet
    dataset:
      name: ai job validation
      type: ai-job-validation
    metrics:
    - type: cosine_accuracy
      value: 0.9900990128517151
      name: Cosine Accuracy
  - task:
      type: triplet
      name: Triplet
    dataset:
      name: ai job test
      type: ai-job-test
    metrics:
    - type: cosine_accuracy
      value: 1.0
      name: Cosine Accuracy
---

# SentenceTransformer based on sentence-transformers/all-distilroberta-v1

This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [sentence-transformers/all-distilroberta-v1](https://huggingface.co/sentence-transformers/all-distilroberta-v1). It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.

## Model Details

### Model Description
- **Model Type:** Sentence Transformer
- **Base model:** [sentence-transformers/all-distilroberta-v1](https://huggingface.co/sentence-transformers/all-distilroberta-v1) <!-- at revision 8d88b92a34345fd6a139aa47768c9881720006ce -->
- **Maximum Sequence Length:** 512 tokens
- **Output Dimensionality:** 768 dimensions
- **Similarity Function:** Cosine Similarity
<!-- - **Training Dataset:** Unknown -->
<!-- - **Language:** Unknown -->
<!-- - **License:** Unknown -->

### Model Sources

- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)

### Full Model Architecture

```
SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: RobertaModel
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
)
```
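
The module stack above reads as: a RoBERTa encoder produces per-token vectors, the Pooling module averages them under the attention mask, and Normalize scales the result to unit length. As a hedged sketch (not part of the original card), the same embedding can be reproduced with plain `transformers`, up to small numerical error:

```python
import torch
import torch.nn.functional as F
from transformers import AutoModel, AutoTokenizer

repo = "krshahvivek/distilroberta-ai-job-embeddings"
tokenizer = AutoTokenizer.from_pretrained(repo)
encoder = AutoModel.from_pretrained(repo)

batch = tokenizer(["Data pipeline architecture"], padding=True, truncation=True,
                  max_length=512, return_tensors="pt")
with torch.no_grad():
    token_embeddings = encoder(**batch).last_hidden_state  # (0): Transformer

# (1): Pooling with pooling_mode_mean_tokens=True -> mask-aware mean over tokens
mask = batch["attention_mask"].unsqueeze(-1).float()
pooled = (token_embeddings * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1e-9)

# (2): Normalize -> unit-length vectors, so dot product equals cosine similarity
embedding = F.normalize(pooled, p=2, dim=1)
print(embedding.shape)  # torch.Size([1, 768])
```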
669 |
+
|
670 |
+
## Usage
|
671 |
+
|
672 |
+
### Direct Usage (Sentence Transformers)
|
673 |
+
|
674 |
+
First install the Sentence Transformers library:
|
675 |
+
|
676 |
+
```bash
|
677 |
+
pip install -U sentence-transformers
|
678 |
+
```
|
679 |
+
|
680 |
+
Then you can load this model and run inference.
|
681 |
+
```python
|
682 |
+
from sentence_transformers import SentenceTransformer

# Download from the 🤗 Hub
model = SentenceTransformer("krshahvivek/distilroberta-ai-job-embeddings")
# Run inference
sentences = [
    'Senior Data Scientist, Statistical Analysis, Data Interpretation, TS/SCI Clearance',
    'experience to solve some of the most challenging intelligence issues around data.\n\nJob Responsibilities & Duties\n\nDevise strategies for extracting meaning and value from large datasets. Make and communicate principled conclusions from data using elements of mathematics, statistics, computer science, and application specific knowledge. Through analytic modeling, statistical analysis, programming, and/or another appropriate scientific method, develop and implement qualitative and quantitative methods for characterizing, exploring, and assessing large datasets in various states of organization, cleanliness, and structure that account for the unique features and limitations inherent in data holdings. Translate practical needs and analytic questions related to large datasets into technical requirements and, conversely, assist others with drawing appropriate conclusions from the analysis of such data. Effectively communicate complex technical information to non-technical audiences.\n\nMinimum Qualifications\n\n10 years relevant experience with Bachelors in related field; or 8 years experience with Masters in related field; or 6 years experience with a Doctoral degree in a related field; or 12 years of relevant experience and an Associates may be considered for individuals with in-depth experienceDegree in an Mathematics, Applied Mathematics, Statistics, Applied Statistics, Machine Learning, Data Science, Operations Research, or Computer Science, or related field of technical rigorAbility/willingness to work full-time onsite in secure government workspacesNote: A broader range of degrees will be considered if accompanied by a Certificate in Data Science from an accredited college/university.\n\nClearance Requirements\n\nThis position requires a TS/SCI with Poly\n\nLooking for other great opportunities? Check out Two Six Technologies Opportunities for all our Company’s current openings!\n\nReady to make the first move towards growing your career? If so, check out the Two Six Technologies Candidate Journey! This will give you step-by-step directions on applying, what to expect during the application process, information about our rich benefits and perks along with our most frequently asked questions. If you are undecided and would like to learn more about us and how we are contributing to essential missions, check out our Two Six Technologies News page! We share information about the tech world around us and how we are making an impact! Still have questions, no worries! You can reach us at Contact Two Six Technologies. We are happy to connect and cover the information needed to assist you in reaching your next career milestone.\n\nTwo Six Technologies is \n\nIf you are an individual with a disability and would like to request reasonable workplace accommodation for any part of our employment process, please send an email to [email protected]. Information provided will be kept confidential and used only to the extent required to provide needed reasonable accommodations.\n\nAdditionally, please be advised that this business uses E-Verify in its hiring practices.\n\n\n\nBy submitting the following application, I hereby certify that to the best of my knowledge, the information provided is true and accurate.',
    'Skills :8+ years of relevant experienceExperience with big data technology(s) or ecosystem in Hadoop, HDFS (also an understanding of HDFS Architecture), Hive, Map Reduce, Base - this is considering all of AMP datasets are in HDFS/S3Advanced SQL and SQL performance tuningStrong experience in Spark and Scala',
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# [3, 768]

# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [3, 3]
```
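
For retrieval-style use, the embeddings plug directly into semantic search over job postings. A minimal sketch, not part of the original card — the query string, corpus entries, and `top_k` are invented for illustration; `util.semantic_search` ranks the corpus by cosine similarity:

```python
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("krshahvivek/distilroberta-ai-job-embeddings")

# Illustrative query and corpus; replace with real postings.
query = "Machine learning engineer with Spark and advanced SQL experience"
corpus = [
    "Strong experience in Spark and Scala, advanced SQL performance tuning",
    "Meticulous data entry for legal documents, active Top-Secret clearance",
]

query_emb = model.encode(query, convert_to_tensor=True)
corpus_emb = model.encode(corpus, convert_to_tensor=True)

# Each hit is a dict with "corpus_id" and "score" (cosine similarity).
hits = util.semantic_search(query_emb, corpus_emb, top_k=2)[0]
for hit in hits:
    print(f'{hit["score"]:.3f}  {corpus[hit["corpus_id"]]}')
```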

<!--
### Direct Usage (Transformers)

<details><summary>Click to see the direct usage in Transformers</summary>

</details>
-->

<!--
### Downstream Usage (Sentence Transformers)

You can finetune this model on your own dataset.

<details><summary>Click to expand</summary>

</details>
-->

<!--
### Out-of-Scope Use

*List how the model may foreseeably be misused and address what users ought not to do with the model.*
-->

## Evaluation

### Metrics

#### Triplet

* Datasets: `ai-job-validation` and `ai-job-test`
* Evaluated with [<code>TripletEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.TripletEvaluator)

| Metric              | ai-job-validation | ai-job-test |
|:--------------------|:------------------|:------------|
| **cosine_accuracy** | **0.9901**        | **1.0**     |
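
The accuracies above are triplet accuracies: a triplet counts as correct when the anchor query is more similar to its matching job text (positive) than to a mismatched one (negative). A minimal sketch of computing the same metric with `TripletEvaluator` — the anchor/positive/negative strings below are invented placeholders for your own evaluation triplets:

```python
from sentence_transformers import SentenceTransformer
from sentence_transformers.evaluation import TripletEvaluator

model = SentenceTransformer("krshahvivek/distilroberta-ai-job-embeddings")

# Placeholder triplets; use your held-out (anchor, positive, negative) lists.
anchors = ["GCP Data Engineer, BigQuery, Airflow DAG"]
positives = ["6-8 years building data pipelines on BigQuery with Airflow DAGs"]
negatives = ["Meticulous data entry for legal documents"]

evaluator = TripletEvaluator(
    anchors=anchors,
    positives=positives,
    negatives=negatives,
    name="ai-job-test",  # prefixes the metric key, as in the table above
)
print(evaluator(model))  # e.g. {'ai-job-test_cosine_accuracy': 1.0}
```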

<!--
## Bias, Risks and Limitations

*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
-->

<!--
### Recommendations

*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
-->

## Training Details

### Training Dataset

#### Unnamed Dataset

* Size: 809 training samples
* Columns: <code>sentence_0</code> and <code>sentence_1</code>
* Approximate statistics based on the first 809 samples:
  |         | sentence_0                                                                         | sentence_1                                                                           |
  |:--------|:-----------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------|
  | type    | string                                                                             | string                                                                               |
  | details | <ul><li>min: 8 tokens</li><li>mean: 15.02 tokens</li><li>max: 40 tokens</li></ul> | <ul><li>min: 7 tokens</li><li>mean: 348.14 tokens</li><li>max: 512 tokens</li></ul> |
* Samples:
  | sentence_0 | sentence_1 |
  |:-----------|:-----------|
  | <code>GCP Data Engineer, BigQuery, Airflow DAG, Hadoop ecosystem</code> | <code>requirements for our direct client, please go through the below Job Description. If you are interested please send me your updated word format resume to [email protected] and reach me @ 520-231-4672.<br> Title: GCP Data EngineerLocation: Hartford, CTDuration: Full Time<br>6-8 Years of experience in data extraction and creating data pipeline workflows on Bigdata (Hive, HQL/PySpark) with knowledge of Data Engineering concepts.Experience in analyzing large data sets from multiple data sources, perform validation of data.Knowledge of Hadoop eco-system components like HDFS, Spark, Hive, Sqoop.Experience writing codes in Python.Knowledge of SQL/HQL to write optimized queries.Hands on with GCP Cloud Services such as Big Query, Airflow DAG, Dataflow, Beam etc.</code> |
  | <code>Data analysis for legal documents, meticulous data entry, active Top-Secret security clearance</code> | <code>Requirements NOTE: Applicants with an Active TS Clearance preferred Requirements * High School diploma or GED, Undergraduate degree preferred Ability to grasp and understand the organization and functions of the customer Meticulous data entry skills Excellent communication skills; oral and written Competence to review, interpret, and evaluate complex legal and non-legal documents Attention to detail and the ability to read and follow directions is extremely important Strong organizational and prioritization skills Experience with the Microsoft Office suite of applications (Excel, PowerPoint, Word) and other common software applications, to include databases, intermediate skills preferred Proven commitment and competence to provide excellent customer service; positive and flexible Ability to work in a team environment and maintain a professional dispositionThis position requires U.S. Citizenship and a 7 (or 10) year minimum background investigation ** NOTE: The 20% pay differential is d...</code> |
  | <code>Trust & Safety, Generative AI, Recommender Systems</code> | <code>experiences achieve more in their careers. Our vision is to create economic opportunity for every member of the global workforce. Every day our members use our products to make connections, discover opportunities, build skills and gain insights. We believe amazing things happen when we work together in an environment where everyone feels a true sense of belonging, and that what matters most in a candidate is having the skills needed to succeed. It inspires us to invest in our talent and support career growth. Join us to challenge yourself with work that matters.<br><br>Location: <br><br>At LinkedIn, we trust each other to do our best work where it works best for us and our teams. This role offers a hybrid work option, meaning you can work from home and commute to a LinkedIn office, depending on what’s best for you and when it is important for your team to be together. <br><br>This role is based in Sunnyvale, CA. <br><br><br>Team Information:<br><br><br>The mission of the Anti-Abuse AI team is to build trust in every inte...</code> |
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
  ```json
  {
      "scale": 20.0,
      "similarity_fct": "cos_sim"
  }
  ```
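
A hedged sketch of fine-tuning on such (sentence_0, sentence_1) pairs with this loss — the pair below is an invented placeholder, and the base checkpoint is the one named in `config.json`. With `MultipleNegativesRankingLoss`, the other `sentence_1` entries in each batch serve as in-batch negatives:

```python
from datasets import Dataset
from sentence_transformers import SentenceTransformer, SentenceTransformerTrainer
from sentence_transformers.losses import MultipleNegativesRankingLoss

# Placeholder pair; the real dataset has 809 (query, job text) rows.
train_dataset = Dataset.from_dict({
    "sentence_0": ["GCP Data Engineer, BigQuery, Airflow DAG"],
    "sentence_1": ["6-8 years of experience building data pipelines on BigQuery"],
})

model = SentenceTransformer("sentence-transformers/all-distilroberta-v1")
# cos_sim scaled by 20.0, matching the loss parameters listed above.
loss = MultipleNegativesRankingLoss(model, scale=20.0)

trainer = SentenceTransformerTrainer(model=model, train_dataset=train_dataset, loss=loss)
trainer.train()
```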

### Training Hyperparameters
#### Non-Default Hyperparameters

- `per_device_train_batch_size`: 2
- `per_device_eval_batch_size`: 2
- `num_train_epochs`: 2
- `multi_dataset_batch_sampler`: round_robin
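
A small sketch, assuming Sentence Transformers 3.x, of how these non-defaults map onto `SentenceTransformerTrainingArguments` (the `output_dir` is a placeholder); pass the result to the trainer via `args=`:

```python
from sentence_transformers import SentenceTransformerTrainingArguments

args = SentenceTransformerTrainingArguments(
    output_dir="distilroberta-ai-job-embeddings",  # placeholder path
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    num_train_epochs=2,
    multi_dataset_batch_sampler="round_robin",  # only relevant with multiple train datasets
)
```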

#### All Hyperparameters
<details><summary>Click to expand</summary>

- `overwrite_output_dir`: False
- `do_predict`: False
- `eval_strategy`: no
- `prediction_loss_only`: True
- `per_device_train_batch_size`: 2
- `per_device_eval_batch_size`: 2
- `per_gpu_train_batch_size`: None
- `per_gpu_eval_batch_size`: None
- `gradient_accumulation_steps`: 1
- `eval_accumulation_steps`: None
- `torch_empty_cache_steps`: None
- `learning_rate`: 5e-05
- `weight_decay`: 0.0
- `adam_beta1`: 0.9
- `adam_beta2`: 0.999
- `adam_epsilon`: 1e-08
- `max_grad_norm`: 1
- `num_train_epochs`: 2
- `max_steps`: -1
- `lr_scheduler_type`: linear
- `lr_scheduler_kwargs`: {}
- `warmup_ratio`: 0.0
- `warmup_steps`: 0
- `log_level`: passive
- `log_level_replica`: warning
- `log_on_each_node`: True
- `logging_nan_inf_filter`: True
- `save_safetensors`: True
- `save_on_each_node`: False
- `save_only_model`: False
- `restore_callback_states_from_checkpoint`: False
- `no_cuda`: False
- `use_cpu`: False
- `use_mps_device`: False
- `seed`: 42
- `data_seed`: None
- `jit_mode_eval`: False
- `use_ipex`: False
- `bf16`: False
- `fp16`: False
- `fp16_opt_level`: O1
- `half_precision_backend`: auto
- `bf16_full_eval`: False
- `fp16_full_eval`: False
- `tf32`: None
- `local_rank`: 0
- `ddp_backend`: None
- `tpu_num_cores`: None
- `tpu_metrics_debug`: False
- `debug`: []
- `dataloader_drop_last`: False
- `dataloader_num_workers`: 0
- `dataloader_prefetch_factor`: None
- `past_index`: -1
- `disable_tqdm`: False
- `remove_unused_columns`: True
- `label_names`: None
- `load_best_model_at_end`: False
- `ignore_data_skip`: False
- `fsdp`: []
- `fsdp_min_num_params`: 0
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
- `fsdp_transformer_layer_cls_to_wrap`: None
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
- `deepspeed`: None
- `label_smoothing_factor`: 0.0
- `optim`: adamw_torch
- `optim_args`: None
- `adafactor`: False
- `group_by_length`: False
- `length_column_name`: length
- `ddp_find_unused_parameters`: None
- `ddp_bucket_cap_mb`: None
- `ddp_broadcast_buffers`: False
- `dataloader_pin_memory`: True
- `dataloader_persistent_workers`: False
- `skip_memory_metrics`: True
- `use_legacy_prediction_loop`: False
- `push_to_hub`: False
- `resume_from_checkpoint`: None
- `hub_model_id`: None
- `hub_strategy`: every_save
- `hub_private_repo`: None
- `hub_always_push`: False
- `gradient_checkpointing`: False
- `gradient_checkpointing_kwargs`: None
- `include_inputs_for_metrics`: False
- `include_for_metrics`: []
- `eval_do_concat_batches`: True
- `fp16_backend`: auto
- `push_to_hub_model_id`: None
- `push_to_hub_organization`: None
- `mp_parameters`:
- `auto_find_batch_size`: False
- `full_determinism`: False
- `torchdynamo`: None
- `ray_scope`: last
- `ddp_timeout`: 1800
- `torch_compile`: False
- `torch_compile_backend`: None
- `torch_compile_mode`: None
- `dispatch_batches`: None
- `split_batches`: None
- `include_tokens_per_second`: False
- `include_num_input_tokens_seen`: False
- `neftune_noise_alpha`: None
- `optim_target_modules`: None
- `batch_eval_metrics`: False
- `eval_on_start`: False
- `use_liger_kernel`: False
- `eval_use_gather_object`: False
- `average_tokens_across_devices`: False
- `prompts`: None
- `batch_sampler`: batch_sampler
- `multi_dataset_batch_sampler`: round_robin

</details>

### Training Logs
| Epoch  | Step | Training Loss | ai-job-validation_cosine_accuracy | ai-job-test_cosine_accuracy |
|:------:|:----:|:-------------:|:---------------------------------:|:---------------------------:|
| -1     | -1   | -             | 0.8812                            | -                           |
| 1.0    | 405  | -             | 0.9901                            | -                           |
| 1.2346 | 500  | 0.07          | -                                 | -                           |
| 2.0    | 810  | -             | 0.9901                            | -                           |
| -1     | -1   | -             | 0.9901                            | 1.0                         |

### Framework Versions
- Python: 3.10.12
- Sentence Transformers: 3.4.1
- Transformers: 4.48.3
- PyTorch: 2.6.0+cu124
- Accelerate: 1.3.0
- Datasets: 3.2.0
- Tokenizers: 0.21.0

## Citation

### BibTeX

#### Sentence Transformers
```bibtex
@inproceedings{reimers-2019-sentence-bert,
    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
    author = "Reimers, Nils and Gurevych, Iryna",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
    month = "11",
    year = "2019",
    publisher = "Association for Computational Linguistics",
    url = "https://arxiv.org/abs/1908.10084",
}
```

#### MultipleNegativesRankingLoss
```bibtex
@misc{henderson2017efficient,
    title={Efficient Natural Language Response Suggestion for Smart Reply},
    author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
    year={2017},
    eprint={1705.00652},
    archivePrefix={arXiv},
    primaryClass={cs.CL}
}
```

<!--
## Glossary

*Clearly define terms in order to be accessible across audiences.*
-->

<!--
## Model Card Authors

*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
-->

<!--
## Model Card Contact

*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
-->
config.json
ADDED
@@ -0,0 +1,28 @@
{
  "_name_or_path": "sentence-transformers/all-distilroberta-v1",
  "architectures": [
    "RobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 6,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.48.3",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 50265
}
config_sentence_transformers.json
ADDED
@@ -0,0 +1,10 @@
{
  "__version__": {
    "sentence_transformers": "3.4.1",
    "transformers": "4.48.3",
    "pytorch": "2.6.0+cu124"
  },
  "prompts": {},
  "default_prompt_name": null,
  "similarity_fn_name": "cosine"
}
merges.txt
ADDED
The diff for this file is too large to render.
model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:75f66fe0991134c72cf8db2f5c42f2bdf317a76d3e3f6c2b00e11b3c9aeac88b
size 328485128
modules.json
ADDED
@@ -0,0 +1,20 @@
[
  {
    "idx": 0,
    "name": "0",
    "path": "",
    "type": "sentence_transformers.models.Transformer"
  },
  {
    "idx": 1,
    "name": "1",
    "path": "1_Pooling",
    "type": "sentence_transformers.models.Pooling"
  },
  {
    "idx": 2,
    "name": "2",
    "path": "2_Normalize",
    "type": "sentence_transformers.models.Normalize"
  }
]
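
These three modules are applied in order at encode time. A hedged sketch of assembling the equivalent Transformer → Pooling → Normalize stack by hand — `pooling_mode="mean"` is an assumption in this sketch, and the base checkpoint is the one named in `config.json`:

```python
from sentence_transformers import SentenceTransformer, models

transformer = models.Transformer(
    "sentence-transformers/all-distilroberta-v1",
    max_seq_length=512,
)
pooling = models.Pooling(
    transformer.get_word_embedding_dimension(),  # 768
    pooling_mode="mean",  # assumption for this sketch
)
normalize = models.Normalize()  # unit-length output, so dot product == cosine

model = SentenceTransformer(modules=[transformer, pooling, normalize])
```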

sentence_bert_config.json
ADDED
@@ -0,0 +1,4 @@
{
  "max_seq_length": 512,
  "do_lower_case": false
}
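
Inputs longer than `max_seq_length` are truncated at encode time. An illustrative check of the limit, with an optional override to trade context for speed:

```python
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("krshahvivek/distilroberta-ai-job-embeddings")
print(model.max_seq_length)  # 512, from sentence_bert_config.json

model.max_seq_length = 256  # optional: shorter inputs, faster encoding
```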

special_tokens_map.json
ADDED
@@ -0,0 +1,51 @@
{
  "bos_token": {
    "content": "<s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "cls_token": {
    "content": "<s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "eos_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "mask_token": {
    "content": "<mask>",
    "lstrip": true,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "<pad>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "sep_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "unk_token": {
    "content": "<unk>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  }
}
tokenizer.json
ADDED
The diff for this file is too large to render.
tokenizer_config.json
ADDED
@@ -0,0 +1,65 @@
{
  "add_prefix_space": false,
  "added_tokens_decoder": {
    "0": {
      "content": "<s>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "1": {
      "content": "<pad>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "2": {
      "content": "</s>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "3": {
      "content": "<unk>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "50264": {
      "content": "<mask>",
      "lstrip": true,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "bos_token": "<s>",
  "clean_up_tokenization_spaces": false,
  "cls_token": "<s>",
  "eos_token": "</s>",
  "errors": "replace",
  "extra_special_tokens": {},
  "mask_token": "<mask>",
  "max_length": 128,
  "model_max_length": 512,
  "pad_to_multiple_of": null,
  "pad_token": "<pad>",
  "pad_token_type_id": 0,
  "padding_side": "right",
  "sep_token": "</s>",
  "stride": 0,
  "tokenizer_class": "RobertaTokenizer",
  "trim_offsets": true,
  "truncation_side": "right",
  "truncation_strategy": "longest_first",
  "unk_token": "<unk>"
}
vocab.json
ADDED
The diff for this file is too large to render.