johnlockejrr committed on
Commit
eaa3d2a
·
verified ·
1 Parent(s): 2ecf187

Upload 4 files

Browse files
Files changed (5) hide show
  1. .gitattributes +1 -0
  2. README.md +67 -7
  3. app.py +225 -0
  4. data/samaritanus.db +3 -0
  5. requirements.txt +3 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ data/samaritanus.db filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,14 +1,74 @@
1
  ---
2
- title: Samaritanus
3
- emoji: 🐨
4
  colorFrom: blue
5
- colorTo: gray
6
  sdk: gradio
7
- sdk_version: 5.33.1
8
  app_file: app.py
9
  pinned: false
10
- license: mit
11
- short_description: Samaritan Torah Search
12
  ---
13
 
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Samaritan Torah Search
3
+ emoji: 📖
4
  colorFrom: blue
5
+ colorTo: indigo
6
  sdk: gradio
7
+ sdk_version: 4.19.2
8
  app_file: app.py
9
  pinned: false
 
 
10
  ---
11
 
12
+ # Samaritan Torah Search
13
+
14
+ A Gradio application for searching through the Samaritan Torah text with both exact and fuzzy matching capabilities.
15
+
16
+ ## Features
17
+
18
+ - **Search Types**:
19
+ - Exact Match: Find precise matches of your search terms
20
+ - Fuzzy Match: Find verses containing any of your search terms, including words that merely begin with a term (prefix matching)
21
+
22
+ - **Search Capabilities**:
23
+ - Full text search across all verses
24
+ - Highlighted search terms in results
25
+ - RTL (Right-to-Left) text support for Hebrew
26
+
27
+ - **User Interface**:
28
+ - Clean, modern design
29
+ - Responsive layout
30
+ - Custom Hebrew font (NarkisClassic)
31
+ - Search button and Enter key support
32
+
33
+ ## Usage
34
+
35
+ 1. Enter your search term in the search box
36
+ 2. Choose between "Exact Match" or "Fuzzy Match"
37
+ 3. Click the Search button or press Enter
38
+ 4. View the results with highlighted matches
39
+
40
+ ## Technical Details
41
+
42
+ - Built with Gradio
43
+ - Uses SQLite with FTS5 for full-text search
44
+ - Custom CSS for RTL support and styling
45
+ - Responsive design for all screen sizes
46
+
47
+ ## Development
48
+
49
+ ### Local Setup
50
+
51
+ 1. Install dependencies:
52
+ ```bash
53
+ pip install -r requirements.txt
54
+ ```
55
+
56
+ 2. Run the app:
57
+ ```bash
58
+ python app.py
59
+ ```
60
+
61
+ ### Hugging Face Spaces
62
+
63
+ This app is deployed on Hugging Face Spaces. The deployment is automatic when changes are pushed to the repository.
64
+
65
+ ## Data
66
+
67
+ The app uses a SQLite database with FTS5 enabled for efficient full-text search. The database contains the complete text of the Samaritan Torah with the following structure:
68
+
69
+ - `verses` table: Contains the main verse data
70
+ - `verses_fts` virtual table: FTS5 table for full-text search
71
+
72
+ ## License
73
+
74
+ MIT License
app.py ADDED
@@ -0,0 +1,225 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import sqlite3
3
+ import pandas as pd
4
+ from pathlib import Path
5
+ import re
6
+
7
def get_db_connection():
    """Open a new SQLite connection to the bundled verse database.

    The database file is located at ``data/samaritanus.db`` relative to the
    directory containing this module.

    Returns:
        sqlite3.Connection: a fresh connection; the caller closes it.
    """
    module_dir = Path(__file__).parent
    database_file = module_dir / 'data' / 'samaritanus.db'
    return sqlite3.connect(database_file)
10
+
11
def highlight_text(text, search_term):
    """Wrap each occurrence of *search_term* in *text* with ``<mark>`` tags.

    The term is matched literally (regex metacharacters are escaped) and
    case-insensitively; the matched text keeps its original casing.

    Args:
        text: the string to decorate.
        search_term: the literal term to highlight; a falsy value leaves
            *text* untouched.

    Returns:
        The text with every match wrapped in ``<mark>...</mark>``.
    """
    if search_term:
        literal = re.compile(re.escape(search_term), flags=re.IGNORECASE)
        return literal.sub(lambda hit: f'<mark>{hit.group(0)}</mark>', text)
    return text
16
+
17
def _fts5_quote(token):
    """Return *token* as an FTS5 string literal (embedded quotes doubled)."""
    return '"' + token.replace('"', '""') + '"'


def search_verses(search_term, exact_match, page=1, per_page=10):
    """Run a paginated full-text search over the verses.

    The search term is split on whitespace and every word is passed to FTS5
    as a quoted string, so punctuation in user input (quotes, ``-``, ``:``,
    ...) can no longer produce an FTS5 syntax error or be interpreted as a
    query operator.

    Args:
        search_term: raw text typed by the user.
        exact_match: when true, a verse must contain every word; otherwise a
            verse matches if it contains any word or any token starting with
            one of the words (prefix match).
        page: 1-based page number.
        per_page: maximum number of rows returned per page.

    Returns:
        pandas.DataFrame: all columns of ``verses`` plus ``total_count`` (the
        number of matching rows across all pages), ordered by book, chapter
        and verse. An empty DataFrame is returned when the term contains no
        words.
    """
    words = search_term.split() if search_term else []
    if not words:
        # An empty MATCH expression is rejected by FTS5; nothing to search.
        return pd.DataFrame()

    if exact_match:
        # Space-separated FTS5 strings are implicitly AND-ed together.
        match_expr = " ".join(_fts5_quote(word) for word in words)
    else:
        # A prefix query ("word"*) also matches the word itself, so a
        # separate plain-word alternative is not needed.
        match_expr = " OR ".join(f"{_fts5_quote(word)}*" for word in words)

    query = """
        SELECT v.*,
               COUNT(*) OVER() as total_count
        FROM verses v
        JOIN verses_fts fts ON v.id = fts.rowid
        WHERE verses_fts MATCH ?
        ORDER BY v.book, v.chapter, v.verse
        LIMIT ? OFFSET ?
    """
    params = [match_expr, per_page, (page - 1) * per_page]

    conn = get_db_connection()
    try:
        return pd.read_sql_query(query, conn, params=params)
    finally:
        # Release the connection even when the query raises.
        conn.close()
58
+
59
def format_results(results):
    """Render search results as right-to-left HTML snippets.

    Args:
        results: DataFrame with ``book_name``, ``chapter``, ``verse`` and
            ``text`` columns.

    Returns:
        ``"No results found"`` for an empty frame; otherwise one
        ``<div>...</div><hr>`` snippet per row, joined by newlines.
    """
    if results.empty:
        return "No results found"

    def render(entry):
        # Reference line in bold, verse text underneath.
        reference = f"{entry['book_name']} {entry['chapter']}:{entry['verse']}"
        body = entry['text']
        return f"<div style='text-align: right; direction: rtl;'><b>{reference}</b><br>{body}</div><hr>"

    return "\n".join(render(entry) for _, entry in results.iterrows())
70
+
71
def search(search_term, exact_match, page=1):
    """Search the verses and build the text shown in the UI.

    Args:
        search_term: raw text typed by the user.
        exact_match: forwarded to ``search_verses`` to pick the match mode.
        page: 1-based page number to fetch.

    Returns:
        tuple: ``(display_text, page, total_pages)``. For a missing or
        whitespace-only term, or when nothing matches, the message is
        returned with ``page`` and ``total_pages`` both reset to 1.
    """
    # One page size drives both the query and the page-count arithmetic.
    per_page = 10

    # A whitespace-only term carries no words to search for.
    if not search_term or not search_term.strip():
        return "Please enter a search term", 1, 1

    results = search_verses(search_term, exact_match, page, per_page)

    if results.empty:
        return "No results found", 1, 1

    # Every row carries the overall match count; convert the numpy integer
    # to a plain int before it is handed back to the UI state.
    total_results = int(results['total_count'].iloc[0])
    total_pages = (total_results + per_page - 1) // per_page  # Round up division

    formatted_results = format_results(results)
    result_count = f"Found {total_results} {'result' if total_results == 1 else 'results'}"

    return f"{result_count}\n\n{formatted_results}", page, total_pages
87
+
88
# Custom CSS
# Passed to gr.Blocks(css=...) below. It registers the NarkisClassic web font
# (regular and bold weights, fetched from GitHub), applies it to the whole
# Gradio container, and styles <mark> elements.
css = """
@font-face {
font-family: 'NarkisClassic';
src: url('https://raw.githubusercontent.com/johnlockejrr/samaritanus_search/main/public/fonts/NarkisClassic-Regular.woff') format('woff');
font-weight: normal;
font-style: normal;
}
@font-face {
font-family: 'NarkisClassic';
src: url('https://raw.githubusercontent.com/johnlockejrr/samaritanus_search/main/public/fonts/NarkisClassic-Bold.woff') format('woff');
font-weight: bold;
font-style: normal;
}
.gradio-container {
font-family: 'NarkisClassic', serif;
}
mark {
background-color: yellow;
font-weight: bold;
}
"""
110
+
111
# Create the Gradio interface
with gr.Blocks(css=css) as demo:
    gr.Markdown("# Samaritan Torah Search")

    # Store current page in session state
    current_page = gr.State(1)
    total_pages = gr.State(1)

    with gr.Row():
        search_input = gr.Textbox(
            label="Search",
            placeholder="Enter search term...",
            scale=4
        )
        search_type = gr.Radio(
            choices=["Exact Match", "Fuzzy Match"],
            value="Exact Match",
            label="Search Type",
            scale=1
        )

    search_button = gr.Button("Search")

    with gr.Row():
        output = gr.Markdown()

    # Hidden until a search yields more than one page of results.
    with gr.Row(visible=False) as pagination_row:
        with gr.Column(scale=1):
            prev_button = gr.Button("←", size="sm")
        with gr.Column(scale=3):
            page_info = gr.Markdown()
        with gr.Column(scale=1):
            next_button = gr.Button("→", size="sm")

    def search_wrapper(search_term, search_type, page):
        """Translate the radio selection into the boolean ``search`` expects."""
        exact_match = search_type == "Exact Match"
        return search(search_term, exact_match, page)

    def reset_page():
        """Return the initial (current page, total pages) pair for a new search."""
        return 1, 1

    def update_pagination(page, total):
        """Show the pagination row only when there is more than one page."""
        # gr.update() is used because the per-component ``.update()`` class
        # methods (e.g. gr.Row.update) no longer exist in Gradio 4.x.
        if total > 1:
            return gr.update(visible=True), f"Page {page} of {total}"
        return gr.update(visible=False), ""

    def prev_page(page, total):
        """Step back one page, never going below page 1."""
        if page > 1:
            return page - 1
        return page

    def next_page(page, total):
        """Step forward one page, never going past the last page."""
        if page < total:
            return page + 1
        return page

    def _then_search_and_paginate(event):
        """Chain 'run the search' and 'refresh pagination' after *event*."""
        return event.then(
            fn=search_wrapper,
            inputs=[search_input, search_type, current_page],
            outputs=[output, current_page, total_pages]
        ).then(
            fn=update_pagination,
            inputs=[current_page, total_pages],
            outputs=[pagination_row, page_info]
        )

    # Search button click and search input submit (Enter key): both start a
    # fresh search from page 1.
    for start_search in (search_button.click, search_input.submit):
        _then_search_and_paginate(
            start_search(
                fn=reset_page,
                outputs=[current_page, total_pages]
            )
        )

    # Pagination buttons: move the page first, then re-run the same search.
    for button, step in ((prev_button, prev_page), (next_button, next_page)):
        _then_search_and_paginate(
            button.click(
                fn=step,
                inputs=[current_page, total_pages],
                outputs=current_page
            )
        )

if __name__ == "__main__":
    demo.launch()
data/samaritanus.db ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d4083e6bfb2a3bb13cd06170f3aa46d4b5bb75f494aeaa7f1f831f3575d1380
3
+ size 1327104
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio>=4.19.2
2
+ pandas>=2.2.0
3
+ # sqlite3 is part of the Python standard library and must not be listed as a pip requirement