Spaces:
Sleeping
Sleeping
Upload 4 files
Browse files- .gitattributes +1 -0
- README.md +67 -7
- app.py +225 -0
- data/samaritanus.db +3 -0
- requirements.txt +3 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
data/samaritanus.db filter=lfs diff=lfs merge=lfs -text
|
README.md
CHANGED
@@ -1,14 +1,74 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
emoji:
|
4 |
colorFrom: blue
|
5 |
-
colorTo:
|
6 |
sdk: gradio
|
7 |
-
sdk_version:
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
-
license: mit
|
11 |
-
short_description: Samaritan Torah Search
|
12 |
---
|
13 |
|
14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
---
|
2 |
+
title: Samaritan Torah Search
|
3 |
+
emoji: 📖
|
4 |
colorFrom: blue
|
5 |
+
colorTo: indigo
|
6 |
sdk: gradio
|
7 |
+
sdk_version: 4.19.2
|
8 |
app_file: app.py
|
9 |
pinned: false
|
|
|
|
|
10 |
---
|
11 |
|
12 |
+
# Samaritan Torah Search
|
13 |
+
|
14 |
+
A Gradio application for searching through the Samaritan Torah text with both exact and fuzzy matching capabilities.
|
15 |
+
|
16 |
+
## Features
|
17 |
+
|
18 |
+
- **Search Types**:
|
19 |
+
- Exact Match: Find precise matches of your search terms
|
20 |
+
- Fuzzy Match: Find verses containing any of your search words, including longer words that begin with them (prefix matching)
|
21 |
+
|
22 |
+
- **Search Capabilities**:
|
23 |
+
- Full text search across all verses
|
24 |
+
- Highlighted search terms in results
|
25 |
+
- RTL (Right-to-Left) text support for Hebrew
|
26 |
+
|
27 |
+
- **User Interface**:
|
28 |
+
- Clean, modern design
|
29 |
+
- Responsive layout
|
30 |
+
- Custom Hebrew font (NarkisClassic)
|
31 |
+
- Search button and Enter key support
|
32 |
+
|
33 |
+
## Usage
|
34 |
+
|
35 |
+
1. Enter your search term in the search box
|
36 |
+
2. Choose between "Exact Match" or "Fuzzy Match"
|
37 |
+
3. Click the Search button or press Enter
|
38 |
+
4. View the results with highlighted matches
|
39 |
+
|
40 |
+
## Technical Details
|
41 |
+
|
42 |
+
- Built with Gradio
|
43 |
+
- Uses SQLite with FTS5 for full-text search
|
44 |
+
- Custom CSS for RTL support and styling
|
45 |
+
- Responsive design for all screen sizes
|
46 |
+
|
47 |
+
## Development
|
48 |
+
|
49 |
+
### Local Setup
|
50 |
+
|
51 |
+
1. Install dependencies:
|
52 |
+
```bash
|
53 |
+
pip install -r requirements.txt
|
54 |
+
```
|
55 |
+
|
56 |
+
2. Run the app:
|
57 |
+
```bash
|
58 |
+
python app.py
|
59 |
+
```
|
60 |
+
|
61 |
+
### Hugging Face Spaces
|
62 |
+
|
63 |
+
This app is deployed on Hugging Face Spaces. The deployment is automatic when changes are pushed to the repository.
|
64 |
+
|
65 |
+
## Data
|
66 |
+
|
67 |
+
The app uses a SQLite database with FTS5 enabled for efficient full-text search. The database contains the complete text of the Samaritan Torah with the following structure:
|
68 |
+
|
69 |
+
- `verses` table: Contains the main verse data
|
70 |
+
- `verses_fts` virtual table: FTS5 table for full-text search
|
71 |
+
|
72 |
+
## License
|
73 |
+
|
74 |
+
MIT License
|
app.py
ADDED
@@ -0,0 +1,225 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import sqlite3
|
3 |
+
import pandas as pd
|
4 |
+
from pathlib import Path
|
5 |
+
import re
|
6 |
+
|
7 |
+
def get_db_connection():
    """Open a new SQLite connection to the bundled verse database.

    The database file is looked up at ``data/samaritanus.db`` relative to the
    directory that contains this module. The caller is responsible for
    closing the returned connection.
    """
    module_dir = Path(__file__).parent
    return sqlite3.connect(module_dir.joinpath('data', 'samaritanus.db'))
|
10 |
+
|
11 |
+
def highlight_text(text, search_term):
    """Wrap every occurrence of ``search_term`` in ``text`` with ``<mark>`` tags.

    The term is matched literally (regex metacharacters are escaped) and
    case-insensitively; the matched text keeps its original casing. When
    ``search_term`` is empty or ``None``, ``text`` is returned unchanged.
    """
    if search_term:
        literal = re.compile(re.escape(search_term), re.IGNORECASE)
        return literal.sub(lambda hit: f'<mark>{hit.group(0)}</mark>', text)
    return text
|
16 |
+
|
17 |
+
def search_verses(search_term, exact_match, page=1, per_page=10):
    """Run a paginated FTS5 search over the verses and return a DataFrame.

    Every whitespace-separated word of ``search_term`` is passed to FTS5 as a
    quoted string, so user input is always matched as text and never parsed
    as FTS5 query syntax.

    Args:
        search_term: Raw text typed by the user.
        exact_match: When true, a verse must contain every word. When false,
            each word is used as a prefix and a verse matching any one of
            the words is returned.
        page: 1-based page number; values below 1 are treated as 1.
        per_page: Maximum number of rows returned for one page.

    Returns:
        A DataFrame with the columns of ``verses`` plus ``total_count`` (the
        number of matching rows across all pages, repeated on every row).
        An empty DataFrame is returned when ``search_term`` has no words.
    """
    words = (search_term or "").split()
    if not words:
        # An empty MATCH expression is an FTS5 syntax error, so there is
        # nothing to query for.
        return pd.DataFrame()

    # Quote each word as an FTS5 string (embedded quotes are doubled) so that
    # punctuation such as quotes, colons or hyphens cannot break the query.
    quoted = ['"' + word.replace('"', '""') + '"' for word in words]
    if exact_match:
        # Strings separated only by whitespace are combined with an implicit
        # AND by FTS5.
        match_expr = " ".join(quoted)
    else:
        # A trailing * turns the string into a prefix query. A prefix query
        # also matches the bare word itself, so no separate term is needed.
        match_expr = " OR ".join(f"{token}*" for token in quoted)

    query = """
        SELECT v.*,
               COUNT(*) OVER() as total_count
        FROM verses v
        JOIN verses_fts fts ON v.id = fts.rowid
        WHERE verses_fts MATCH ?
        ORDER BY v.book, v.chapter, v.verse
        LIMIT ? OFFSET ?
    """
    params = [match_expr, per_page, (max(page, 1) - 1) * per_page]

    conn = get_db_connection()
    try:
        return pd.read_sql_query(query, conn, params=params)
    finally:
        # Release the connection even when the query raises.
        conn.close()
|
58 |
+
|
59 |
+
def format_results(results):
    """Render a DataFrame of verses as right-to-left HTML blocks.

    Each row becomes a ``<div>`` holding the bold ``book_name chapter:verse``
    reference followed by the verse text, and is followed by an ``<hr>``.
    The blocks are joined with newlines. An empty DataFrame yields the
    string ``"No results found"``.
    """
    if results.empty:
        return "No results found"

    def as_html(row):
        # Build the "Book chapter:verse" heading for one result row.
        reference = f"{row['book_name']} {row['chapter']}:{row['verse']}"
        body = row['text']
        return f"<div style='text-align: right; direction: rtl;'><b>{reference}</b><br>{body}</div><hr>"

    return "\n".join(as_html(row) for _, row in results.iterrows())
|
70 |
+
|
71 |
+
def search(search_term, exact_match, page=1):
    """Search the verses and build the output text and paging info.

    Args:
        search_term: Raw text typed by the user.
        exact_match: Forwarded to ``search_verses`` to select the match mode.
        page: 1-based page number to fetch.

    Returns:
        A ``(message, page, total_pages)`` tuple. ``message`` is the result
        count followed by the formatted verses, or a short notice when the
        term is blank or nothing matched (in which case both page values
        are 1).
    """
    # Treat a whitespace-only term like an empty one instead of sending it
    # to the database.
    if not search_term or not search_term.strip():
        return "Please enter a search term", 1, 1

    # Single source of truth for the page size used by both the query and
    # the page-count calculation below.
    per_page = 10
    results = search_verses(search_term, exact_match, page, per_page)

    if results.empty:
        return "No results found", 1, 1

    # Every row carries the same total; convert the NumPy integer to a plain
    # int before it is stored in UI state.
    total_results = int(results['total_count'].iloc[0])
    total_pages = -(-total_results // per_page)  # Ceiling division

    formatted_results = format_results(results)
    result_count = f"Found {total_results} {'result' if total_results == 1 else 'results'}"

    return f"{result_count}\n\n{formatted_results}", page, total_pages
|
87 |
+
|
88 |
+
# Custom CSS passed to gr.Blocks below.
# - Registers the regular and bold faces of the 'NarkisClassic' web font,
#   loaded from raw.githubusercontent.com.
# - Applies that font (with a serif fallback) to the whole Gradio container.
# - Styles <mark> elements with a yellow background and bold weight.
css = """
@font-face {
font-family: 'NarkisClassic';
src: url('https://raw.githubusercontent.com/johnlockejrr/samaritanus_search/main/public/fonts/NarkisClassic-Regular.woff') format('woff');
font-weight: normal;
font-style: normal;
}
@font-face {
font-family: 'NarkisClassic';
src: url('https://raw.githubusercontent.com/johnlockejrr/samaritanus_search/main/public/fonts/NarkisClassic-Bold.woff') format('woff');
font-weight: bold;
font-style: normal;
}
.gradio-container {
font-family: 'NarkisClassic', serif;
}
mark {
background-color: yellow;
font-weight: bold;
}
"""
|
110 |
+
|
111 |
+
# Create the Gradio interface
with gr.Blocks(css=css) as demo:
    gr.Markdown("# Samaritan Torah Search")

    # Per-session paging state: the page being shown and the page count
    # reported by the most recent search.
    current_page = gr.State(1)
    total_pages = gr.State(1)

    with gr.Row():
        search_input = gr.Textbox(
            label="Search",
            placeholder="Enter search term...",
            scale=4
        )
        search_type = gr.Radio(
            choices=["Exact Match", "Fuzzy Match"],
            value="Exact Match",
            label="Search Type",
            scale=1
        )

    search_button = gr.Button("Search")

    with gr.Row():
        output = gr.Markdown()

    # Hidden until a search produces more than one page of results.
    with gr.Row(visible=False) as pagination_row:
        with gr.Column(scale=1):
            prev_button = gr.Button("←", size="sm")
        with gr.Column(scale=3):
            page_info = gr.Markdown()
        with gr.Column(scale=1):
            next_button = gr.Button("→", size="sm")

    def search_wrapper(search_term, search_type, page):
        """Translate the radio choice into a boolean and run the search."""
        exact_match = search_type == "Exact Match"
        return search(search_term, exact_match, page)

    def reset_page():
        """Return the initial (current_page, total_pages) pair."""
        return 1, 1

    def update_pagination(page, total):
        """Show the pagination row and its label only for multi-page results."""
        # gr.update() is used because the per-component ``.update()`` class
        # methods (e.g. ``gr.Row.update``) are not available in Gradio 4.x.
        if total > 1:
            return gr.update(visible=True), f"Page {page} of {total}"
        return gr.update(visible=False), ""

    def prev_page(page, total):
        """Step back one page, never going below page 1."""
        if page > 1:
            return page - 1
        return page

    def next_page(page, total):
        """Step forward one page, never going past the last page."""
        if page < total:
            return page + 1
        return page

    def _chain_search(event):
        """Run the search and then refresh the pagination row after ``event``."""
        return event.then(
            fn=search_wrapper,
            inputs=[search_input, search_type, current_page],
            outputs=[output, current_page, total_pages]
        ).then(
            fn=update_pagination,
            inputs=[current_page, total_pages],
            outputs=[pagination_row, page_info]
        )

    # A new search (button click or Enter in the textbox) starts from page 1.
    for start_search in (search_button.click, search_input.submit):
        _chain_search(start_search(
            fn=reset_page,
            outputs=[current_page, total_pages]
        ))

    # Pagination buttons move one page and then re-run the same search.
    _chain_search(prev_button.click(
        fn=prev_page,
        inputs=[current_page, total_pages],
        outputs=current_page
    ))
    _chain_search(next_button.click(
        fn=next_page,
        inputs=[current_page, total_pages],
        outputs=current_page
    ))
|
223 |
+
|
224 |
+
# Launch the app only when this file is run as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    demo.launch()
|
data/samaritanus.db
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4d4083e6bfb2a3bb13cd06170f3aa46d4b5bb75f494aeaa7f1f831f3575d1380
|
3 |
+
size 1327104
|
requirements.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
gradio>=4.19.2
|
2 |
+
pandas>=2.2.0
|
3 |
+
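# sqlite3 is part of the Python standard library; it is not a pip package and must not be listed as a requirement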
|