Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """ | |
Visual Search System - Complete Streamlit App
============================================

A comprehensive Streamlit application that:
1. Automatically installs required dependencies
2. Downloads images from photos_url.csv if needed
3. Provides a clean UI for searching and viewing images
4. Supports both search by ID and range by block functionality

Requirements:
- photos_url.csv: Contains image URLs
- download_images.py: Contains parallel downloading logic
- images/ folder: Will be created and populated with downloaded images

Usage:
    streamlit run app.py

Hugging Face Deployment:
This app is configured for Hugging Face Spaces deployment.
Upload all files and it will run automatically.
| """ | |
| import os | |
| import sys | |
| import subprocess | |
| import importlib | |
| from pathlib import Path | |
| import pandas as pd | |
| import streamlit as st | |
| from typing import List, Tuple, Optional | |
| import time | |
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

# Module names that are probed with importlib (and installed when missing).
REQUIRED_PACKAGES = ["streamlit", "pandas", "requests", "PIL", "tqdm"]

# Image folder, URL list and downloader script used by the app.
IMAGES_DIR = "images"
CSV_FILE = "photos_url.csv"
DOWNLOAD_SCRIPT = "download_images.py"

# Result cap for searches and the block layout used when browsing.
MAX_DISPLAY_IMAGES = 500
IMAGES_PER_BLOCK = 100
TOTAL_BLOCKS = 250
def install_package(package: str) -> bool:
    """
    Install a Python package with pip, using the running interpreter.

    Args:
        package: Package name to install
    Returns:
        True if pip exited with status 0, False otherwise
    """
    pip_command = [sys.executable, "-m", "pip", "install", package]
    # subprocess.call hands back pip's exit status; zero means success.
    exit_status = subprocess.call(pip_command)
    return exit_status == 0
def check_and_install_dependencies() -> bool:
    """
    Check if required packages are installed, install if missing

    Every entry of REQUIRED_PACKAGES is an *import* name. Entries that cannot
    be imported are installed with install_package() and then imported again
    to confirm the installation worked.

    Returns:
        True if all dependencies are available, False otherwise
    """
    # Import name -> pip distribution name, for packages where the two differ.
    # ("pip install PIL" fails; the distribution that provides PIL is Pillow.)
    pip_names = {"PIL": "Pillow"}

    print("π Checking dependencies...")
    missing_packages = []
    for package in REQUIRED_PACKAGES:
        try:
            importlib.import_module(package)
        except ImportError:
            print(f"π¦ Installing {package}...")
            missing_packages.append(package)
        else:
            print(f"β {package} is already installed")

    if missing_packages:
        print(f"π Installing {len(missing_packages)} missing packages...")
        for package in missing_packages:
            print(f"π₯ Installing {package}...")
            if install_package(pip_names.get(package, package)):
                print(f"β Successfully installed {package}")
            else:
                print(f"β Failed to install {package}")
                return False

        # The import system caches directory listings; refresh them so that
        # packages installed a moment ago are visible to import_module().
        importlib.invalidate_caches()

        # Verify installations
        for package in missing_packages:
            try:
                importlib.import_module(package)
            except ImportError:
                print(f"β {package} still not available after installation")
                return False
            else:
                print(f"β {package} verified after installation")

    print("β All dependencies are available!")
    return True
def check_images_status() -> Tuple[bool, int, int]:
    """
    Check the status of downloaded images

    Counts the ``*.jpg`` files directly inside IMAGES_DIR and compares that
    number with the row count of CSV_FILE.

    Returns:
        Tuple of (is_complete, current_count, total_count).
        total_count is 0 (and is_complete is False) when CSV_FILE cannot be
        read. is_complete is always False when IMAGES_DIR does not exist.
    """
    images_path = Path(IMAGES_DIR)
    folder_exists = images_path.exists()

    # Count existing images (none when the folder has not been created yet)
    current_count = len(list(images_path.glob("*.jpg"))) if folder_exists else 0

    # Get total count from CSV. This is done even when the folder is missing
    # so callers can report the real total instead of "0 of 0".
    try:
        total_count = len(pd.read_csv(CSV_FILE))
    except Exception as e:
        print(f"β Error reading {CSV_FILE}: {e}")
        return False, current_count, 0

    # Consider complete if 95%+ downloaded
    is_complete = folder_exists and current_count >= total_count * 0.95
    return is_complete, current_count, total_count
def download_images_if_needed() -> bool:
    """
    Make sure the image folder is populated, downloading when it is not.

    Returns:
        True if images are available (including a download that reported
        problems), False if the download raised an exception
    """
    print("π Checking image status...")
    ready, have, expected = check_images_status()

    # Nothing to do when enough images are already on disk.
    if ready:
        print(f"β Images are ready! Have {have:,} of {expected:,} images")
        return True

    print(f"π₯ Images incomplete: {have:,} of {expected:,} available")
    print("π Starting image download...")

    download_options = {
        "num_images": None,  # Download all images
        "output_dir": IMAGES_DIR,
        "max_workers": 20,
    }
    try:
        # download_images.py is expected next to this script, so make the
        # current directory importable before importing from it.
        sys.path.append('.')
        from download_images import download_images

        outcome = download_images(**download_options)
        # A partially failed download is reported but does not stop the app.
        print(
            "β Image download completed successfully!"
            if outcome
            else "β οΈ Image download had some issues, but continuing..."
        )
        return True
    except Exception as error:
        print(f"β Error during image download: {error}")
        return False
def get_image_path(image_id: str) -> Optional[str]:
    """
    Get the file path for a given image ID

    Numeric IDs are zero-padded to at least four digits ("7" -> "0007.jpg");
    any other ID is used as the file name stem unchanged.

    Args:
        image_id: Image ID (e.g., "0001", "1234")
    Returns:
        File path if exists, None otherwise. None is also returned for
        non-string IDs, for IDs that cannot be converted to a number, and
        for IDs that contain a directory component.
    """
    if not isinstance(image_id, str):
        return None

    # Convert image ID to filename format
    if image_id.isdigit():
        try:
            filename = f"{int(image_id):04d}.jpg"
        except ValueError:
            # str.isdigit() accepts characters such as "²" that int() rejects.
            return None
    else:
        filename = f"{image_id}.jpg"

    # The ID can come from a user-facing text box: refuse anything that would
    # resolve outside IMAGES_DIR (e.g. "../other").
    if os.path.basename(filename) != filename:
        return None

    image_path = os.path.join(IMAGES_DIR, filename)
    return image_path if os.path.exists(image_path) else None
def get_block_images(block_number: int) -> List[str]:
    """
    Collect the paths of the images that belong to one block.

    Args:
        block_number: Block number (1-250)
    Returns:
        List of image paths for the block; empty when the block number is
        out of range or none of its images exist
    """
    if 1 <= block_number <= TOTAL_BLOCKS:
        # Blocks are consecutive runs of IMAGES_PER_BLOCK image numbers.
        last_number = block_number * IMAGES_PER_BLOCK
        first_number = last_number - IMAGES_PER_BLOCK + 1
        candidates = (
            get_image_path(str(number))
            for number in range(first_number, last_number + 1)
        )
        # Keep only the images that are actually present.
        return [path for path in candidates if path]
    return []
def search_images_by_id(search_id: str) -> List[str]:
    """
    Search for images by ID

    An empty (or whitespace-only) search term returns the first
    MAX_DISPLAY_IMAGES images that exist. Otherwise the exact match, if
    any, comes first, followed by every image whose number contains the
    search term, in ascending order, capped at MAX_DISPLAY_IMAGES.

    Args:
        search_id: Search term (can be partial); surrounding whitespace is
            ignored
    Returns:
        List of matching image paths
    """
    needle = search_id.strip()

    if not needle:
        # Browse mode: look each image up once and keep the ones that exist.
        browse = (get_image_path(str(n)) for n in range(1, MAX_DISPLAY_IMAGES + 1))
        return [path for path in browse if path]

    matching_paths = []

    # Try exact match first
    exact_path = get_image_path(needle)
    if exact_path:
        matching_paths.append(exact_path)

    # Image numbers are plain decimal strings, so only an all-digit needle
    # can ever be a substring of one; skip the scan otherwise.
    if not set(needle) <= set("0123456789"):
        return matching_paths

    # Search for partial matches across the whole dataset
    total_images = TOTAL_BLOCKS * IMAGES_PER_BLOCK
    for n in range(1, total_images + 1):
        if len(matching_paths) >= MAX_DISPLAY_IMAGES:
            break
        number_text = str(n)
        # Cheap string test first; only touch the filesystem for candidates.
        if needle not in number_text:
            continue
        image_path = get_image_path(number_text)
        # The exact match is the only path that can already be in the list.
        if image_path and image_path != exact_path:
            matching_paths.append(image_path)

    return matching_paths
def display_image_grid(image_paths: List[str], title: str):
    """
    Render images in a three-column Streamlit grid.

    Args:
        image_paths: List of image file paths
        title: Title for the image grid
    """
    if image_paths:
        st.subheader(f"{title} ({len(image_paths)} images)")

        # Images are dealt out to the columns left to right, row by row.
        grid_width = 3
        columns = st.columns(grid_width)
        for position, path in enumerate(image_paths):
            with columns[position % grid_width]:
                try:
                    st.image(
                        path,
                        caption=f"Image {os.path.basename(path)}",
                        use_column_width=True,
                    )
                except Exception as error:
                    # One unreadable file should not break the whole grid.
                    st.error(f"Error loading image: {error}")
    else:
        st.warning("No images found matching your criteria.")
def _render_search_by_id():
    """Render the "Search by ID" page: search box, button and result grid."""
    st.header("π Search Images by ID")

    # Search input
    search_id = st.text_input(
        f"Enter image ID (e.g., '0001', '1234') or leave empty to see first {MAX_DISPLAY_IMAGES} images:",
        placeholder="Enter ID or leave empty",
        help=f"Enter a specific image ID or leave empty to browse the first {MAX_DISPLAY_IMAGES} images",
    )

    # The button is evaluated first so it is always rendered; a non-empty
    # text box triggers the search as well.
    if st.button("π Search", type="primary") or search_id != "":
        with st.spinner("Searching images..."):
            matching_images = search_images_by_id(search_id)
        if matching_images:
            display_image_grid(
                matching_images,
                f"Showing {len(matching_images)} matching images",
            )
        else:
            st.info("No images found matching your search criteria.")


def _render_block_browser():
    """Render the "Range by Block" page: block picker and that block's images."""
    st.header("π¦ Browse Images by Block")

    # Derive the worked example from the same formula used below, so the help
    # text always agrees with what the page actually shows.
    example_block = min(100, TOTAL_BLOCKS)
    example_start = (example_block - 1) * IMAGES_PER_BLOCK + 1
    example_end = example_block * IMAGES_PER_BLOCK
    st.markdown(
        "**How it works:**\n"
        f"- Each block contains **{IMAGES_PER_BLOCK} images**\n"
        f"- Enter a number between **1 and {TOTAL_BLOCKS}**\n"
        f"- Example: Enter **{example_block}** to see images "
        f"**{example_start}-{example_end}**\n"
    )

    # Block input
    block_number = st.number_input(
        f"Enter block number (1-{TOTAL_BLOCKS}):",
        min_value=1,
        max_value=TOTAL_BLOCKS,
        value=1,
        step=1,
        help=f"Choose a block number from 1 to {TOTAL_BLOCKS}",
    )

    # Calculate and display block info
    start_num = (block_number - 1) * IMAGES_PER_BLOCK + 1
    end_num = block_number * IMAGES_PER_BLOCK
    st.info(f"**Block {block_number}**: Images {start_num:,} to {end_num:,}")

    # Get block images
    with st.spinner(f"Loading block {block_number}..."):
        block_images = get_block_images(block_number)
    if block_images:
        display_image_grid(
            block_images,
            f"Block {block_number} - Images {start_num:,} to {end_num:,}",
        )
    else:
        st.warning(f"No images found for block {block_number}.")


def main():
    """
    Main Streamlit application

    Configures the page, lets the user pick a search method in the sidebar
    and renders the matching page followed by a footer.
    """
    # Page configuration
    st.set_page_config(
        page_title="Visual Search System",
        page_icon="π",
        layout="wide",
        initial_sidebar_state="expanded",
    )

    # Main title
    st.title("π Visual Search System")
    st.markdown("---")

    # Sidebar for navigation
    st.sidebar.header("Navigation")
    search_option = st.sidebar.selectbox(
        "Choose search method:",
        ["Search by ID", "Range by Block"],
    )

    # Main content area
    if search_option == "Search by ID":
        _render_search_by_id()
    else:  # Range by Block
        _render_block_browser()

    # Footer
    st.markdown("---")
    st.markdown(
        "**Dataset Info:** 25,000+ high-quality images from Unsplash | "
        "Built with Streamlit and Python"
    )
def setup_and_run():
    """Run the preparation stages in order, then start the app.

    The process exits with status 1 as soon as a stage reports failure.
    """
    print("π Starting Visual Search System...")

    # (stage function, message on failure, message on success)
    stages = (
        # Step 1: Install dependencies
        (
            check_and_install_dependencies,
            "β Failed to install dependencies. Exiting.",
            "β Dependencies ready!",
        ),
        # Step 2: Check and download images
        (
            download_images_if_needed,
            "β Failed to prepare images. Exiting.",
            "β Images ready!",
        ),
    )
    for run_stage, failure_message, success_message in stages:
        if not run_stage():
            print(failure_message)
            sys.exit(1)
        print(success_message)

    # Step 3: Launch Streamlit app
    print("π Launching Streamlit app...")
    main()
# Script entry point: run the setup stages and the app only when this file is
# executed as the main module, not when it is imported.
if __name__ == "__main__":
    setup_and_run()