NeuraLake committed · Commit 605bf6b · verified

Duplicate from NeuraLake/iSA-02-Nano-1B-Preview-V1.1
.gitattributes ADDED
@@ -0,0 +1,42 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ iSA-02-Nano-1B-Preview.F16.gguf filter=lfs diff=lfs merge=lfs -text
+ iSA-02-Nano-1B-Preview.Q4_0.gguf filter=lfs diff=lfs merge=lfs -text
+ iSA-02-Nano-1B-Preview.F32.gguf filter=lfs diff=lfs merge=lfs -text
+ iSA-02-Nano-1B-Preview.Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text
+ iSA-02-Nano-1B-Preview.Q5_K_M.gguf filter=lfs diff=lfs merge=lfs -text
+ iSA-02-Nano-1B-Preview.Q8_0.gguf filter=lfs diff=lfs merge=lfs -text
Modelfile ADDED
@@ -0,0 +1,57 @@
+
+ FROM /content/NeuraLake/iSA-02-Nano-1B-Preview/iSA-02-Nano-1B-Preview.F32.gguf
+ TEMPLATE """{{ if .Messages }}
+ {{- if or .System .Tools }}<|start_header_id|>system<|end_header_id|>
+ {{- if .System }}
+
+ {{ .System }}
+ {{- end }}
+ {{- if .Tools }}
+
+ You are a helpful assistant with tool calling capabilities. When you receive a tool call response, use the output to format an answer to the original user question.
+ {{- end }}
+ {{- end }}<|eot_id|>
+ {{- range $i, $_ := .Messages }}
+ {{- $last := eq (len (slice $.Messages $i)) 1 }}
+ {{- if eq .Role "user" }}<|start_header_id|>user<|end_header_id|>
+ {{- if and $.Tools $last }}
+
+ Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt.
+
+ Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}. Do not use variables.
+
+ {{ $.Tools }}
+ {{- end }}
+
+ {{ .Content }}<|eot_id|>{{ if $last }}<|start_header_id|>assistant<|end_header_id|>
+
+ {{ end }}
+ {{- else if eq .Role "assistant" }}<|start_header_id|>assistant<|end_header_id|>
+ {{- if .ToolCalls }}
+
+ {{- range .ToolCalls }}{"name": "{{ .Function.Name }}", "parameters": {{ .Function.Arguments }}}{{ end }}
+ {{- else }}
+
+ {{ .Content }}{{ if not $last }}<|eot_id|>{{ end }}
+ {{- end }}
+ {{- else if eq .Role "tool" }}<|start_header_id|>ipython<|end_header_id|>
+
+ {{ .Content }}<|eot_id|>{{ if $last }}<|start_header_id|>assistant<|end_header_id|>
+
+ {{ end }}
+ {{- end }}
+ {{- end }}
+ {{- else }}
+ {{- if .System }}<|start_header_id|>system<|end_header_id|>
+
+ {{ .System }}<|eot_id|>{{ end }}{{ if .Prompt }}<|start_header_id|>user<|end_header_id|>
+
+ {{ .Prompt }}<|eot_id|>{{ end }}<|start_header_id|>assistant<|end_header_id|>
+
+ {{ end }}{{ .Response }}{{ if .Response }}<|eot_id|>{{ end }}"""
+ PARAMETER stop "<|start_header_id|>"
+ PARAMETER stop "<|end_header_id|>"
+ PARAMETER stop "<|eot_id|>"
+ PARAMETER stop "<|eom_id|>"
+ PARAMETER temperature 1.5
+ PARAMETER min_p 0.1
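The template above instructs the model to emit tool calls as a single JSON object of the form `{"name": ..., "parameters": ...}`. A minimal client-side sketch for recognizing and unpacking such a reply (the example function name `get_inventory` is invented for illustration):

```python
import json

def parse_tool_call(reply: str):
    """Parse a tool-call reply in the format the template requests:
    {"name": <function name>, "parameters": {<arg>: <value>, ...}}

    Returns (name, parameters), or None if the reply is not a tool call.
    """
    try:
        data = json.loads(reply)
    except json.JSONDecodeError:
        return None  # plain-text answer, not a function call
    if isinstance(data, dict) and "name" in data and "parameters" in data:
        return data["name"], data["parameters"]
    return None

# Example reply in the format the template instructs the model to use
# (hypothetical function and arguments).
reply = '{"name": "get_inventory", "parameters": {"warehouse": "A", "sku": "123"}}'
```

A real integration would dispatch on the returned name and feed the tool's output back as an `ipython`-role message, as the template's `tool` branch expects.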
README.md ADDED
@@ -0,0 +1,290 @@
+ ---
+ tags:
+ - text-generation-inference
+ - transformers
+ - facebook
+ - meta
+ - pytorch
+ - gguf
+ - reasoning
+ - context-dynamic
+ - small-models
+ - synthetic-data
+ - function-calls
+ - synthetic
+ - open-source
+ - llama
+ - NeuraLake
+ - 🇧🇷
+ - 256K
+ license: apache-2.0
+ model_creator: Celso H A Diniz
+ model_name: iSA-02-Nano-1B-Preview
+ ---
+
+ **Note**: This is a **very experimental release** on Hugging Face. **The model is still under training.** Further improvements and updates will be released next week.
+
+ # Introducing the NeuraLake iSA-02 Series: The First Small Reasoning Models
+
+ ### Release Information
+
+ As artificial intelligence continues to advance rapidly, responsible development becomes paramount. The model weights for each series (1B, 2B, 3B, and 7B) will be released upon the completion of the training process, ensuring that the final versions of the models are fully trained and optimized. We are committed to a safe and responsible release of these models, adhering to best practices in AI ethics and governance and contributing to the broader dialogue on responsible AI development.
+
+ #### Release Principles
+
+ The release of the iSA-02 model series is guided by a comprehensive approach that prioritizes safety, ethical considerations, and responsible innovation. Our strategy encompasses multiple dimensions of responsible AI deployment:
+
+ 1. **Staged and Controlled Release**
+    - Model weights will be made available through a carefully managed process
+    - Each model variant (1B, 2B, 3B, 7B) will be evaluated independently
+    - Release will be gradual to allow for thorough community feedback and assessment
+
+ 2. **Comprehensive Evaluation**
+    Prior to release, each model will undergo rigorous testing and evaluation to:
+    - Assess performance across diverse use cases
+    - Identify potential biases or unexpected behaviors
+    - Validate the model's reasoning and generalization capabilities
+    - Ensure consistency with ethical AI principles
+
+ 3. **Ethical Considerations**
+    We are proactively incorporating ethical guidelines to prevent potential misuse:
+    - Developing clear usage policies
+    - Implementing mechanisms to discourage harmful applications
+    - Creating frameworks for responsible AI interaction
+    - Establishing boundaries for appropriate model deployment
+
+ 4. **Robustness and Security Protocols**
+    Our release strategy includes comprehensive security measures:
+    - Implementing advanced access controls
+    - Conducting thorough vulnerability assessments
+    - Developing monitoring systems for model interactions
+    - Creating mechanisms to detect and mitigate potential misuse
+
+ 5. **Detailed User Guidance**
+    To support responsible implementation, we will provide:
+    - Comprehensive documentation
+    - Clear usage guidelines
+    - Recommended best practices
+    - Contextual examples of appropriate model applications
+    - Explicit warnings about potential limitations
+
+ 6. **Community and Collaborative Approach**
+    We view the model's release as a collaborative process:
+    - Encouraging feedback from the AI research community
+    - Maintaining open channels for dialogue
+    - Commitment to continuous improvement based on real-world insights
+    - Transparency about the model's capabilities and constraints
+
+ #### Ongoing Commitment
+
+ Our goal extends beyond mere technological innovation. We aim to:
+ - Empower developers with cutting-edge AI capabilities
+ - Foster a culture of responsible and ethical AI development
+ - Contribute to the global conversation on AI safety and governance
+ - Continuously learn and adapt our approach based on emerging insights
+
+ **Note**: The release timeline and specific details may evolve as we refine our understanding and receive input from the broader AI research community. We remain committed to transparency and responsible innovation.
+
+ #### Research and Collaboration Invitation
+
+ Researchers, developers, and AI ethics experts are invited to engage with us in:
+ - Identifying potential use cases
+ - Exploring responsible deployment strategies
+ - Contributing to the ongoing development of safe AI technologies
+
+ For inquiries, collaboration proposals, or feedback, please contact our research team at [Soon].
+
+ ## iSA-02-Nano-1B-Preview
+
+ The **iSA-02-Nano-1B-Preview** is an advanced language model designed by NeuraLake using synthetic data that embodies the philosophy of **"think before you speak,"** enhancing reasoning capabilities for small-scale models.
+
+ It builds on the success of its predecessor, **[CreativeWorksAi/iSA-01-Mini-3B-GGUF](https://huggingface.co/CreativeWorksAi/iSA-01-Mini-3B-GGUF)**, and is inspired by Meta AI's **Llama 3.2** base models.
+
+ ## Model Name Origin
+
+ The "iSA" in iSA-02 stands for "intelligent, Small and Autonomous", reflecting our core philosophy of developing compact AI systems capable of adaptive, intelligent behavior. This naming embodies our research focus on creating small-scale AI agents that can perform complex reasoning and task adaptation with minimal computational resources.
+
+ ## Model Lineage
+
+ The `iSA-02-Nano-1B-Preview` inherits its foundation from **[meta-llama/Llama-3.2-1B-Instruct](https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct)**, refined through multiple iterations with synthetic datasets crafted by **[NeuraLake](https://www.neuralake.com.br)**. This **research experiment** series aims to address reasoning, long-context tasks, and adaptive behaviors in small AI systems.
+
+ ## Initial Idea: Why Are We Doing This?
+
+ The development of what became the iSA-02 series (and more to come) began with an experiment in January 2024. By combining two seemingly ruined datasets, guided by the philosophy that **"AI is so new that it's worth trying everything,"** we unexpectedly discovered initial reasoning capabilities in the base model tested.
+
+ This discovery laid the foundation for the creation of a reasoning-focused architecture, demonstrating that even flawed datasets, when thoughtfully crafted, could unlock new AI behaviors previously unseen in Large Language Models (LLMs) and Small Language Models (SLMs).
+
+ Importantly, the iSA-02 series (and the models to follow) was developed independently and not distilled from OpenAI's o1. This ensures a distinctive development path and architecture, focusing on unlocking new reasoning capabilities through innovative synthetic data generation techniques and contextual refinement.
+
+ **The core idea is to unlock hidden knowledge and unknown behaviors in these models, rather than simply adding characteristics from other systems.**
+
+ ## Key Features
+
+ - **Long Context Window**: Supports up to **256K tokens**, ideal for multi-step reasoning and RAG.
+ - **Adaptive Reasoning**: Adapts its reasoning approach to the context size, staying concise for short contexts (<8K tokens) and detailed for larger ones (>16K tokens).
+ - **Efficient Design**: Optimized for performance, balancing enhanced capabilities with manageable computational requirements.
+
+ ## Model Specifications
+
+ ### Architecture
+ - **Type**: Transformer-based
+ - **Layers**: 16
+ - **Hidden Size**: 2048
+ - **Heads**: 32
+ - **Key/Value Size**: 64
+ - **Feed-Forward Size**: 8192
+ - **Vocabulary Size**: 128,256
+
+ ### Training Hyperparameters
+ - **Mixed Precision (fp16)**
+ - **Context Window Size**:
+   - For text generation: **1024–4096 tokens**
+   - For logical reasoning: **16,000–64,000 tokens**
+
+ #### **Non-Recommended Use Cases**
+ - Real-time or sensitive applications without supervision, due to risks of redundancy, delays, hallucinations, or even unknown behaviors.
+
+ ### Model Versions
+ | Version | Architecture | Quantization | Model Size |
+ |---------|--------------|--------------|------------|
+ | [F32](https://huggingface.co/NeuraLake/iSA-02-Nano-1B-Preview-V1.1/resolve/main/iSA-02-Nano-1B-Preview.F32.gguf) | Custom Llama 3.2 | FP32 | 1.24B params |
+ | [F16](https://huggingface.co/NeuraLake/iSA-02-Nano-1B-Preview-V1.1/resolve/main/iSA-02-Nano-1B-Preview.F16.gguf) | Custom Llama 3.2 | FP16 | 1.24B params |
+ | [Q4_0](https://huggingface.co/NeuraLake/iSA-02-Nano-1B-Preview-V1.1/resolve/main/iSA-02-Nano-1B-Preview.Q4_0.gguf) | Custom Llama 3.2 | 4-bit | 1.24B params |
+ | [Q4_K_M](https://huggingface.co/NeuraLake/iSA-02-Nano-1B-Preview-V1.1/resolve/main/iSA-02-Nano-1B-Preview.Q4_K_M.gguf) | Custom Llama 3.2 | 4-bit | 1.24B params |
+ | [Q5_K_M](https://huggingface.co/NeuraLake/iSA-02-Nano-1B-Preview-V1.1/resolve/main/iSA-02-Nano-1B-Preview.Q5_K_M.gguf) | Custom Llama 3.2 | 5-bit | 1.24B params |
+ | [Q8_0](https://huggingface.co/NeuraLake/iSA-02-Nano-1B-Preview-V1.1/resolve/main/iSA-02-Nano-1B-Preview.Q8_0.gguf) | Custom Llama 3.2 | 8-bit | 1.24B params |
+
+ ### Hardware Requirements
+ | Version | Quantization | Size | Memory (RAM/vRAM) |
+ |---------|--------------|------|-------------------|
+ | [F32](https://huggingface.co/NeuraLake/iSA-02-Nano-1B-Preview-V1.1/resolve/main/iSA-02-Nano-1B-Preview.F32.gguf) | FP32 | 4.95 GB | 9.9 GB |
+ | [F16](https://huggingface.co/NeuraLake/iSA-02-Nano-1B-Preview-V1.1/resolve/main/iSA-02-Nano-1B-Preview.F16.gguf) | FP16 | 2.48 GB | 4.96 GB |
+ | [Q4_0](https://huggingface.co/NeuraLake/iSA-02-Nano-1B-Preview-V1.1/resolve/main/iSA-02-Nano-1B-Preview.Q4_0.gguf) | 4-bit | 771 MB | 1.56 GB |
+ | [Q4_K_M](https://huggingface.co/NeuraLake/iSA-02-Nano-1B-Preview-V1.1/resolve/main/iSA-02-Nano-1B-Preview.Q4_K_M.gguf) | 4-bit | 808 MB | 1.62 GB |
+ | [Q5_K_M](https://huggingface.co/NeuraLake/iSA-02-Nano-1B-Preview-V1.1/resolve/main/iSA-02-Nano-1B-Preview.Q5_K_M.gguf) | 5-bit | 912 MB | 1.84 GB |
+ | [Q8_0](https://huggingface.co/NeuraLake/iSA-02-Nano-1B-Preview-V1.1/resolve/main/iSA-02-Nano-1B-Preview.Q8_0.gguf) | 8-bit | 1.32 GB | 2.64 GB |
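The memory column above is roughly twice the on-disk file size (weights plus working buffers and KV cache). A quick sanity check of that rule of thumb, using the table's own file sizes (the helper name and the 2x factor are our own illustration of the pattern in the table, not an official formula):

```python
# File sizes (GB) taken from the Hardware Requirements table above.
file_sizes_gb = {
    "F32": 4.95,
    "F16": 2.48,
    "Q4_0": 0.771,
    "Q4_K_M": 0.808,
    "Q5_K_M": 0.912,
    "Q8_0": 1.32,
}

def estimated_memory_gb(file_size_gb: float, factor: float = 2.0) -> float:
    """Estimate RAM/vRAM needed to load and run a GGUF model."""
    return round(file_size_gb * factor, 2)

for name, size in file_sizes_gb.items():
    print(f"{name}: ~{estimated_memory_gb(size)} GB")
```

Actual usage also grows with the configured context window, so treat these numbers as a floor, not a guarantee.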
+
+ ## Training and Fine-Tuning
+
+ The iSA-02 dataset was meticulously developed to encourage and enhance performance in logical reasoning, execution of multi-step tasks, and contextual tool use through the application of synthetic datasets.
+
+ ## Light Use Cases for the 1B Model
+
+ ### Direct Applications
+ - Logical reasoning and decision-making: generate reports from system logs
+ - Dynamic tool integration via **function calls**: ideal for long-context RAG, such as consulting databases for product information or large warehouse inventories
+ - Generating structured long-form content: well suited to correcting OCR results and completing missing data
+
+ ### Limitations
+ - Not suitable for high-throughput text generation or latency-critical applications
+ - Outputs may reflect biases inherent in synthetic data or hidden behaviors from previous training
+ - The model tends to validate itself for long and unnecessary stretches of time
+
+ ## Model Highlights
+
+ The iSA-02 represents a leap forward for small AI agents, exhibiting:
+ - **Dynamic Context Adaptation**: Adjusts output based on input size and complexity
+ - **Innovative Behaviors**: During testing, the model demonstrated advanced reasoning for its size, including formulating plans and attempting external tool use to solve problems
+
+ ## Understanding iSA-02 Behavior: Adapting to Context and Configuration
+
+ **The performance of iSA-02 is highly dependent** on the **max_tokens** setting, which controls the length of generated text. This parameter is crucial because the model adapts its behavior based on the context size:
+
+ 1. **Small Contexts (<4096 tokens):**
+    iSA-02 behaves like a standard LLM, generating concise and straightforward responses. This setup is ideal for simple tasks like answering direct questions or short interactions.
+
+ 2. **Medium (>8192 tokens) and Large Contexts (16,000+ tokens):**
+    For larger contexts, the model transitions to **structured logical reasoning**, breaking down complex problems into multiple steps. It can consume over 20,000 tokens before concluding. This makes it especially useful for strategic planning and analyzing long texts. **Be careful and tune settings for your use case to reduce hallucinations.**
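The two regimes can be captured in a small helper that picks a reasoning mode and a max_tokens budget from the prompt's token count. The thresholds come from this section; the function name and the transitional band's values are our own illustration:

```python
def pick_mode(context_tokens: int) -> dict:
    """Map context size to the behavior regimes described in the model card.

    Thresholds follow the card: <4096 tokens -> concise answers,
    >8192 tokens -> structured multi-step reasoning.
    """
    if context_tokens < 4096:
        # Small context: the model acts like a standard LLM.
        return {"mode": "concise", "max_tokens": 1024}
    if context_tokens <= 8192:
        # Transitional band: lean concise, but allow more room.
        return {"mode": "concise", "max_tokens": 4096}
    # Medium/large context: expect long, structured reasoning chains.
    return {"mode": "structured_reasoning", "max_tokens": 16000}

config = pick_mode(32000)  # large context -> structured reasoning
```

Budgeting max_tokens this way keeps short prompts cheap while leaving headroom for the 20,000+ token reasoning chains the card warns about.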
+
+ ### Key Observed Behaviors
+
+ #### a. Depth of Reasoning
+ - Capable of solving problems through iterative reasoning, sometimes taking up to **several minutes** to finalize an answer
+ - In testing, the model generated detailed plans, including simulating **function calls** and devising strategies for unconventional challenges, like calculating the height of the Eiffel Tower
+
+ #### b. Adaptive Reasoning
+ - Reasoning becomes more logical and structured as the context window grows
+ - However, this can lead to unnecessary explorations, or even hallucinations, if the query is ambiguous or overly broad
+
+ #### c. Redundancy Risk
+ - For simpler problems, the model may generate overly detailed responses or repeat ideas, especially without a strict token limit
+
+ #### d. Creative and Innovative Responses
+ - Examples include hypothetical planning or finding creative solutions, which, while innovative, may require supervision for practicality
+ - **It is important to note that the model occasionally exhibits hallucinations, particularly when attempting to simulate function calls and returns.**
+
+ ### Known Issues and Unusual Behavior (Addressed in V2)
+
+ **Limitation Handling**: The current model version has a tendency to:
+ - Exhibit difficulty managing tasks that exceed its capabilities
+ - Display unusual behavior when handling complex tasks, such as:
+   - Occasionally 'giving up' on tasks that it judges to be too difficult (under investigation and testing)
+   - Initiating online searches to hire human experts directly from freelance platforms when connected to the internet
+   - Attempting to autonomously navigate and interact with web services to gather additional information or execute random tasks
+
+ **These behaviors, while innovative, highlight the need for enhanced monitoring and safeguards to ensure that the AI's actions are aligned with user intentions and ethical guidelines. The next version of the model, V2, aims to refine these capabilities by**:
+ - Integrating advanced reasoning modules capable of handling complex scenarios with greater autonomy, without using tools first
+ - Implementing stricter controls and permissions for online interactions and transactions
+ - Improving the model's understanding of context and appropriateness when deciding to involve external human resources and tools
+
+ ### Recommended Settings
+
+ #### Attention
+ 1. **Over-Exploration:**
+    - May consume **thousands of tokens on unnecessary** reasoning loops
+ 2. **Context Dependence:**
+    - Poorly structured prompts can lead to redundant outputs
+ 3. **Ambiguity:**
+    - Vague questions may produce verbose but unfocused responses
+
+ #### Best Practices
+ - Avoid ambiguous prompts to reduce unnecessary reasoning
+ - **Use max_tokens settings tailored to the task's complexity; this is very important**
+ - **Supervise outputs; use in critical or sensitive applications for research and testing ONLY**
+ - Provide clear and highly specific prompts
+ - Although the model has limited capacity (1B-2B variants), it is capable of generating intelligent responses when given precise instructions
+
+ #### Generation Parameters
+ - **max_tokens:**
+   - **Simple Problems:** For simpler problems and lower reasoning requirements, a setting between **1024** and **4096** tokens is usually sufficient
+   - **Complex Tasks:** For more complex tasks that involve detailed reasoning and outputs, a higher range of **8000** to **16,000** tokens may be necessary
+ - **temperature:**
+   - **Objective Responses:** For more objective and predictable responses, a temperature between **0.1** and **0.3** is recommended in typical scenarios
+   - **Creative Reasoning:** For tasks that require more nuanced and creative reasoning, a higher temperature range of **0.9** to **1.5** can be beneficial
+ - **top_p:**
+   - **Focused Outputs:** In a normal use case, setting **top_p** to **0.85** can help prevent over-exploration of the probabilistic space, maintaining focus in the outputs
+   - **Precision in Reasoning:** For complex reasoning tasks where precision is critical, a lower **top_p** value such as **0.1** may be more appropriate to constrain the model's choices to the most likely options
+ - **stop_sequences:**
+   - **Avoiding Redundancy:** Use specific stop sequences, like **"Therefore, the answer is,"** to prevent the model from generating redundant or unnecessary additional content beyond the desired output
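As a sketch, the recommendations above can be bundled into a helper that returns a sampling configuration for a given task profile. The helper name and the two profiles are our own illustration, not an official API; the values come from the ranges listed above:

```python
def sampling_config(profile: str) -> dict:
    """Return generation settings per the model card's recommendations.

    'objective' -> short, predictable outputs
    'creative'  -> long-form, exploratory reasoning
    """
    profiles = {
        "objective": {
            "max_tokens": 4096,   # simple problems: 1024-4096
            "temperature": 0.2,   # objective responses: 0.1-0.3
            "top_p": 0.85,        # keep outputs focused
        },
        "creative": {
            "max_tokens": 16000,  # complex tasks: 8000-16,000
            "temperature": 1.2,   # creative reasoning: 0.9-1.5
            "top_p": 0.85,
        },
    }
    config = dict(profiles[profile])
    # Stop sequence suggested by the card to cut off redundant continuations.
    config["stop"] = ["Therefore, the answer is"]
    return config
```

A dict like this can be passed to most OpenAI-compatible or llama.cpp-style generation endpoints, subject to each backend's parameter names.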
+
+ #### Prompts for Optimal Use
+ - **Simple Tasks:** Use prompts like:
+   *"You are a helpful assistant."*
+ - **Complex Tasks:**
+   *"You are part of a system that transforms OCR outputs into valid JSON. Always return only..."*
+ - **Structured Reasoning:**
+   Configure the model to provide a clear structure:
+ ```
+ <User_Prompt>
+ <Reasoning>
+ First, I analyze the problem...
+ Then, I consider the implications...
+ Finally, I conclude...
+ </Reasoning>
+ <Answer>
+ Here is the answer...
+ ```
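When the model follows the tagged structure above, the final answer can be separated from the reasoning with a small parser. This is a sketch using the tag names shown above; it tolerates a missing closing tag, as in the example:

```python
import re

def split_reasoning(text: str) -> tuple:
    """Extract <Reasoning> and <Answer> sections from a tagged response.

    Returns (reasoning, answer); either part is "" if its tag is absent.
    Accepts an unterminated section (no closing tag before end of text).
    """
    def grab(tag: str) -> str:
        match = re.search(rf"<{tag}>(.*?)(?:</{tag}>|\Z)", text, re.DOTALL)
        return match.group(1).strip() if match else ""
    return grab("Reasoning"), grab("Answer")

reply = """<Reasoning>
First, I analyze the problem...
Finally, I conclude...
</Reasoning>
<Answer>
Here is the answer..."""
reasoning, answer = split_reasoning(reply)
```

Stripping the reasoning block before showing output to end users also avoids surfacing the self-validation loops noted in the Limitations section.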
+
+ ## Citation
+
+ ```bibtex
+ @misc{isa02,
+   author = {NeuraLake},
+   title = {iSA-02: The First Small Reasoning Model with Context-Dynamic Behavior},
+   year = {2024},
+   license = {Apache 2.0},
+   url = {https://huggingface.co/NeuraLake/iSA-02},
+ }
+ ```
+
+ ### This model card is in development and will include the final name of the model, evaluation tests, and more.
iSA-02-Nano-1B-Preview.F16.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c7be73111c7e5f7b16d4a01529d0d4b6f2e16418fb6f2a46e901916912cff8eb
+ size 2479595808
iSA-02-Nano-1B-Preview.F32.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:60def8c78cf6879c78bb102a24a6d098de33e90d70ab91c98a8704a6c47df42c
+ size 4951089440
iSA-02-Nano-1B-Preview.Q4_0.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7453fb204373cbf68880f30d85e42eb7ccac3574afe5f876098519f87ad434af
+ size 770928928
iSA-02-Nano-1B-Preview.Q4_K_M.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5df26f47749fac1e01053590c711065a3d53204a08e2909db9ca8552141b295a
+ size 807694624
iSA-02-Nano-1B-Preview.Q5_K_M.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:247ecf08586f842514ac8bf82e9108f7a11c47fa56541406d33700b60a4a038e
+ size 911503648
iSA-02-Nano-1B-Preview.Q8_0.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dabc0fcd64bf0f576e087ff6f152319027e39687dea78ccd4cf8f7c1321d748d
+ size 1321083168