Neo111x's picture
Update README.md
27e4d80 verified

Introduction to Falcon3-decompiler-3b

Falcon3-decompiler-3b aims to decompile x86 assembly instructions into C.

Evaluation Results

The benchmark used for evaluation is the HumanEval benchmark from the LLM4Decompile benchmark suite.

How to Use

Here is an example of how to use our model. Note: replace asm_func with the function that you want to decompile.

Decompilation: use falcon3-decompiler-3b to translate Ghidra decompilation output into more readable code:

from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# NOTE(review): this snippet loads the LLM4Decompile V1.5 checkpoint, not
# Neo111x/falcon3-decompiler-3b. The full example further down reassigns
# model_path, tokenizer and model, so anything loaded here is replaced before
# it is used. Confirm whether this snippet is still wanted.
model_path = 'LLM4Binary/llm4decompile-1.3b-v1.5' # V1.5 Model
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path,torch_dtype=torch.bfloat16).cuda()

from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import os

# Sample input for the model. Despite the variable name, this sample is C-like
# pseudo-code (the text above describes it as Ghidra decompilation output),
# not raw assembly. Replace the string contents with the function you want to
# decompile; leading/trailing whitespace is stripped before prompting.
asm_func = """
char * func0(char **param_1,int param_2)

{
  char **ppcVar1;
  char *__s;
  size_t sVar2;
  int iVar3;
  char *pcVar4;
  
  pcVar4 = "";
  if (0 < param_2) {
    iVar3 = 0;
    ppcVar1 = param_1 + (ulong)(param_2 - 1) + 1;
    do {
      __s = *param_1;
      sVar2 = strlen(__s);
      if (iVar3 < (int)sVar2) {
        pcVar4 = __s;
        iVar3 = (int)sVar2;
      }
      param_1 = param_1 + 1;
    } while (param_1 != ppcVar1);
  }
  return pcVar4;
}
"""

# Build the prompt: the input function is wrapped between a fixed header and
# footer line. These marker strings are part of the prompt and must not be
# reworded.
before = "# This is the assembly code:\n"  # prompt header
after = "\n# What is the source code?\n"  # prompt footer
asm_func = before + asm_func.strip() + after

model_path = "Neo111x/falcon3-decompiler-3b"
tokenizer = AutoTokenizer.from_pretrained(model_path)
# device_map="auto" already places the weights on the available device(s).
# Calling .to(...) on a dispatched model is unnecessary and can fail when the
# weights are sharded or offloaded, so the model is left where it was placed.
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype="auto",
    device_map="auto",
)

# Send the inputs to wherever the model's first parameters live instead of
# hard-coding a CUDA device index.
inputs = tokenizer(asm_func, return_tensors="pt").to(model.device)
# Number of prompt tokens; generate() returns prompt + continuation, so this
# is the offset at which the newly generated tokens start.
prompt_len = inputs["input_ids"].shape[1]

with torch.no_grad():
    # The original note gives a maximum length of 4096 tokens, so
    # prompt length + max_new_tokens should stay within that range.
    outputs = model.generate(**inputs, max_new_tokens=2048)

# Decode only the continuation. skip_special_tokens drops a trailing EOS if
# one was produced, without discarding a real token when generation stopped
# at the max_new_tokens limit instead.
c_func_decompile = tokenizer.decode(
    outputs[0][prompt_len:],
    skip_special_tokens=True,
)

# Note: this decompiles a single function only; the original file may contain
# multiple functions.

print(f'decompiled function:\n{c_func_decompile}')

Contact

If you have any questions, please raise an issue.