
LMFResearchSociety/SDXLLoRAArchive
Text-to-Image
•
Updated
•
58
Being Ryan Gosling, being Patrick Bateman, watching Blade Runner 2049 (2017) on repeat, rewatching American Psycho (2000), watching Barbie (2023). Get Kenergetic!
import pathlib
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
# Classify the contents of "test-fic.txt" with a sequence-classification
# checkpoint. "<ModernBERT>" is a placeholder for the real model id and must be
# the same for the tokenizer and the model.
tokenizer = AutoTokenizer.from_pretrained("<ModernBERT>")

# Triton is **required**, but nowhere in the documentation is it stated that
# triton is needed. Installing triton on Windows isn't super straightforward.
# Thankfully someone has already built wheels for it:
# - https://github.com/woct0rdho/triton-windows/releases
# NOTE(review): the commented-out `reference_compile=False` below was left by
# the original author, presumably as an alternative to installing triton --
# confirm against the ModernBERT documentation before enabling it.
model = AutoModelForSequenceClassification.from_pretrained(
    "<ModernBERT>",  # reference_compile=False
)

# By default the model stays on the CPU, which is slow, so prefer a CUDA
# device. The device string must be "cuda"; using "gpu" errors out.
# Fall back to the CPU when no CUDA device is present instead of crashing.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)

# Not setting `return_tensors="pt"` causes
#   File "C:\Program Files\Python310\lib\site-packages\transformers\modeling_utils.py", line 5311, in warn_if_padding_and_no_attention_mask
#     if self.config.pad_token_id in input_ids[:, [-1, 0]]:
#   TypeError: list indices must be integers or slices, not tuple
# or...
#   File "C:\Program Files\Python310\lib\site-packages\transformers\models\modernbert\modeling_modernbert.py", line 836, in forward
#     batch_size, seq_len = input_ids.shape[:2]
#   AttributeError: 'list' object has no attribute 'shape'
# NOTE(review): no truncation is requested, so the whole file is tokenized as
# one sequence -- confirm it fits the model's maximum input length.
block = tokenizer(
    pathlib.Path("test-fic.txt").read_text("utf-8"), return_tensors="pt"
)
# The inputs have to live on the same device as the model.
block = block.to(device)

# Inference only: skip gradient tracking for the forward pass.
with torch.no_grad():
    # `**block` is needed to fix "AttributeError: 'NoneType' object has no
    # attribute 'unsqueeze'" on attention_mask.unsqueeze(-1): unpacking passes
    # the attention mask along with the input ids.
    logits = model(**block).logits

# Bring the logits back to the CPU before converting to numpy;
# `Tensor.numpy()` cannot export a tensor that lives on a CUDA device.
logits = logits.to("cpu")

# NOTE: despite its name, this holds the softmax probabilities for the first
# (only) sequence in the batch, one value per class, not class ids. The name is
# kept so any later code that reads it keeps working.
predicted_class_ids = torch.softmax(logits, -1)[0].numpy()
from loadimg import load_img
from huggingface_hub import InferenceClient
# Ask a hosted vision-language model to describe an image, streaming the reply.
#
# or load a local image
# NOTE(review): `imgPath_url_pillow_or_numpy` is a placeholder that is not
# defined in the visible part of this file; assign it before running (the name
# suggests a path, URL, Pillow image or numpy array is accepted -- confirm
# against the loadimg documentation).
my_b64_img = load_img(imgPath_url_pillow_or_numpy, output_type="base64")

# NOTE(review): the api_key below is a placeholder. Replace it with a real
# token, preferably loaded from the environment rather than committed to source.
client = InferenceClient(api_key="hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx")

# One user turn made of a text instruction plus the image to describe.
messages = [
    {
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": "Describe this image in one sentence.",
            },
            {
                "type": "image_url",
                "image_url": {
                    # base64 allows using images without uploading them to the web
                    "url": my_b64_img,
                },
            },
        ],
    }
]

# stream=True yields the answer incrementally instead of as one response.
stream = client.chat.completions.create(
    model="meta-llama/Llama-3.2-11B-Vision-Instruct",
    messages=messages,
    max_tokens=500,
    stream=True,
)

for chunk in stream:
    piece = chunk.choices[0].delta.content
    # `content` is optional on streamed deltas; skip empty/None pieces so the
    # literal text "None" is never printed into the answer.
    if piece:
        print(piece, end="")