umit-LLMs
Collection
5 items • Updated
A ~57M-parameter GPT-style toy model trained on 1M rows of the English Wikipedia (wikipedia.en) dataset.
Epochs: 3
Trained on an RTX 2060 (6 GB VRAM) with 54 GB of system RAM.
import torch
import argparse
from transformers import pipeline
def main():
    """Run minimal text-generation inference with a Hugging Face model.

    Parses the command line for a model ID (positional, optional) plus
    generation options, loads a ``text-generation`` pipeline, generates a
    completion for the prompt, and prints it to stdout.

    Command-line arguments:
        model_id          Hugging Face model ID or local path
                          (default: uisikdag/umitllama-base-english).
        --prompt          Prompt text to complete (default: "The economy is:").
        --max-new-tokens  Number of tokens to generate (default: 50).
        --temperature     Sampling temperature (default: 0.7).
    """
    parser = argparse.ArgumentParser(description="Minimal Llama Inference")
    # Positional but optional: running the script with no arguments
    # falls back to the default published model.
    parser.add_argument(
        "model_id",
        nargs="?",
        default="uisikdag/umitllama-base-english",
        help="Hugging Face model ID or local path",
    )
    # Generation knobs, previously hard-coded; defaults preserve the
    # original behavior exactly.
    parser.add_argument(
        "--prompt",
        default="The economy is:",
        help="Prompt text to complete",
    )
    parser.add_argument(
        "--max-new-tokens",
        type=int,
        default=50,
        help="Maximum number of new tokens to generate",
    )
    parser.add_argument(
        "--temperature",
        type=float,
        default=0.7,
        help="Sampling temperature",
    )
    args = parser.parse_args()

    print(f"Loading model from HF: {args.model_id}")

    # Create the pipeline. fp16 is used only when a CUDA device is
    # available; device_map="auto" lets accelerate place the weights.
    # SECURITY NOTE: trust_remote_code=True executes arbitrary Python
    # shipped with the model repo — only use with model IDs you trust.
    generator = pipeline(
        "text-generation",
        model=args.model_id,
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        device_map="auto",
        trust_remote_code=True,
    )

    print(f"\nPrompt: {args.prompt}")
    # pad_token_id is set to EOS to silence the warning emitted for
    # models that define no pad token.
    output = generator(
        args.prompt,
        max_new_tokens=args.max_new_tokens,
        do_sample=True,
        temperature=args.temperature,
        pad_token_id=generator.tokenizer.eos_token_id,
    )
    print(f"\nResponse:\n{output[0]['generated_text']}")
# Standard script entry point: run inference only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
main()