Error when loading Llama 3: AttributeError: module 'transformers.models.bit.modeling_bit' has no attribute 'Linear' #2191

Description

@lss191143

Library:

  • unsloth, installed with pip install unsloth
  • transformers == 4.50.0 (see the version check below)
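
For completeness, a quick way to confirm the installed versions in this environment (a minimal sketch; importlib.metadata is in the Python standard library):

from importlib.metadata import version

print("unsloth:", version("unsloth"))            # the unsloth release installed via pip
print("transformers:", version("transformers"))  # reports 4.50.0 here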

Code:

I'm running the code from the notebook "Llama3_(8B)-Ollama.ipynb" (https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Llama3_(8B)-Ollama.ipynb#scrollTo=R_Srz9REGK2u).

The model I use below was downloaded from Hugging Face (https://huggingface.co/unsloth/Llama-3.2-1B-unsloth-bnb-4bit), so I don't think there's a mistake on that side.

from unsloth import FastLanguageModel  # imported at the top of the notebook, needed below

max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

# 4bit pre quantized models we support for 4x faster downloading + no OOMs.
fourbit_models = [
    "unsloth/mistral-7b-v0.3-bnb-4bit",      # New Mistral v3 2x faster!
    "unsloth/mistral-7b-instruct-v0.3-bnb-4bit",
    "unsloth/llama-3-8b-bnb-4bit",           # Llama-3 15 trillion tokens model 2x faster!
    "unsloth/llama-3-8b-Instruct-bnb-4bit",
    "unsloth/llama-3-70b-bnb-4bit",
    "unsloth/Phi-3-mini-4k-instruct",        # Phi-3 2x faster!
    "unsloth/Phi-3-medium-4k-instruct",
    "unsloth/mistral-7b-bnb-4bit",
    "unsloth/gemma-7b-bnb-4bit",             # Gemma 2.2x faster!
] # More models at https://huggingface.co/unsloth

# model_file_path = '/data2/lss/.cache/huggingface/hub/models--unsloth--llama-3-8b-bnb-4bit'
# model_file_path = '/data2/lss/.cache/huggingface/Llama-3.2-3B-Instruct'
model_file_path = '/data2/lss/.cache/huggingface/hub/models--unsloth--Llama-3.2-1B-unsloth-bnb-4bit'

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = model_file_path,
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)
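
(Side note: instead of passing the raw hub cache directory models--unsloth--... as model_name, resolving the concrete snapshot path first might be safer. A minimal sketch, assuming huggingface_hub is available, as it ships with transformers:

from huggingface_hub import snapshot_download

# Downloads the repo if missing and returns the resolved snapshot directory
# inside the HF cache, i.e. the folder that actually contains config.json etc.
model_file_path = snapshot_download("unsloth/Llama-3.2-1B-unsloth-bnb-4bit")
print(model_file_path)
)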

Actually, all I've done is run the code from the notebook (fine-tuning Llama 3) in my own Jupyter notebook, inside a fresh, clean conda environment built just for unsloth. I haven't added anything else, so I have no idea how to fix this. Maybe I need to switch to a different version of transformers?
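
If this really is a transformers 4.50.0 compatibility regression, one possible workaround (an assumption on my part, not a confirmed fix) would be pinning an earlier release and retrying:

pip install "transformers==4.49.0"

Upgrading unsloth and unsloth_zoo to their latest releases might also help, if a fix has already landed there.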

Full Traceback:

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
Cell In[2], line 22
     18 # model_file_path = '/data2/lss/.cache/huggingface/hub/models--unsloth--llama-3-8b-bnb-4bit'
     19 # model_file_path = '/data2/lss/.cache/huggingface/Llama-3.2-3B-Instruct'
     20 model_file_path = '/data2/lss/.cache/huggingface/hub/models--unsloth--Llama-3.2-1B-unsloth-bnb-4bit'
---> 22 model, tokenizer = FastLanguageModel.from_pretrained(
     23     model_name = model_file_path,
     24     max_seq_length = max_seq_length,
     25     dtype = dtype,
     26     load_in_4bit = load_in_4bit,
     27     # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
     28 )

File ~/install/anaconda3/envs/unsloth/lib/python3.10/site-packages/unsloth/models/loader.py:308, in FastLanguageModel.from_pretrained(model_name, max_seq_length, dtype, load_in_4bit, load_in_8bit, full_finetuning, token, device_map, rope_scaling, fix_tokenizer, trust_remote_code, use_gradient_checkpointing, resize_model_vocab, revision, use_exact_model_name, fast_inference, gpu_memory_utilization, float8_kv_cache, random_state, max_lora_rank, disable_log_stats, *args, **kwargs)
    300     dispatch_model = FastQwen2Model
    301 # Temporary disable optimized Cohere until errors match
    302 # elif model_type == "cohere":
    303 #     dispatch_model = FastCohereModel
   (...)
    306 #     dispatch_model = FastGraniteModel
    307 else:
--> 308     return FastModel.from_pretrained(
    309         model_name                 = model_name,
    310         max_seq_length             = max_seq_length,
    311         dtype                      = dtype,
    312         load_in_4bit               = load_in_4bit,
    313         load_in_8bit               = load_in_8bit,
    314         full_finetuning            = full_finetuning,
    315         token                      = token,
    316         device_map                 = device_map,
    317         rope_scaling               = rope_scaling, # [TODO] No effect
    318         fix_tokenizer              = fix_tokenizer, # [TODO] No effect
    319         trust_remote_code          = trust_remote_code,
    320         use_gradient_checkpointing = use_gradient_checkpointing,
    321         resize_model_vocab         = resize_model_vocab, # [TODO] No effect
    322         revision                   = revision,
    323         return_logits              = False, # Return logits
    324         fullgraph                  = True, # No graph breaks
    325         use_exact_model_name       = use_exact_model_name,
    326         *args, **kwargs,
    327     )
    328 pass
    330 # Check if this is local model since the tokenizer gets overwritten

File ~/install/anaconda3/envs/unsloth/lib/python3.10/site-packages/unsloth/models/loader.py:666, in FastModel.from_pretrained(model_name, max_seq_length, dtype, load_in_4bit, load_in_8bit, full_finetuning, token, device_map, rope_scaling, fix_tokenizer, trust_remote_code, use_gradient_checkpointing, resize_model_vocab, revision, return_logits, fullgraph, use_exact_model_name, *args, **kwargs)
    664 with redirector:
    665     patch_loss_functions(torch_compile = False)
--> 666     model_types = unsloth_compile_transformers(
    667         dtype                   = dtype,
    668         model_name              = model_name,
    669         model_types             = model_types,
    670         token                   = token,
    671         sdpa_dynamic_mask       = True,
    672         sdpa_bool_masks         = True,
    673         sdpa_gqa_replace        = True,
    674         sdpa_dynamic_compile    = True,
    675         compile_attention       = True,
    676         disable_causal_masks    = True,
    677         compile_torch_modules   = True,
    678         compile_custom_modules  = True,
    679         compile_function_calls  = True,
    680         fuse_lm_head            = True,
    681         gradient_checkpointing  = True,
    682         manual_replacements     = True,
    683         fast_lora_forwards      = True,
    684         fast_residual_stream    = False,
    685         accurate_accumulation   = True,
    686         epilogue_fusion         = True,
    687         max_autotune            = False,
    688         shape_padding           = True,
    689         cudagraphs              = False,
    690         debug                   = False,
    691         fullgraph               = fullgraph,
    692         import_from_cache       = False,
    693         disable                 = False,
    694         return_logits           = return_logits,
    695         trust_remote_code       = trust_remote_code,
    696     )
    697 pass
    699 # Check if this is local model since the tokenizer gets overwritten

File ~/install/anaconda3/envs/unsloth/lib/python3.10/site-packages/unsloth/models/_utils.py:1183, in unsloth_compile_transformers(dtype, model_name, model_types, token, revision, trust_remote_code, sdpa_dynamic_mask, sdpa_bool_masks, sdpa_gqa_replace, sdpa_dynamic_compile, compile_attention, disable_causal_masks, compile_torch_modules, compile_custom_modules, compile_function_calls, fuse_lm_head, gradient_checkpointing, manual_replacements, fast_lora_forwards, fast_residual_stream, accurate_accumulation, epilogue_fusion, max_autotune, shape_padding, cudagraphs, debug, fullgraph, import_from_cache, disable, return_logits)
   1181 model_types = list(dict().fromkeys(model_types).keys())
   1182 for model_type in model_types:
-> 1183     _unsloth_compile_transformers(
   1184         model_type,
   1185         sdpa_dynamic_mask      = sdpa_dynamic_mask,
   1186         sdpa_bool_masks        = sdpa_bool_masks,
   1187         sdpa_gqa_replace       = sdpa_gqa_replace,
   1188         sdpa_dynamic_compile   = sdpa_dynamic_compile,
   1189         compile_attention      = compile_attention,
   1190         disable_causal_masks   = disable_causal_masks,
   1191         compile_torch_modules  = compile_torch_modules,
   1192         compile_custom_modules = compile_custom_modules,
   1193         compile_function_calls = compile_function_calls,
   1194         fuse_lm_head           = fuse_lm_head,
   1195         gradient_checkpointing = gradient_checkpointing,
   1196         manual_replacements    = manual_replacements,
   1197         fast_lora_forwards     = fast_lora_forwards,
   1198         fast_residual_stream   = fast_residual_stream,
   1199         accurate_accumulation  = accurate_accumulation,
   1200         epilogue_fusion        = epilogue_fusion,
   1201         max_autotune           = max_autotune,
   1202         shape_padding          = shape_padding,
   1203         cudagraphs             = cudagraphs,
   1204         debug                  = debug,
   1205         fullgraph              = fullgraph,
   1206         import_from_cache      = import_from_cache,
   1207         disable                = disable,
   1208         return_logits          = return_logits,
   1209     )
   1210 pass
   1211 # Redo patches which override compiler

File ~/install/anaconda3/envs/unsloth/lib/python3.10/site-packages/unsloth_zoo/compiler.py:1635, in unsloth_compile_transformers(model_type, sdpa_dynamic_mask, sdpa_bool_masks, sdpa_gqa_replace, sdpa_dynamic_compile, compile_attention, disable_causal_masks, compile_torch_modules, compile_custom_modules, compile_function_calls, fuse_lm_head, gradient_checkpointing, manual_replacements, fast_lora_forwards, fast_residual_stream, accurate_accumulation, epilogue_fusion, max_autotune, shape_padding, cudagraphs, debug, fullgraph, import_from_cache, disable, return_logits)
   1633 if disable_causal_masks:
   1634     for module in other_classes:
-> 1635         source = eval(f"{model_location}.{module}")
   1636         if not hasattr(source, "_update_causal_mask"): continue
   1638         try: source = inspect.getsource(source.__init__)

File <string>:1

AttributeError: module 'transformers.models.bit.modeling_bit' has no attribute 'Linear'
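
From the traceback, unsloth_zoo/compiler.py evals strings of the form f"{model_location}.{module}" for every detected model type, and transformers.models.bit.modeling_bit (BiT is a convolution-based vision model) evidently defines no Linear attribute. A minimal sketch of the failing lookup, assuming transformers 4.50.0:

import importlib

# The same lookup compiler.py performs via eval(); hasattr() avoids the exception.
mod = importlib.import_module("transformers.models.bit.modeling_bit")
print(hasattr(mod, "Linear"))  # False, so eval("...modeling_bit.Linear") raises AttributeError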
