Skip to content

Custom metrics do not work with AutoSklearn2Classifier #1734

Open
@ViktorooReps

Description

@ViktorooReps

Describe the bug

I am creating a custom MCC scorer for a binary classification problem and am encountering the following error:

FileNotFoundError: [Errno 2] No such file or directory: '/home/vshcherb@ad.unil.ch/PycharmProjects/laion-copyright/venv_new/lib/python3.9/site-packages/autosklearn/experimental/mcc/askl2_training_data.json'

To Reproduce

Replace the dataset loading logic with any other dataset; matthews_corrcoef is imported from scikit-learn.

scorer = autosklearn.metrics.make_scorer(
    name='mcc',
    score_func=matthews_corrcoef,
    optimum=1,
    greater_is_better=True,
    needs_proba=False,
    needs_threshold=False,
)

train_x, train_y, valid_x, valid_y, _, _ = load_dataset(target)

classifier = AutoSklearn2Classifier(
    time_left_for_this_task=24 * 60 * 60,  # 1d
    per_run_time_limit=15 * 60,
    memory_limit=20 * 1024,
    n_jobs=4, 
    max_models_on_disc=50,
    ensemble_size=50,
    seed=42,
    metric=scorer
)
classifier.fit(train_x, train_y, valid_x, valid_y)

Expected behavior

No error

Actual behavior, stacktrace or logfile

---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
Cell In[21], line 13
     10 for target in all_target_columns:
     11     train_x, train_y, valid_x, valid_y, _, _ = load_dataset(target)
---> 13     classifier = AutoSklearn2Classifier(
     14         # tmp_folder=cache_path / 'tnp2',
     15         time_left_for_this_task=24 * 60 * 60,  # 1d
     16         per_run_time_limit=15 * 60,
     17         memory_limit=20 * 1024,
     18         n_jobs=4,
     19         max_models_on_disc=50,
     20         ensemble_size=50,
     21         seed=42,
     22         metric=scorer
     23     )
     24     classifier.fit(train_x, train_y, valid_x, valid_y)
     25     save_model(classifier, 'autosklearn', target)

File ~/PycharmProjects/laion-copyright/venv_new/lib/python3.9/site-packages/autosklearn/experimental/askl2.py:311, in AutoSklearn2Classifier.__init__(self, time_left_for_this_task, per_run_time_limit, ensemble_size, ensemble_class, ensemble_kwargs, ensemble_nbest, max_models_on_disc, seed, memory_limit, tmp_folder, delete_tmp_folder_after_terminate, n_jobs, dask_client, disable_evaluator_output, smac_scenario_args, logging_config, metric, scoring_functions, load_models, dataset_compression, allow_string_features)
    306 include_preprocessors = ["no_preprocessing"]
    307 include = {
    308     "classifier": include_estimators,
    309     "feature_preprocessor": include_preprocessors,
    310 }
--> 311 self.train_selectors(selected_metric=metric)
    312 super().__init__(
    313     time_left_for_this_task=time_left_for_this_task,
    314     per_run_time_limit=per_run_time_limit,
   (...)
    339     allow_string_features=allow_string_features,
    340 )

File ~/PycharmProjects/laion-copyright/venv_new/lib/python3.9/site-packages/autosklearn/experimental/askl2.py:356, in AutoSklearn2Classifier.train_selectors(self, selected_metric)
    352 for metric in metric_list:
    353     training_data_file = (
    354         self.this_directory / metric.name / "askl2_training_data.json"
    355     )
--> 356     with open(training_data_file) as fh:
    357         training_data = json.load(fh)
    358         fh.seek(0)

FileNotFoundError: [Errno 2] No such file or directory: '/home/vshcherb@ad.unil.ch/PycharmProjects/laion-copyright/venv_new/lib/python3.9/site-packages/autosklearn/experimental/mcc/askl2_training_data.json'

Sorry for the broken encoding. The problem arises when training_data_file is loaded at line 356 of askl2.py. The tmp_folder does not exist.

Environment and installation:

  • Red Hat Enterprise Linux 8.8 (Ootpa)
  • venv
  • Python 3.9

pip freeze:

anyio==4.4.0
argon2-cffi==23.1.0
argon2-cffi-bindings==21.2.0
arrow==1.3.0
asttokens==2.4.1
async-lru==2.0.4
attrs==23.2.0
auto-sklearn==0.15.0
Babel==2.15.0
beautifulsoup4==4.12.3
bleach==6.1.0
certifi==2024.7.4
cffi==1.16.0
charset-normalizer==3.3.2
click==8.1.7
cloudpickle==3.0.0
comm==0.2.2
ConfigSpace==0.4.21
Cython==3.0.10
dask==2024.7.1
debugpy==1.8.2
decorator==5.1.1
defusedxml==0.7.1
distributed==2024.7.1
distro==1.9.0
emcee==3.1.6
exceptiongroup==1.2.2
executing==2.0.1
fastjsonschema==2.20.0
fqdn==1.5.1
fsspec==2024.6.1
h11==0.14.0
httpcore==1.0.5
httpx==0.27.0
idna==3.7
importlib_metadata==8.2.0
ipykernel==6.29.5
ipython==8.18.1
ipywidgets==8.1.3
isoduration==20.11.0
jedi==0.19.1
Jinja2==3.1.4
joblib==1.4.2
json5==0.9.25
jsonpointer==3.0.0
jsonschema==4.23.0
jsonschema-specifications==2023.12.1
jupyter==1.0.0
jupyter-console==6.6.3
jupyter-events==0.10.0
jupyter-lsp==2.2.5
jupyter_client==8.6.2
jupyter_core==5.7.2
jupyter_server==2.14.2
jupyter_server_terminals==0.5.3
jupyterlab==4.2.4
jupyterlab_pygments==0.3.0
jupyterlab_server==2.27.3
jupyterlab_widgets==3.0.11
liac-arff==2.5.0
locket==1.0.0
MarkupSafe==2.1.5
matplotlib-inline==0.1.7
mistune==3.0.2
msgpack==1.0.8
nbclient==0.10.0
nbconvert==7.16.4
nbformat==5.10.4
nest-asyncio==1.6.0
notebook==7.2.1
notebook_shim==0.2.4
numpy==1.23.3
overrides==7.7.0
packaging==24.1
pandas==1.5.3
pandocfilters==1.5.1
parso==0.8.4
partd==1.4.2
pexpect==4.9.0
platformdirs==4.2.2
prometheus_client==0.20.0
prompt_toolkit==3.0.47
psutil==6.0.0
ptyprocess==0.7.0
pure_eval==0.2.3
pycparser==2.22
Pygments==2.18.0
pynisher==0.6.4
pyparsing==3.1.2
pyrfr==0.8.3
python-dateutil==2.9.0.post0
python-json-logger==2.0.7
pytz==2024.1
PyYAML==6.0.1
pyzmq==26.0.3
qtconsole==5.5.2
QtPy==2.4.1
referencing==0.35.1
requests==2.32.3
rfc3339-validator==0.1.4
rfc3986-validator==0.1.1
rpds-py==0.19.1
scikit-learn==0.24.2
scipy==1.13.1
Send2Trash==1.8.3
six==1.16.0
smac==1.2
sniffio==1.3.1
sortedcontainers==2.4.0
soupsieve==2.5
stack-data==0.6.3
tblib==3.0.0
terminado==0.18.1
threadpoolctl==3.5.0
tinycss2==1.3.0
tomli==2.0.1
toolz==0.12.1
tornado==6.4.1
tqdm==4.66.4
traitlets==5.14.3
types-python-dateutil==2.9.0.20240316
typing_extensions==4.12.2
uri-template==1.3.0
urllib3==2.2.2
wcwidth==0.2.13
webcolors==24.6.0
webencodings==0.5.1
websocket-client==1.8.0
widgetsnbextension==4.0.11
zict==3.0.0
zipp==3.19.2

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions