@@ -0,0 +1,838 @@
+from __future__ import absolute_import, division, print_function
+
+import math
+import os
+import typing as ty
+from copy import deepcopy
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.optim as optim
+import zero
+from torch import Tensor
+
+
+class IndexLoader:
+    def __init__(
+        self, train_size: int, batch_size: int, shuffle: bool, device: torch.device
+    ) -> None:
+        self._train_size = train_size
+        self._batch_size = batch_size
+        self._shuffle = shuffle
+        self._device = device
+
+    def __len__(self) -> int:
+        return math.ceil(self._train_size / self._batch_size)
+
+    def __iter__(self):
+        indices = list(
+            zero.iloader(self._train_size, self._batch_size, shuffle=self._shuffle)
+        )
+        return iter(torch.cat(indices).to(self._device).split(self._batch_size))
+
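+# Minimal usage sketch for IndexLoader (illustrative; `X_train` is a
+# hypothetical feature tensor, not defined in this module):
+#
+#     loader = IndexLoader(len(X_train), 64, shuffle=True,
+#                          device=torch.device('cpu'))
+#     for idx in loader:          # idx: 1D tensor of row indices
+#         batch = X_train[idx]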
+
+class Lambda(nn.Module):
+    def __init__(self, f: ty.Callable) -> None:
+        super().__init__()
+        self.f = f
+
+    def forward(self, x):
+        return self.f(x)
+
+
+# Source: https://github.com/bzhangGo/rmsnorm
+# NOTE: eps is changed to 1e-5
+class RMSNorm(nn.Module):
+    def __init__(self, d, p=-1.0, eps=1e-5, bias=False):
+        """Root Mean Square Layer Normalization
+
+        :param d: model size
+        :param p: partial RMSNorm; valid values are in [0, 1], default -1.0 (disabled)
+        :param eps: epsilon value, default 1e-5
+        :param bias: whether to use a bias term for RMSNorm, disabled by
+            default because RMSNorm doesn't enforce re-centering invariance.
+        """
+        super(RMSNorm, self).__init__()
+
+        self.eps = eps
+        self.d = d
+        self.p = p
+        self.bias = bias
+
+        self.scale = nn.Parameter(torch.ones(d))
+        self.register_parameter("scale", self.scale)
+
+        if self.bias:
+            self.offset = nn.Parameter(torch.zeros(d))
+            self.register_parameter("offset", self.offset)
+
+    def forward(self, x):
+        if self.p < 0.0 or self.p > 1.0:
+            norm_x = x.norm(2, dim=-1, keepdim=True)
+            d_x = self.d
+        else:
+            partial_size = int(self.d * self.p)
+            partial_x, _ = torch.split(x, [partial_size, self.d - partial_size], dim=-1)
+
+            norm_x = partial_x.norm(2, dim=-1, keepdim=True)
+            d_x = partial_size
+
+        rms_x = norm_x * d_x ** (-1.0 / 2)
+        x_normed = x / (rms_x + self.eps)
+
+        if self.bias:
+            return self.scale * x_normed + self.offset
+
+        return self.scale * x_normed
+
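+# Shape/behavior sketch (illustrative): RMSNorm(8)(torch.randn(2, 8)) returns
+# a (2, 8) tensor; each row is divided by its root-mean-square and rescaled,
+# with no mean subtraction (unlike LayerNorm).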
+
+class ScaleNorm(nn.Module):
+    """
+    Sources:
+    - https://github.com/tnq177/transformers_without_tears/blob/25026061979916afb193274438f7097945acf9bc/layers.py#L132
+    - https://github.com/tnq177/transformers_without_tears/blob/6b2726cd9e6e642d976ae73b9f696d9d7ff4b395/layers.py#L157
+    """
+
+    def __init__(self, d: int, eps: float = 1e-5, clamp: bool = False) -> None:
+        super(ScaleNorm, self).__init__()
+        self.scale = nn.Parameter(torch.tensor(d ** 0.5))
+        self.eps = eps
+        self.clamp = clamp
+
+    def forward(self, x):
+        norms = torch.norm(x, dim=-1, keepdim=True)
+        norms = norms.clamp(min=self.eps) if self.clamp else norms + self.eps
+        return self.scale * x / norms
+
+
+def reglu(x: Tensor) -> Tensor:
+    a, b = x.chunk(2, dim=-1)
+    return a * F.relu(b)
+
+
+def geglu(x: Tensor) -> Tensor:
+    a, b = x.chunk(2, dim=-1)
+    return a * F.gelu(b)
+
+
+class ReGLU(nn.Module):
+    def forward(self, x: Tensor) -> Tensor:
+        return reglu(x)
+
+
+class GEGLU(nn.Module):
+    def forward(self, x: Tensor) -> Tensor:
+        return geglu(x)
+
+
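+# Note: the GLU-style activations above halve the last dimension, so the layer
+# feeding them must output twice the desired width. Quick check (illustrative):
+# reglu(torch.randn(3, 8)).shape == (3, 4).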
+def make_optimizer(
+    optimizer: str,
+    parameter_groups,
+    lr: float,
+    weight_decay: float,
+) -> optim.Optimizer:
+    Optimizer = {
+        'adabelief': AdaBelief,
+        'adam': optim.Adam,
+        'adamw': optim.AdamW,
+        'radam': RAdam,
+        'sgd': optim.SGD,
+    }[optimizer]
+    momentum = (0.9,) if Optimizer is optim.SGD else ()
+    return Optimizer(parameter_groups, lr, *momentum, weight_decay=weight_decay)
+
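+# Usage sketch (illustrative): make_optimizer('adamw', model.parameters(),
+# lr=1e-3, weight_decay=1e-5) builds torch.optim.AdamW; for 'sgd' a momentum
+# of 0.9 is inserted positionally. Referencing AdaBelief and RAdam here is
+# safe even though they are defined later in this module, because the dict
+# lookup happens at call time.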
+
+def make_lr_schedule(
+    optimizer: optim.Optimizer,
+    lr: float,
+    epoch_size: int,
+    lr_schedule: ty.Optional[ty.Dict[str, ty.Any]],
+) -> ty.Tuple[
+    ty.Optional[optim.lr_scheduler._LRScheduler],
+    ty.Dict[str, ty.Any],
+    ty.Optional[int],
+]:
+    if lr_schedule is None:
+        lr_schedule = {'type': 'constant'}
+    lr_scheduler = None
+    n_warmup_steps = None
+    if lr_schedule['type'] in ['transformer', 'linear_warmup']:
+        n_warmup_steps = (
+            lr_schedule['n_warmup_steps']
+            if 'n_warmup_steps' in lr_schedule
+            else lr_schedule['n_warmup_epochs'] * epoch_size
+        )
+    elif lr_schedule['type'] == 'cyclic':
+        lr_scheduler = optim.lr_scheduler.CyclicLR(
+            optimizer,
+            base_lr=lr,
+            max_lr=lr_schedule['max_lr'],
+            step_size_up=lr_schedule['n_epochs_up'] * epoch_size,
+            step_size_down=lr_schedule['n_epochs_down'] * epoch_size,
+            mode=lr_schedule['mode'],
+            gamma=lr_schedule.get('gamma', 1.0),
+            cycle_momentum=False,
+        )
+    return lr_scheduler, lr_schedule, n_warmup_steps
+
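+# lr_schedule dicts are config-driven; example shapes (illustrative values):
+#     {'type': 'constant'}
+#     {'type': 'linear_warmup', 'n_warmup_steps': 1000}
+#     {'type': 'cyclic', 'max_lr': 1e-2, 'n_epochs_up': 10,
+#      'n_epochs_down': 10, 'mode': 'triangular'}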
+
+def get_activation_fn(name: str) -> ty.Callable[[Tensor], Tensor]:
+    return (
+        reglu
+        if name == 'reglu'
+        else geglu
+        if name == 'geglu'
+        else torch.sigmoid
+        if name == 'sigmoid'
+        else getattr(F, name)
+    )
+
+
+def get_nonglu_activation_fn(name: str) -> ty.Callable[[Tensor], Tensor]:
+    return (
+        F.relu
+        if name == 'reglu'
+        else F.gelu
+        if name == 'geglu'
+        else get_activation_fn(name)
+    )
+
+
+def load_swa_state_dict(model: nn.Module, swa_model: optim.swa_utils.AveragedModel):
+    # AveragedModel stores the wrapped model's weights under a 'module.'
+    # prefix and adds an 'n_averaged' counter; strip both before loading.
+    state_dict = deepcopy(swa_model.state_dict())
+    del state_dict['n_averaged']
+    model.load_state_dict({k[len('module.') :]: v for k, v in state_dict.items()})
+
+
+def get_epoch_parameters(
+    train_size: int, batch_size: ty.Union[int, str]
+) -> ty.Tuple[int, int]:
+    if isinstance(batch_size, str):
+        if batch_size == 'v3':
+            batch_size = (
+                256 if train_size < 50000 else 512 if train_size < 100000 else 1024
+            )
+        elif batch_size == 'v1':
+            batch_size = (
+                16
+                if train_size < 1000
+                else 32
+                if train_size < 10000
+                else 64
+                if train_size < 50000
+                else 128
+                if train_size < 100000
+                else 256
+                if train_size < 200000
+                else 512
+                if train_size < 500000
+                else 1024
+            )
+        elif batch_size == 'v2':
+            batch_size = (
+                512 if train_size < 100000 else 1024 if train_size < 500000 else 2048
+            )
+    return batch_size, math.ceil(train_size / batch_size)  # type: ignore[code]
+
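+# Example (illustrative): get_epoch_parameters(30000, 'v3') -> (256, 118),
+# i.e. batch size 256 and ceil(30000 / 256) = 118 steps per epoch.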
+
+def get_linear_warmup_lr(lr: float, n_warmup_steps: int, step: int) -> float:
+    assert step > 0, "1-based enumeration of steps is expected"
+    return min(lr, step / (n_warmup_steps + 1) * lr)
+
+
+def get_manual_lr(schedule: ty.List[float], epoch: int) -> float:
+    assert epoch > 0, "1-based enumeration of epochs is expected"
+    return schedule[min(epoch, len(schedule)) - 1]
+
+
+def get_transformer_lr(scale: float, d: int, n_warmup_steps: int, step: int) -> float:
+    return scale * d ** -0.5 * min(step ** -0.5, step * n_warmup_steps ** -1.5)
+
+
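+# get_transformer_lr follows the warmup schedule from 'Attention Is All You
+# Need': lr = scale * d**-0.5 * min(step**-0.5, step * n_warmup_steps**-1.5),
+# i.e. linear warmup followed by inverse-square-root decay.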
+def learn(model, optimizer, loss_fn, step, batch, star) -> ty.Tuple[Tensor, ty.Any]:
+    model.train()
+    optimizer.zero_grad()
+    out = step(batch)
+    # `star` controls whether the output of `step` is unpacked into `loss_fn`.
+    loss = loss_fn(*out) if star else loss_fn(out)
+    loss.backward()
+    optimizer.step()
+    return loss, out
+
+
+def _learn_with_virtual_batch(
+    model, optimizer, loss_fn, step, batch, chunk_size
+) -> Tensor:
+    batch_size = len(batch)
+    if chunk_size >= batch_size:
+        return learn(model, optimizer, loss_fn, step, batch, True)[0]
+    model.train()
+    optimizer.zero_grad()
+    total_loss = None
+    for chunk in zero.iter_batches(batch, chunk_size):
+        loss = loss_fn(*step(chunk))
+        loss = loss * len(chunk)
+        loss.backward()
+        if total_loss is None:
+            total_loss = loss.detach()
+        else:
+            total_loss += loss.detach()
+    for x in model.parameters():
+        if x.grad is not None:
+            x.grad /= batch_size
+    optimizer.step()
+    return total_loss / batch_size
+
+
+def learn_with_auto_virtual_batch(
+    model,
+    optimizer,
+    loss_fn,
+    step,
+    batch,
+    batch_size_hint: int,
+    chunk_size: ty.Optional[int],
+) -> ty.Tuple[Tensor, ty.Optional[int]]:
+    """This is just an overcomplicated version of `train_with_auto_virtual_batch`."""
+    random_state = zero.get_random_state()
+    while chunk_size != 0:
+        try:
+            zero.set_random_state(random_state)
+            return (
+                _learn_with_virtual_batch(
+                    model,
+                    optimizer,
+                    loss_fn,
+                    step,
+                    batch,
+                    chunk_size or batch_size_hint,
+                ),
+                chunk_size,
+            )
+        except RuntimeError as err:
+            if not is_oom_exception(err):
+                raise
+            if chunk_size is None:
+                chunk_size = batch_size_hint
+            chunk_size //= 2
+    raise RuntimeError('Not enough memory even for batch_size=1')
+
+
+def train_with_auto_virtual_batch(
+    optimizer,
+    loss_fn,
+    step,
+    batch,
+    chunk_size: int,
+) -> ty.Tuple[Tensor, int]:
+    batch_size = len(batch)
+    random_state = zero.get_random_state()
+    while chunk_size != 0:
+        try:
+            zero.set_random_state(random_state)
+            optimizer.zero_grad()
+            if batch_size <= chunk_size:
+                loss = loss_fn(*step(batch))
+                loss.backward()
+            else:
+                loss = None
+                for chunk in zero.iter_batches(batch, chunk_size):
+                    chunk_loss = loss_fn(*step(chunk))
+                    chunk_loss = chunk_loss * (len(chunk) / batch_size)
+                    chunk_loss.backward()
+                    if loss is None:
+                        loss = chunk_loss.detach()
+                    else:
+                        loss += chunk_loss.detach()
+        except RuntimeError as err:
+            if not is_oom_exception(err):
+                raise
+            chunk_size //= 2
+        else:
+            break
+    if not chunk_size:
+        raise RuntimeError('Not enough memory even for batch_size=1')
+    optimizer.step()
+    return loss, chunk_size  # type: ignore[code]
+
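+# Both *_virtual_batch helpers implement gradient accumulation with automatic
+# OOM backoff: on a CUDA out-of-memory error the chunk size is halved and the
+# step is retried after restoring the saved RNG state, so the retry sees the
+# same randomness (e.g. dropout masks) as the failed attempt.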
+
+def tensor(x) -> torch.Tensor:
+    assert isinstance(x, torch.Tensor)
+    return ty.cast(torch.Tensor, x)
+
+
+def get_n_parameters(m: nn.Module):
+    return sum(x.numel() for x in m.parameters() if x.requires_grad)
+
+
+def get_mlp_n_parameters(units: ty.List[int]):
+    x = 0
+    for a, b in zip(units, units[1:]):
+        # weight matrix (a * b) plus bias vector (b) for each linear layer
+        x += a * b + b
+    return x
+
+
+def get_lr(optimizer: optim.Optimizer) -> float:
+    return next(iter(optimizer.param_groups))['lr']
+
+
+def set_lr(optimizer: optim.Optimizer, lr: float) -> None:
+    for x in optimizer.param_groups:
+        x['lr'] = lr
+
+
+def get_device() -> torch.device:
+    return torch.device('cuda:0' if os.environ.get('CUDA_VISIBLE_DEVICES') else 'cpu')
+
+
+@torch.no_grad()
+def get_gradient_norm_ratios(m: nn.Module):
+    return {
+        k: v.grad.norm() / v.norm()
+        for k, v in m.named_parameters()
+        if v.grad is not None
+    }
+
+
+def is_oom_exception(err: RuntimeError) -> bool:
+    return any(
+        x in str(err)
+        for x in [
+            'CUDA out of memory',
+            'CUBLAS_STATUS_ALLOC_FAILED',
+            'CUDA error: out of memory',
+        ]
+    )
+
+
+# Source: https://github.com/LiyuanLucasLiu/RAdam
+class RAdam(optim.Optimizer):
+    def __init__(
+        self,
+        params,
+        lr=1e-3,
+        betas=(0.9, 0.999),
+        eps=1e-8,
+        weight_decay=0,
+        degenerated_to_sgd=True,
+    ):
+        if not 0.0 <= lr:
+            raise ValueError("Invalid learning rate: {}".format(lr))
+        if not 0.0 <= eps:
+            raise ValueError("Invalid epsilon value: {}".format(eps))
+        if not 0.0 <= betas[0] < 1.0:
+            raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0]))
+        if not 0.0 <= betas[1] < 1.0:
+            raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1]))
+
+        self.degenerated_to_sgd = degenerated_to_sgd
+        if (
+            isinstance(params, (list, tuple))
+            and len(params) > 0
+            and isinstance(params[0], dict)
+        ):
+            for param in params:
+                if 'betas' in param and (
+                    param['betas'][0] != betas[0] or param['betas'][1] != betas[1]
+                ):
+                    param['buffer'] = [[None, None, None] for _ in range(10)]
+        defaults = dict(
+            lr=lr,
+            betas=betas,
+            eps=eps,
+            weight_decay=weight_decay,
+            buffer=[[None, None, None] for _ in range(10)],
+        )
+        super(RAdam, self).__init__(params, defaults)
+
+    def __setstate__(self, state):
+        super(RAdam, self).__setstate__(state)
+
+    def step(self, closure=None):
+        loss = None
+        if closure is not None:
+            loss = closure()
+
+        for group in self.param_groups:
+            for p in group['params']:
+                if p.grad is None:
+                    continue
+                grad = p.grad.data.float()
+                if grad.is_sparse:
+                    raise RuntimeError('RAdam does not support sparse gradients')
+
+                p_data_fp32 = p.data.float()
+
+                state = self.state[p]
+
+                if len(state) == 0:
+                    state['step'] = 0
+                    state['exp_avg'] = torch.zeros_like(p_data_fp32)
+                    state['exp_avg_sq'] = torch.zeros_like(p_data_fp32)
+                else:
+                    state['exp_avg'] = state['exp_avg'].type_as(p_data_fp32)
+                    state['exp_avg_sq'] = state['exp_avg_sq'].type_as(p_data_fp32)
+
+                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
+                beta1, beta2 = group['betas']
+
+                # NOTE: the deprecated positional forms add_(scalar, tensor) and
+                # addcmul_(scalar, tensor, tensor) are replaced with the keyword
+                # forms expected by modern PyTorch.
+                exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)
+                exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
+
+                state['step'] += 1
+                buffered = group['buffer'][int(state['step'] % 10)]
+                if state['step'] == buffered[0]:
+                    N_sma, step_size = buffered[1], buffered[2]
+                else:
+                    buffered[0] = state['step']
+                    beta2_t = beta2 ** state['step']
+                    N_sma_max = 2 / (1 - beta2) - 1
+                    N_sma = N_sma_max - 2 * state['step'] * beta2_t / (1 - beta2_t)
+                    buffered[1] = N_sma
+
+                    # more conservative since it's an approximated value
+                    if N_sma >= 5:
+                        step_size = math.sqrt(
+                            (1 - beta2_t)
+                            * (N_sma - 4)
+                            / (N_sma_max - 4)
+                            * (N_sma - 2)
+                            / N_sma
+                            * N_sma_max
+                            / (N_sma_max - 2)
+                        ) / (1 - beta1 ** state['step'])
+                    elif self.degenerated_to_sgd:
+                        step_size = 1.0 / (1 - beta1 ** state['step'])
+                    else:
+                        step_size = -1
+                    buffered[2] = step_size
+
+                # more conservative since it's an approximated value
+                if N_sma >= 5:
+                    if group['weight_decay'] != 0:
+                        p_data_fp32.add_(
+                            p_data_fp32, alpha=-group['weight_decay'] * group['lr']
+                        )
+                    denom = exp_avg_sq.sqrt().add_(group['eps'])
+                    p_data_fp32.addcdiv_(exp_avg, denom, value=-step_size * group['lr'])
+                    p.data.copy_(p_data_fp32)
+                elif step_size > 0:
+                    if group['weight_decay'] != 0:
+                        p_data_fp32.add_(
+                            p_data_fp32, alpha=-group['weight_decay'] * group['lr']
+                        )
+                    p_data_fp32.add_(exp_avg, alpha=-step_size * group['lr'])
+                    p.data.copy_(p_data_fp32)
+
+        return loss
+
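+# RAdam rectifies Adam's variance estimate early in training: while the
+# approximated SMA length N_sma is below 5 the adaptive step is considered
+# unreliable, so the update degenerates to a bias-corrected SGD step when
+# degenerated_to_sgd=True, and is skipped otherwise.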
+
+# NOTE: comparing version strings lexicographically misorders versions such
+# as "1.10.0", so parse (major, minor) before comparing.
+version_higher = tuple(
+    int(x) for x in torch.__version__.split('+')[0].split('.')[:2]
+) >= (1, 5)
+
+
+# Source: https://github.com/juntang-zhuang/Adabelief-Optimizer
+class AdaBelief(optim.Optimizer):
+    r"""Implements the AdaBelief algorithm. Modified from Adam in PyTorch.
+
+    Arguments:
+        params (iterable): iterable of parameters to optimize or dicts defining
+            parameter groups
+        lr (float, optional): learning rate (default: 1e-3)
+        betas (Tuple[float, float], optional): coefficients used for computing
+            running averages of gradient and its square (default: (0.9, 0.999))
+        eps (float, optional): term added to the denominator to improve
+            numerical stability (default: 1e-16)
+        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
+        amsgrad (boolean, optional): whether to use the AMSGrad variant of this
+            algorithm from the paper `On the Convergence of Adam and Beyond`_
+            (default: False)
+        weight_decouple (boolean, optional): (default: True) If set as True, the
+            optimizer uses decoupled weight decay as in AdamW
+        fixed_decay (boolean, optional): (default: False) This is used when
+            weight_decouple is set as True.
+            When fixed_decay == True, the weight decay is performed as
+            $W_{new} = W_{old} - W_{old} \times decay$.
+            When fixed_decay == False, the weight decay is performed as
+            $W_{new} = W_{old} - W_{old} \times decay \times lr$. Note that in
+            this case, the weight decay ratio decreases with the learning rate (lr).
+        rectify (boolean, optional): (default: True) If set as True, perform the
+            rectified update similar to RAdam
+        degenerated_to_sgd (boolean, optional): (default: True) If set as True,
+            perform the SGD update when the variance of the gradient is high
+        print_change_log (boolean, optional): (default: True) If set as True,
+            print the modifications to the default hyper-parameters
+    reference: AdaBelief Optimizer, adapting stepsizes by the belief in observed
+        gradients, NeurIPS 2020
+    """
+
+    def __init__(
+        self,
+        params,
+        lr=1e-3,
+        betas=(0.9, 0.999),
+        eps=1e-16,
+        weight_decay=0,
+        amsgrad=False,
+        weight_decouple=True,
+        fixed_decay=False,
+        rectify=True,
+        degenerated_to_sgd=True,
+        print_change_log=True,
+    ):
+        # ------------------------------------------------------------------------------
+        # Print modifications to default arguments
+        if print_change_log:
+            print(
+                'Please check your arguments if you have upgraded adabelief-pytorch from version 0.0.5.'
+            )
+            print('Modifications to default arguments:')
+            default_table = [
+                ['version', 'eps', 'weight_decouple', 'rectify'],
+                ['adabelief-pytorch=0.0.5', '1e-8', 'False', 'False'],
+                ['>=0.1.0 (Current 0.2.0)', '1e-16', 'True', 'True'],
+            ]
+            print(default_table)
+
+            recommend_table = [
+                [
+                    'SGD better than Adam (e.g. CNN for Image Classification)',
+                    'Adam better than SGD (e.g. Transformer, GAN)',
+                ],
+                ['Recommended eps = 1e-8', 'Recommended eps = 1e-16'],
+            ]
+            print(recommend_table)
+
+            print('For a complete table of recommended hyperparameters, see')
+            print('https://github.com/juntang-zhuang/Adabelief-Optimizer')
+
+            print(
+                'You can disable the log message by setting "print_change_log = False", though it is recommended to keep it as a reminder.'
+            )
+        # ------------------------------------------------------------------------------
+
+        if not 0.0 <= lr:
+            raise ValueError("Invalid learning rate: {}".format(lr))
+        if not 0.0 <= eps:
+            raise ValueError("Invalid epsilon value: {}".format(eps))
+        if not 0.0 <= betas[0] < 1.0:
+            raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0]))
+        if not 0.0 <= betas[1] < 1.0:
+            raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1]))
+
+        self.degenerated_to_sgd = degenerated_to_sgd
+        if (
+            isinstance(params, (list, tuple))
+            and len(params) > 0
+            and isinstance(params[0], dict)
+        ):
+            for param in params:
+                if 'betas' in param and (
+                    param['betas'][0] != betas[0] or param['betas'][1] != betas[1]
+                ):
+                    param['buffer'] = [[None, None, None] for _ in range(10)]
+
+        defaults = dict(
+            lr=lr,
+            betas=betas,
+            eps=eps,
+            weight_decay=weight_decay,
+            amsgrad=amsgrad,
+            buffer=[[None, None, None] for _ in range(10)],
+        )
+        super(AdaBelief, self).__init__(params, defaults)
+
+        self.weight_decouple = weight_decouple
+        self.rectify = rectify
+        self.fixed_decay = fixed_decay
+        if self.weight_decouple:
+            print('Weight decoupling enabled in AdaBelief')
+            if self.fixed_decay:
+                print('Weight decay fixed')
+        if self.rectify:
+            print('Rectification enabled in AdaBelief')
+        if amsgrad:
+            print('AMSGrad enabled in AdaBelief')
+
+    def __setstate__(self, state):
+        super(AdaBelief, self).__setstate__(state)
+        for group in self.param_groups:
+            group.setdefault('amsgrad', False)
+
+    def reset(self):
+        for group in self.param_groups:
+            for p in group['params']:
+                state = self.state[p]
+                amsgrad = group['amsgrad']
+
+                # State initialization
+                state['step'] = 0
+                # Exponential moving average of gradient values
+                state['exp_avg'] = (
+                    torch.zeros_like(p.data, memory_format=torch.preserve_format)
+                    if version_higher
+                    else torch.zeros_like(p.data)
+                )
+
+                # Exponential moving average of squared gradient values
+                state['exp_avg_var'] = (
+                    torch.zeros_like(p.data, memory_format=torch.preserve_format)
+                    if version_higher
+                    else torch.zeros_like(p.data)
+                )
+
+                if amsgrad:
+                    # Maintains max of all exp. moving avg. of sq. grad. values
+                    state['max_exp_avg_var'] = (
+                        torch.zeros_like(p.data, memory_format=torch.preserve_format)
+                        if version_higher
+                        else torch.zeros_like(p.data)
+                    )
+
+    def step(self, closure=None):
+        """Performs a single optimization step.
+        Arguments:
+            closure (callable, optional): A closure that reevaluates the model
+                and returns the loss.
+        """
+        loss = None
+        if closure is not None:
+            loss = closure()
+
+        for group in self.param_groups:
+            for p in group['params']:
+                if p.grad is None:
+                    continue
+
+                # cast data type
+                half_precision = False
+                if p.data.dtype == torch.float16:
+                    half_precision = True
+                    p.data = p.data.float()
+                    p.grad = p.grad.float()
+
+                grad = p.grad.data
+                if grad.is_sparse:
+                    raise RuntimeError(
+                        'AdaBelief does not support sparse gradients, please consider SparseAdam instead'
+                    )
+                amsgrad = group['amsgrad']
+
+                state = self.state[p]
+
+                beta1, beta2 = group['betas']
+
+                # State initialization
+                if len(state) == 0:
+                    state['step'] = 0
+                    # Exponential moving average of gradient values
+                    state['exp_avg'] = (
+                        torch.zeros_like(p.data, memory_format=torch.preserve_format)
+                        if version_higher
+                        else torch.zeros_like(p.data)
+                    )
+                    # Exponential moving average of squared gradient values
+                    state['exp_avg_var'] = (
+                        torch.zeros_like(p.data, memory_format=torch.preserve_format)
+                        if version_higher
+                        else torch.zeros_like(p.data)
+                    )
+                    if amsgrad:
+                        # Maintains max of all exp. moving avg. of sq. grad. values
+                        state['max_exp_avg_var'] = (
+                            torch.zeros_like(
+                                p.data, memory_format=torch.preserve_format
+                            )
+                            if version_higher
+                            else torch.zeros_like(p.data)
+                        )
+
+                # perform weight decay, check if decoupled weight decay
+                if self.weight_decouple:
+                    if not self.fixed_decay:
+                        p.data.mul_(1.0 - group['lr'] * group['weight_decay'])
+                    else:
+                        p.data.mul_(1.0 - group['weight_decay'])
+                else:
+                    if group['weight_decay'] != 0:
+                        grad.add_(p.data, alpha=group['weight_decay'])
+
+                # get current state variable
+                exp_avg, exp_avg_var = state['exp_avg'], state['exp_avg_var']
+
+                state['step'] += 1
+                bias_correction1 = 1 - beta1 ** state['step']
+                bias_correction2 = 1 - beta2 ** state['step']
+
+                # Update first and second moment running average.
+                # The key AdaBelief difference from Adam: the second moment
+                # tracks (grad - exp_avg)^2, the deviation of the gradient
+                # from its EMA prediction, rather than grad^2.
+                exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
+                grad_residual = grad - exp_avg
+                exp_avg_var.mul_(beta2).addcmul_(
+                    grad_residual, grad_residual, value=1 - beta2
+                )
+
+                if amsgrad:
+                    max_exp_avg_var = state['max_exp_avg_var']
+                    # Maintains the maximum of all 2nd moment running avg. till now
+                    torch.max(
+                        max_exp_avg_var,
+                        exp_avg_var.add_(group['eps']),
+                        out=max_exp_avg_var,
+                    )
+
+                    # Use the max. for normalizing running avg. of gradient
+                    denom = (max_exp_avg_var.sqrt() / math.sqrt(bias_correction2)).add_(
+                        group['eps']
+                    )
+                else:
+                    denom = (
+                        exp_avg_var.add_(group['eps']).sqrt()
+                        / math.sqrt(bias_correction2)
+                    ).add_(group['eps'])
+
+                # update
+                if not self.rectify:
+                    # Default update
+                    step_size = group['lr'] / bias_correction1
+                    p.data.addcdiv_(exp_avg, denom, value=-step_size)
+
+                else:  # Rectified update, forked from RAdam
+                    buffered = group['buffer'][int(state['step'] % 10)]
+                    if state['step'] == buffered[0]:
+                        N_sma, step_size = buffered[1], buffered[2]
+                    else:
+                        buffered[0] = state['step']
+                        beta2_t = beta2 ** state['step']
+                        N_sma_max = 2 / (1 - beta2) - 1
+                        N_sma = N_sma_max - 2 * state['step'] * beta2_t / (1 - beta2_t)
+                        buffered[1] = N_sma
+
+                        # more conservative since it's an approximated value
+                        if N_sma >= 5:
+                            step_size = math.sqrt(
+                                (1 - beta2_t)
+                                * (N_sma - 4)
+                                / (N_sma_max - 4)
+                                * (N_sma - 2)
+                                / N_sma
+                                * N_sma_max
+                                / (N_sma_max - 2)
+                            ) / (1 - beta1 ** state['step'])
+                        elif self.degenerated_to_sgd:
+                            step_size = 1.0 / (1 - beta1 ** state['step'])
+                        else:
+                            step_size = -1
+                        buffered[2] = step_size
+
+                    if N_sma >= 5:
+                        denom = exp_avg_var.sqrt().add_(group['eps'])
+                        p.data.addcdiv_(exp_avg, denom, value=-step_size * group['lr'])
+                    elif step_size > 0:
+                        p.data.add_(exp_avg, alpha=-step_size * group['lr'])
+
+                if half_precision:
+                    p.data = p.data.half()
+                    p.grad = p.grad.half()
+
+        return loss
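+
+
+# Minimal AdaBelief usage sketch (illustrative):
+#     opt = AdaBelief(model.parameters(), lr=1e-3, print_change_log=False)
+#     loss_fn(model(x), y).backward()
+#     opt.step()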