dcbench.tasks.budgetclean package

Submodules

dcbench.tasks.budgetclean.baselines module

random_clean(problem, seed=1337)[source]
Parameters
Return type

dcbench.tasks.budgetclean.problem.BudgetcleanSolution

cp_clean(problem, seed=1337, n_jobs=8, kparam=3)[source]
Parameters
Return type

dcbench.tasks.budgetclean.problem.BudgetcleanSolution

dcbench.tasks.budgetclean.common module

class Preprocessor(num_strategy='mean')[source]

Bases: object

docstring for Preprocessor.

fit(X_train, y_train, X_full=None)[source]
transform(X=None, y=None)[source]

dcbench.tasks.budgetclean.problem module

class BudgetcleanSolution(artifacts, attributes=None, container_id=None)[source]

Bases: dcbench.common.solution.Solution

Parameters
  • artifacts (Mapping[str, Artifact]) –

  • attributes (Mapping[str, Attribute]) –

  • container_id (str) –

artifact_specs: Mapping[str, dcbench.common.artifact_container.ArtifactSpec] = {'idx_selected': ArtifactSpec(description='', artifact_type=<class 'dcbench.common.artifact.CSVArtifact'>, optional=False)}
task_id: str
class BudgetcleanProblem(artifacts, attributes=None, container_id=None)[source]

Bases: dcbench.common.problem.Problem

Parameters
  • artifacts (Mapping[str, Artifact]) –

  • attributes (Mapping[str, Attribute]) –

  • container_id (str) –

artifact_specs: Mapping[str, dcbench.common.artifact_container.ArtifactSpec] = {'X_test': ArtifactSpec(description=('Features of the test dataset used to produce the final evaluation score of the model.',), artifact_type=<class 'dcbench.common.artifact.CSVArtifact'>, optional=False), 'X_train_clean': ArtifactSpec(description='Features of the clean training dataset where each dirty value from the dirty dataset is replaced with the correct clean candidate.', artifact_type=<class 'dcbench.common.artifact.CSVArtifact'>, optional=False), 'X_train_dirty': ArtifactSpec(description=('Features of the dirty training dataset which we need to clean. Each dirty cell contains an embedded list of clean candidate values.',), artifact_type=<class 'dcbench.common.artifact.CSVArtifact'>, optional=False), 'X_val': ArtifactSpec(description='Features of the validation dataset which can be used to guide the cleaning optimization process.', artifact_type=<class 'dcbench.common.artifact.CSVArtifact'>, optional=False), 'y_test': ArtifactSpec(description='Labels of the test dataset.', artifact_type=<class 'dcbench.common.artifact.CSVArtifact'>, optional=False), 'y_train': ArtifactSpec(description='Labels of the training dataset.', artifact_type=<class 'dcbench.common.artifact.CSVArtifact'>, optional=False), 'y_val': ArtifactSpec(description='Labels of the validation dataset.', artifact_type=<class 'dcbench.common.artifact.CSVArtifact'>, optional=False)}
attribute_specs: Mapping[str, AttributeSpec] = {'budget': AttributeSpec(description='TODO', attribute_type=<class 'float'>, optional=False), 'dataset': AttributeSpec(description='TODO', attribute_type=<class 'str'>, optional=False), 'mode': AttributeSpec(description='TODO', attribute_type=<class 'str'>, optional=False), 'model': AttributeSpec(description='TODO', attribute_type=<class 'str'>, optional=False)}
task_id: str = 'budgetclean'
classmethod list()[source]
classmethod from_id(scenario_id)[source]
Parameters

scenario_id (str) –

solve(idx_selected, **kwargs)[source]
Parameters
  • idx_selected (Any) –

  • kwargs (Any) –

Return type

dcbench.common.solution.Solution

evaluate(solution)[source]
Parameters

solution (dcbench.tasks.budgetclean.problem.BudgetcleanSolution) –

Return type

dcbench.common.result.Result

name: str
summary: str
solution_class: type

Module contents