# Generate a diagnostic report and a quality report comparing `synthetic_sdv`
# against `actual`; both receive the metadata as a plain dict.
metadata_dict = metadata.to_dict()

diagnostic = DiagnosticReport()
diagnostic.generate(
    real_data=actual,
    synthetic_data=synthetic_sdv,
    metadata=metadata_dict,
    verbose=True,
)
print("Diagnostic rating:", diagnostic.get_score())

# A variable name cannot contain a space, so the quality report is bound to
# the single identifier `quality`.
quality = QualityReport()
quality.generate(
    real_data=actual,
    synthetic_data=synthetic_sdv,
    metadata=metadata_dict,
    verbose=True,
)
print("High quality rating:", quality.get_score())


def show_report_details(report, title):
    """Print a section header and show the details of every report property.

    Args:
        report: Object exposing ``get_properties()`` and
            ``get_details(property_name=...)``.
        title: Label interpolated into the printed section header.
    """
    print(f"\n===== {title} particulars =====")
    properties = report.get_properties()
    # Iterating a pandas DataFrame yields its column labels, not its rows.
    # When the result is tabular with a "Property" column (the layout SDMetrics
    # documents), take the property names from that column instead.
    if hasattr(properties, "columns") and "Property" in properties.columns:
        property_names = list(properties["Property"])
    else:
        property_names = list(properties)

    for property_name in property_names:
        print(f"\n--- {property_name} ---")
        details = report.get_details(property_name=property_name)
        # NOTE(review): `show` is not defined in this part of the file;
        # presumably a notebook display helper (e.g. IPython `display`) --
        # confirm it is in scope.
        try:
            show(details.head(10))
        except Exception:
            # Best effort: if the details cannot be truncated with `.head`,
            # show the whole object instead.
            show(details)


show_report_details(diagnostic, "DiagnosticReport")
show_report_details(quality, "QualityReport")
# Hold out a quarter of the real rows for testing, stratified on the target
# column, with a fixed seed so the split is reproducible.
train_real, test_real = train_test_split(
    actual,
    stratify=actual[target_col],
    random_state=42,
    test_size=0.25,
)
def make_pipeline(cat_cols, num_cols):
    """Build an unfitted preprocessing + logistic-regression pipeline.

    Args:
        cat_cols: Columns to one-hot encode; categories not seen during
            fitting are ignored at transform time.
        num_cols: Columns forwarded to the classifier unchanged.

    Returns:
        A ``Pipeline`` with the steps ``"pre"`` (the ``ColumnTransformer``)
        and ``"clf"`` (``LogisticRegression(max_iter=200)``).
    """
    pre = ColumnTransformer(
        transformers=[
            ("cat", OneHotEncoder(handle_unknown="ignore"), cat_cols),
            ("num", "passthrough", num_cols),
        ],
        # Columns listed in neither group are discarded.
        remainder="drop",
    )
    clf = LogisticRegression(max_iter=200)
    return Pipeline([("pre", pre), ("clf", clf)])
# Both models are scored on the same held-out real rows, so build the test
# features and the binary ground truth once.
test_features = test_real.drop(columns=[target_col])
# A row is positive when its target label contains ">".
# NOTE(review): `predict_proba(...)[:, 1]` is the probability of the second
# entry of the fitted `classes_`; this assumes the label containing ">" is
# that entry -- confirm for this dataset.
y_true = (
    test_real[target_col].astype(str).str.contains(">", regex=False)
).astype(int)

# Train on synthetic data, evaluate on real data.
pipe_syn = make_pipeline(categorical_cols, numerical_cols)
pipe_syn.fit(synthetic_sdv.drop(columns=[target_col]), synthetic_sdv[target_col])
proba_syn = pipe_syn.predict_proba(test_features)[:, 1]
auc_syn = roc_auc_score(y_true, proba_syn)
print("Artificial-train -> Actual-test AUC:", auc_syn)

# Baseline: train on real data, evaluate on the same real test rows.
pipe_real = make_pipeline(categorical_cols, numerical_cols)
pipe_real.fit(train_real.drop(columns=[target_col]), train_real[target_col])
proba_real = pipe_real.predict_proba(test_features)[:, 1]
auc_real = roc_auc_score(y_true, proba_real)
print("Actual-train -> Actual-test AUC:", auc_real)
# Persist the synthesizer, then reload it and draw rows from the reloaded
# copy to check the round trip.
model_path = "ctgan_sdv_synth.pkl"
synth.save(model_path)
print("Saved synthesizer to:", model_path)

from sdv.utils import load_synthesizer

# The file loaded here is the one written just above; only load synthesizer
# files from sources you trust.
synth_loaded = load_synthesizer(model_path)
# Synthesizers generate rows through `sample(num_rows)`.
synthetic_loaded = synth_loaded.sample(1000)
print("Loaded synthesizer pattern:")
# NOTE(review): `show` is not defined in this part of the file; presumably a
# notebook display helper (e.g. IPython `display`) -- confirm it is in scope.
show(synthetic_loaded.head())