Skip to content

membership_inference_protection

membership_inference_protection

Classes:

Name Description
MembershipInferenceProtection

Membership Inference Protection privacy metric.

MembershipInferenceProtection pydantic-model

Bases: Component

Membership Inference Protection privacy metric.

Simulates a membership inference attack: can an adversary determine whether a specific record was in the training set by comparing it to the synthetic data? The attack is repeated across multiple similarity thresholds and data proportions for stability.

See Also

https://arxiv.org/abs/2501.03941 -- Synthetic Data Privacy Metrics.

Config:

  • arbitrary_types_allowed: True

Fields:

attack_sum_df = None pydantic-field

Summary of attack outcomes by protection grade.

tps_values = None pydantic-field

True positive counts per similarity threshold.

fps_values = None pydantic-field

False positive counts per similarity threshold.

jinja_context cached property

Template context with the membership-inference pie chart figure.

from_evaluation_datasets(evaluation_datasets, config=None) staticmethod

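Run the membership inference attack and return a MembershipInferenceProtection component populated with the resulting score, attack summary, and true/false positive counts.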

Source code in src/nemo_safe_synthesizer/evaluation/components/membership_inference_protection.py
@staticmethod
def from_evaluation_datasets(
    evaluation_datasets: EvaluationDatasets, config: SafeSynthesizerParameters | None = None
) -> MembershipInferenceProtection:
    """Build the component by running the membership inference attack.

    Args:
        evaluation_datasets: Source of the ``training``, ``synthetic`` and
            ``test`` dataframes that are handed to :meth:`mia`.
        config: Accepted for interface compatibility; not read by this method.

    Returns:
        A ``MembershipInferenceProtection`` populated with the score, the
        attack summary dataframe and the true/false positive counts that
        :meth:`mia` produced.
    """
    # FIXME config setting?
    # ``column_name`` is not forwarded, so ``mia`` runs with its default (``None``).
    mia_result = MembershipInferenceProtection.mia(
        training_df=evaluation_datasets.training,
        synthetic_df=evaluation_datasets.synthetic,
        test_df=evaluation_datasets.test,
    )
    score, attack_sum_df, tps_values, fps_values = mia_result
    return MembershipInferenceProtection(
        score=score,
        attack_sum_df=attack_sum_df,
        tps_values=tps_values,
        fps_values=fps_values,
    )

mia(training_df, synthetic_df, test_df, column_name=None) staticmethod

Run the full membership inference attack pipeline.

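Normalizes the tabular data, builds a nearest-neighbor index on the synthetic tabular data and/or text embeddings for text fields, then repeats the attack across multiple runs for stability. The final raw score is the average across all runs; it is rescaled to a 0--10 score and translated to a privacy grade.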

Parameters:

Name Type Description Default
training_df DataFrame

Training dataframe.

required
synthetic_df DataFrame

Synthetic dataframe.

required
test_df DataFrame | None

Holdout dataframe. The argument must be passed, but it may be None, in which case the attack is skipped and an unavailable score is returned.

required
column_name str | None

Optional single column to restrict the attack to.

None

Returns:

Type Description
tuple[EvaluationScore, DataFrame | None, dict[float, int], dict[float, int]]

Tuple of (score, attack summary dataframe, TP counts, FP counts).

Source code in src/nemo_safe_synthesizer/evaluation/components/membership_inference_protection.py
@staticmethod
def mia(
    training_df: pd.DataFrame,
    synthetic_df: pd.DataFrame,
    test_df: pd.DataFrame | None,
    column_name: str | None = None,
) -> tuple[
    EvaluationScore,
    pd.DataFrame | None,
    dict[float, int],
    dict[float, int],
]:
    """Run the full membership inference attack pipeline.

    Normalizes the tabular data, builds a nearest-neighbor index on the
    synthetic tabular data and/or text embeddings for text fields, then
    repeats the attack across multiple runs for stability. The final raw
    score is the average across all runs; it is rescaled to a 0--10 score
    and translated to a privacy grade.

    Any exception raised while computing the attack is logged and swallowed;
    in that case the returned score only carries the error text in ``notes``.

    Args:
        training_df: Training dataframe.
        synthetic_df: Synthetic dataframe.
        test_df: Holdout dataframe. If ``None`` the attack is skipped and an
            unavailable score is returned.
        column_name: Optional single column to restrict the attack to.

    Returns:
        Tuple of (score, attack summary dataframe, TP counts, FP counts).
        The attack summary dataframe is ``None`` when the attack was skipped
        or failed before the summary was built.
    """
    ias = EvaluationScore(grade=PrivacyGrade.UNAVAILABLE)
    attack_sum_df = None
    tps_values: dict[float, int] = {}
    fps_values: dict[float, int] = {}
    if test_df is None:
        logger.info("No test data provided for Membership Inference Attack. Skipping Membership Inference Attack.")
        return ias, attack_sum_df, tps_values, fps_values

    try:
        # If user entered column_name, reduce dataframes down to that field
        if column_name:
            training_df = training_df.filter([column_name])
            test_df = test_df.filter([column_name])
            synthetic_df = synthetic_df.filter([column_name])

        text_cnt = len(find_text_fields(training_df))
        tabular_cnt = len(training_df.columns) - text_cnt

        # For multimodal we limit the test size to DEFAULT_RECORD_COUNT
        if text_cnt > 0 and tabular_cnt > 0 and len(test_df) > DEFAULT_RECORD_COUNT:
            test_df = test_df.sample(n=DEFAULT_RECORD_COUNT, random_state=2)

        # Repeat MIA for stability
        repeat_count = 10
        # Sampling what we need for all MIA runs upfront speeds things up
        training_size_needed = len(test_df) * repeat_count
        if len(training_df) > training_size_needed:
            # ``sample`` already returns a new frame, so no defensive copy is needed here.
            training_df_use = training_df.sample(n=training_size_needed, random_state=1)
        else:
            # Work on a copy so the caller's frame is never touched downstream.
            training_df_use = training_df.copy()

        # Divide the dataframes into text and tabular.
        # NOTE(review): text fields are re-detected on the (possibly sampled) frame,
        # while text_cnt/tabular_cnt above come from the unsampled frame -- confirm
        # the two detections cannot disagree.
        text_fields = find_text_fields(training_df_use)
        has_text_fields = len(text_fields) > 0
        if has_text_fields:
            training_df_use, training_df_text = divide_tabular_text(training_df_use, text_fields)
            test_df, test_df_text = divide_tabular_text(test_df, text_fields)
            synthetic_df, synthetic_df_text = divide_tabular_text(synthetic_df, text_fields)

        # Normalize the tabular data (adjusted for multimodal)
        if tabular_cnt > 0:
            try:
                training_df_norm, synthetic_df_norm, test_df_norm = MembershipInferenceProtection._normalize(
                    training_df_use, synthetic_df, test_df
                )
            except Exception:
                # Deliberate best-effort: any failure of the default normalization
                # falls back to the one-hot variant.
                training_df_norm, synthetic_df_norm, test_df_norm = MembershipInferenceProtection._normalize_onehot(
                    training_df_use, synthetic_df, test_df
                )
            # Create nearest neighbor index on the synthetic tabular data (torch GPU or sklearn CPU fallback)
            nn_index = NearestNeighborSearch(n_neighbors=len(synthetic_df_norm))
            nn_index.fit(np.ascontiguousarray(np.array(synthetic_df_norm)).astype(np.float32))
        else:
            training_df_norm = pd.DataFrame()
            test_df_norm = pd.DataFrame()
            synthetic_df_norm = pd.DataFrame()
            nn_index = None

        # Create embeddings for text fields and combine the normalized tabular and the
        # new text embeddings into one dataframe.
        if has_text_fields:
            embedder = SentenceTransformer("distiluse-base-multilingual-cased-v2")
            training_df_embeddings = embed_text(training_df_text, embedder)
            test_df_embeddings = embed_text(test_df_text, embedder)
            synthetic_df_embeddings = embed_text(synthetic_df_text, embedder)
            # NOTE(review): concat along axis=1 aligns rows on the index; this assumes the
            # normalized frames and the embedding frames share the same index -- confirm.
            training_df_norm = pd.concat([training_df_norm, training_df_embeddings], axis=1)
            test_df_norm = pd.concat([test_df_norm, test_df_embeddings], axis=1)
            synthetic_df_norm = pd.concat([synthetic_df_norm, synthetic_df_embeddings], axis=1)

        scores = []
        attack_sum_values = []
        # Running totals per similarity threshold, accumulated over all runs.
        thresholds = (0.1, 0.2, 0.3, 0.4)
        tps_values = {threshold: 0 for threshold in thresholds}
        fps_values = {threshold: 0 for threshold in thresholds}

        for run_idx in range(repeat_count):
            score, attack_sum, tp_cnts, fp_cnts = MembershipInferenceProtection._compute_mia(
                training_df_norm,
                synthetic_df_norm,
                test_df_norm,
                nn_index,
                run_idx,
                text_cnt,
                tabular_cnt,
            )
            for threshold in tp_cnts:
                tps_values[threshold] += tp_cnts[threshold]
            for threshold in fp_cnts:
                fps_values[threshold] += fp_cnts[threshold]
            scores.append(score)
            attack_sum_values.extend(attack_sum)

        # Tally attack outcomes per protection grade; grades never observed stay at 0.
        grade_counts = {grade.value: 0 for grade in PrivacyGrade}
        for outcome in attack_sum_values:
            grade_counts[outcome] += 1

        # Convert counts to whole-number percentages (truncated, so they may sum to < 100).
        total = len(attack_sum_values)
        grade_pct = {grade: int((count / total) * 100) for grade, count in grade_counts.items()}

        attack_sum_df = pd.DataFrame(
            {
                "Protection": grade_pct.keys(),
                "Attack Percentage": grade_pct.values(),
            }
        )

        # The final score is the average over all MIA runs
        score_avg = mean(scores)

        # Translate score to 0 to 10 range
        # (presumably per-run scores lie in [1, 3] -- TODO confirm against _compute_mia)
        final_score = round(((score_avg - 1) / 2 * 10), 1)

        # Translate score to overall grade
        grade = EvaluationScore.score_to_grade(final_score, is_privacy=True)

        ias = EvaluationScore(raw_score=score_avg, grade=grade, score=final_score)
    except Exception as e:
        # Top-level boundary: report the failure through the score's notes instead of raising.
        logger.exception("Failed to calculate Membership Inference Attack Score.")
        ias = EvaluationScore(notes=str(e))

    return (
        ias,
        attack_sum_df,
        tps_values,
        fps_values,
    )