Skip to content

membership_inference_protection

membership_inference_protection

Classes:

Name Description
MembershipInferenceProtection

Membership Inference Protection privacy metric.

MembershipInferenceProtection pydantic-model

Bases: Component

Membership Inference Protection privacy metric.

Simulates a membership inference attack: can an adversary determine whether a specific record was in the training set by comparing it to the synthetic data? The attack is repeated across multiple similarity thresholds and data proportions for stability.

See Also

https://arxiv.org/abs/2501.03941 -- Synthetic Data Privacy Metrics.

Config:

  • arbitrary_types_allowed: True

Fields:

attack_sum_df = None pydantic-field

Summary of attack outcomes by protection grade.

tps_values = None pydantic-field

True positive counts per similarity threshold.

fps_values = None pydantic-field

False positive counts per similarity threshold.

jinja_context cached property

Template context with the membership-inference pie chart figure.

from_evaluation_dataset(evaluation_dataset, config=None) staticmethod

Run the membership inference attack and return the protection score.

Source code in src/nemo_safe_synthesizer/evaluation/components/membership_inference_protection.py
@staticmethod
def from_evaluation_dataset(
    evaluation_dataset: EvaluationDataset, config: SafeSynthesizerParameters | None = None
) -> MembershipInferenceProtection:
    """Run the membership inference attack and return the protection score."""
    # FIXME: `config` is currently unused; a column restriction (`column_name`)
    # could eventually be sourced from it.
    result = MembershipInferenceProtection.mia(
        df_train=evaluation_dataset.reference,
        df_synth=evaluation_dataset.output,
        df_test=evaluation_dataset.test,
    )
    score, attack_summary, true_positives, false_positives = result
    return MembershipInferenceProtection(
        score=score,
        attack_sum_df=attack_summary,
        tps_values=true_positives,
        fps_values=false_positives,
    )

find_text_fields(df) staticmethod

Return column names classified as free text.

Source code in src/nemo_safe_synthesizer/evaluation/components/membership_inference_protection.py
@staticmethod
def find_text_fields(df: pd.DataFrame) -> list[str]:
    """Return column names classified as free text."""
    # A column counts as free text when the field profiler types it as "text".
    return [
        name
        for name in df.columns
        if describe_field(name, df[name]).type.value == "text"
    ]

embed_text(df) staticmethod

Embed each text column and average into a single embedding per row.

Source code in src/nemo_safe_synthesizer/evaluation/components/membership_inference_protection.py
@staticmethod
def embed_text(df: pd.DataFrame) -> pd.DataFrame:
    """Embed each text column and average into a single embedding per row.

    Args:
        df: Dataframe whose columns are all free-text fields.

    Returns:
        Single-column dataframe ``embedding`` with one mean vector per row.
    """
    embedder = SentenceTransformer("distiluse-base-multilingual-cased-v2")
    # Encode each column once; cells are stringified so NaN/numeric values
    # embed without raising.
    per_column = [
        embedder.encode(
            [str(value) for value in df[col].to_list()],
            show_progress_bar=False,
            convert_to_numpy=True,
        )
        for col in df.columns
    ]
    # Uniform mean across columns. The previous running pairwise average gave
    # column j a weight of 1/2**(k-j), so with 3+ text columns later columns
    # dominated earlier ones -- contradicting the documented intent of
    # averaging the column embeddings. A plain mean weights all columns equally
    # (and is identical for 1 or 2 columns).
    avg_embeddings = np.mean(np.stack(per_column, axis=0), axis=0)
    return pd.DataFrame({"embedding": list(avg_embeddings)})

divide_tabular_text(df, text_fields) staticmethod

Split a dataframe into tabular-only and text-only subsets.

Source code in src/nemo_safe_synthesizer/evaluation/components/membership_inference_protection.py
@staticmethod
def divide_tabular_text(df: pd.DataFrame, text_fields: list) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Split a dataframe into tabular-only and text-only subsets."""
    tabular_fields = []
    for col in df.columns:
        if col not in text_fields:
            tabular_fields.append(col)
    df_tabular = df.filter(tabular_fields)
    df_text = df.filter(text_fields)

    return (df_tabular, df_text)

mia(df_train, df_test, df_synth, column_name=None) staticmethod

Run the full membership inference attack pipeline.

Normalizes data, builds FAISS indexes and/or text embeddings, then repeats the attack across multiple runs for stability. The final score is the average across all runs, mapped to a 0--10 privacy grade.

Parameters:

Name Type Description Default
df_train DataFrame

Training dataframe.

required
df_test DataFrame | None

Holdout dataframe (required -- returns unavailable if None).

required
df_synth DataFrame

Synthetic dataframe.

required
column_name str | None

Optional single column to restrict the attack to.

None

Returns:

Type Description
tuple[EvaluationScore, DataFrame | None, dict[float, int], dict[float, int]]

Tuple of (score, attack summary dataframe, TP counts, FP counts).

Source code in src/nemo_safe_synthesizer/evaluation/components/membership_inference_protection.py
@staticmethod
def mia(
    df_train: pd.DataFrame,
    df_test: pd.DataFrame | None,
    df_synth: pd.DataFrame,
    column_name: str | None = None,
) -> tuple[
    EvaluationScore,
    pd.DataFrame | None,
    dict[float, int],
    dict[float, int],
]:
    """Run the full membership inference attack pipeline.

    Normalizes data, builds FAISS indexes and/or text embeddings, then
    repeats the attack across multiple runs for stability. The final score
    is the average across all runs, mapped to a 0--10 privacy grade.

    Args:
        df_train: Training dataframe.
        df_test: Holdout dataframe (required -- returns unavailable if ``None``).
        df_synth: Synthetic dataframe.
        column_name: Optional single column to restrict the attack to.

    Returns:
        Tuple of (score, attack summary dataframe, TP counts, FP counts).
    """
    # Defaults returned when the attack cannot run (no holdout) or fails.
    ias = EvaluationScore(grade=PrivacyGrade.UNAVAILABLE)
    attack_sum_df = None
    tps_values = {}
    fps_values = {}
    if df_test is None:
        logger.info("No test data provided for Membership Inference Attack. Skipping Membership Inference Attack.")
        return ias, attack_sum_df, tps_values, fps_values

    try:
        # If user entered column_name, reduce dataframes down to that field
        if column_name:
            df_train = df_train.filter([column_name])
            df_test = df_test.filter([column_name])
            df_synth = df_synth.filter([column_name])

        # Classify columns once so we know whether this run is tabular-only,
        # text-only, or multimodal.
        text_fields = MembershipInferenceProtection.find_text_fields(df_train)
        text_cnt = len(text_fields)
        tabular_cnt = len(df_train.columns) - text_cnt

        # For multimodal we limit the test size to DEFAULT_RECORD_COUNT.
        # Fixed random_state keeps the sample (and the metric) reproducible.
        if text_cnt > 0 and tabular_cnt > 0:
            if len(df_test) > DEFAULT_RECORD_COUNT:
                df_test = df_test.sample(n=DEFAULT_RECORD_COUNT, random_state=2)

        # Repeat MIA for stability
        repeat_count = 10
        # Sampling what we need for all MIA runs upfront speeds things up
        train_size_needed = len(df_test) * repeat_count
        df_train_use = df_train.copy()
        # NOTE(review): this column assignment is a no-op after .copy();
        # presumably left over from an earlier refactor.
        df_train_use.columns = df_train.columns
        if len(df_train_use) > train_size_needed:
            # NOTE(review): samples from df_train rather than df_train_use --
            # equivalent here since df_train_use is an unmodified copy.
            df_train_use = df_train.sample(n=train_size_needed, random_state=1)

        # Divide the dataframes into text and tabular
        text_fields = MembershipInferenceProtection.find_text_fields(df_train_use)
        if len(text_fields) > 0:
            df_train_use, df_train_text = MembershipInferenceProtection.divide_tabular_text(
                df_train_use, text_fields
            )
            df_test, df_test_text = MembershipInferenceProtection.divide_tabular_text(df_test, text_fields)
            df_synth, df_synth_text = MembershipInferenceProtection.divide_tabular_text(df_synth, text_fields)

        # Normalize the tabular data (adjusted for multimodal)
        if tabular_cnt > 0:
            try:
                df_train_norm, df_test_norm, df_synth_norm = MembershipInferenceProtection._normalize(
                    df_train_use, df_test, df_synth
                )
            except Exception:
                # Fall back to one-hot-based normalization when plain
                # normalization raises for any reason.
                df_train_norm, df_test_norm, df_synth_norm = MembershipInferenceProtection._normalize_onehot(
                    df_train_use, df_test, df_synth
                )
            # Create nearest neighbor index on the synthetic tabular data (torch GPU or sklearn CPU fallback)
            nn_index = NearestNeighborSearch(n_neighbors=len(df_synth_norm))
            nn_index.fit(np.ascontiguousarray(np.array(df_synth_norm)).astype(np.float32))
        else:
            # Text-only run: no tabular features, so no nearest-neighbor index.
            df_train_norm = pd.DataFrame()
            df_test_norm = pd.DataFrame()
            df_synth_norm = pd.DataFrame()
            nn_index = None

        # Create embeddings for text fields and combine the normalized tabular and the
        # new text embeddings into one dataframe.
        if len(text_fields) > 0:
            df_train_embeddings = MembershipInferenceProtection.embed_text(df_train_text)
            df_test_embeddings = MembershipInferenceProtection.embed_text(df_test_text)
            df_synth_embeddings = MembershipInferenceProtection.embed_text(df_synth_text)
            df_train_norm = pd.concat([df_train_norm, df_train_embeddings], axis=1)
            df_test_norm = pd.concat([df_test_norm, df_test_embeddings], axis=1)
            df_synth_norm = pd.concat([df_synth_norm, df_synth_embeddings], axis=1)

        scores = []
        attack_sum_values = []
        tps_values = {}
        fps_values = {}
        # Similarity thresholds tracked for TP/FP reporting; counts are
        # accumulated across all repeat runs below.
        for i in [0.1, 0.2, 0.3, 0.4]:
            tps_values[i] = 0
            fps_values[i] = 0

        for i in range(repeat_count):
            # `i` doubles as the run index passed to _compute_mia, which
            # presumably uses it to pick a distinct train slice per run -- confirm.
            score, attack_sum, tp_cnts, fp_cnts = MembershipInferenceProtection._compute_mia(
                df_train_norm,
                df_test_norm,
                df_synth_norm,
                nn_index,
                i,
                text_cnt,
                tabular_cnt,
            )
            for tp_cnt in tp_cnts:
                tps_values[tp_cnt] += tp_cnts[tp_cnt]
            for fp_cnt in fp_cnts:
                fps_values[fp_cnt] += fp_cnts[fp_cnt]
            scores.append(score)
            attack_sum_values = attack_sum_values + attack_sum

        # Tally how often each protection grade was observed across all runs.
        values = {}
        for grade in PrivacyGrade:
            values[grade.value] = 0

        total = 0
        for value in attack_sum_values:
            total += 1
            values[value] += 1

        # Convert the grade counts to integer percentages for the summary table.
        for i in values:
            values[i] = int((values[i] / total) * 100)

        attack_sum_df = pd.DataFrame(
            {
                "Protection": values.keys(),
                "Attack Percentage": values.values(),
            }
        )

        # The final score is the average over all MIA runs
        score_avg = mean(scores)

        # Translate score to 0 to 10 range
        # NOTE(review): this mapping assumes score_avg lies in [1, 3] -- confirm
        # against the score range _compute_mia actually produces.
        final_score = round(((score_avg - 1) / 2 * 10), 1)

        # Translate score to overall grade
        grade = EvaluationScore.score_to_grade(final_score, is_privacy=True)

        ias = EvaluationScore(raw_score=score_avg, grade=grade, score=final_score)
    except Exception as e:
        # Best-effort metric: record the failure in the score notes rather than
        # propagating the exception up into the evaluation pipeline.
        logger.exception("Failed to calculate Membership Inference Attack Score.")
        ias = EvaluationScore(notes=str(e))

    return (
        ias,
        attack_sum_df,
        tps_values,
        fps_values,
    )