Skip to content

datetime

datetime

Classes:

Name Description
DateTime

Date/time matcher.

DateTime(name=None)

Bases: Predictor

Date/time matcher.

Methods:

Name Description
evaluate

Given a single record determine if any entities are represented.

Source code in src/nemo_safe_synthesizer/pii_replacer/ner/datetime.py
def __init__(self, name: str = None):
    """
    Build the matcher and its date/time context.

    Args:
        name: predictor name; when None, ``self.default_name`` is used instead.
    """
    resolved_name = self.default_name if name is None else name
    super().__init__(resolved_name)
    # Context object supplies the header patterns and label lookup used by evaluate().
    self._context = DateTimeContext(LABELS)

evaluate(in_record)

Given a single record determine if any entities are represented.

Parameters:

Name Type Description Default
in_record JSONRecord

the record to match patterns against

required

Returns:

Type Description
list[NERPrediction]

A list of entity predictions sorted by score. Top score is first entry in list.

Source code in src/nemo_safe_synthesizer/pii_replacer/ner/datetime.py
def evaluate(self, in_record: JSONRecord) -> list[NERPrediction]:
    """
    Given a single record determine if any
    entities are represented.

    Only fields whose header passes ``header_has_context`` are parsed. Fields
    whose parsing raises are skipped. Every emitted prediction carries
    ``Score.HIGH``; ``start``/``end`` are character offsets of the matched
    text inside ``str(record_field.value)``.

    Args:
        in_record: the record to match patterns against

    Returns:
        A list of entity predictions sorted by score. Top score is first entry in list.
    """
    predictions = []
    for record_field in in_record.kv_pairs:
        # NOTE(jm): Changed to require header context no matter what, too many
        # FPs when looking in unstructured text
        if not self.header_has_context(
            record_field,
            self.KEY,
            token_patterns=self._context.header_tokens,
            regex_patterns=self._context.header_regexes,
        ):
            continue

        try:
            date_time_list = _parse_dates(record_field.value, record_field.scalar_type)
        except Exception:
            # NOTE:(jm) skip over issues like this: https://github.com/scrapinghub/dateparser/issues/679
            continue

        if not date_time_list:
            continue

        value_text = str(record_field.value)
        # Per matched text, the offset at which the next search should begin.
        # Without this, a date string that occurs several times in one value
        # would always be reported at the span of its first occurrence.
        next_search_from = {}

        for date_time in date_time_list:
            label = self._context.get_entity_label(date_time)
            matched_text = date_time[0]

            start = value_text.find(matched_text, next_search_from.get(matched_text, 0))
            if start < 0:
                # No further occurrence: fall back to the first one.
                # NOTE(review): if the text is absent altogether this is still -1,
                # so start/end are not usable offsets in that case -- confirm
                # whether _parse_dates can return text that is not a substring.
                start = value_text.find(matched_text)
            end = start + len(matched_text)
            if start >= 0:
                next_search_from[matched_text] = end

            predictions.append(
                NERPrediction(
                    text=matched_text,
                    start=start,
                    end=end,
                    field=record_field.field,
                    value_path=record_field.value_path,
                    score=Score.HIGH,
                    label=label,
                    source=self.source,
                )
            )

    # sorted() is stable, so equal-score predictions keep field/match order.
    return sorted(predictions, key=lambda prediction: prediction.score, reverse=True)