Skip to content

Modules

model

Classes

Modules

sample

Provide a description of a sample.

Classes
Sample

Bases: NamedTuple

Define a sample.

Source code in taxpasta/domain/model/sample.py
class Sample(NamedTuple):
    """
    Define a sample.

    A sample pairs a name with a profile in the standard profile format.

    """

    # The name of the sample.
    name: str
    # The sample's profile, a data frame typed with the `StandardProfile` schema.
    profile: DataFrame[StandardProfile]
Attributes
name: str class-attribute
profile: DataFrame[StandardProfile] class-attribute
standard_profile

Provide a description of the standard profile format.

Classes
StandardProfile

Bases: pa.SchemaModel

Define the standard profile format.

Source code in taxpasta/domain/model/standard_profile.py
class StandardProfile(pa.SchemaModel):
    """
    Define the standard profile format.

    The schema declares two columns: a categorical taxonomy identifier followed by
    an integer count that must not be negative.

    """

    # Taxonomy identifier, typed as a pandas categorical.
    taxonomy_id: Series[pd.CategoricalDtype] = pa.Field()
    # Integer count; `ge=0` requires values greater than or equal to zero.
    count: Series[int] = pa.Field(ge=0)

    class Config:
        """Configure the schema model."""

        # Coerce columns to the declared dtypes during validation.
        coerce = True
        # Require the columns to appear in the order in which they are declared.
        ordered = True
        # Reject columns that are not declared in the schema.
        strict = True
Attributes
count: Series[int] = pa.Field(ge=0) class-attribute
taxonomy_id: Series[pd.CategoricalDtype] = pa.Field() class-attribute
Classes
Config

Configure the schema model.

Source code in taxpasta/domain/model/standard_profile.py
class Config:
    """Configure the schema model."""

    # Coerce columns to the declared dtypes during validation.
    coerce = True
    # Require the columns to appear in the order in which they are declared.
    ordered = True
    # Reject columns that are not declared in the schema.
    strict = True
Attributes
coerce = True class-attribute
ordered = True class-attribute
strict = True class-attribute
taxonomy

Provide a taxonomy model.

Classes
Taxonomy

Define a taxonomy model.

Source code in taxpasta/domain/model/taxonomy.py
class Taxonomy:
    """
    Define a taxonomy model.

    The class is currently an empty placeholder without attributes or methods.

    """
tidy_observation_table

Provide a description of a tidy observation table.

Classes
TidyObservationTable

Bases: pa.SchemaModel

Define the tidy observation table.

Source code in taxpasta/domain/model/tidy_observation_table.py
class TidyObservationTable(pa.SchemaModel):
    """
    Define the tidy observation table.

    The schema declares three columns in this order: a categorical taxonomy
    identifier, an integer count that must not be negative, and a categorical
    sample column.

    """

    # Taxonomy identifier, typed as a pandas categorical.
    taxonomy_id: Series[pd.CategoricalDtype] = pa.Field()
    # Integer count; `ge=0` requires values greater than or equal to zero.
    count: Series[int] = pa.Field(ge=0)
    # The sample that the row belongs to, typed as a pandas categorical.
    sample: Series[pd.CategoricalDtype] = pa.Field()

    class Config:
        """Configure the schema model."""

        # Coerce columns to the declared dtypes during validation.
        coerce = True
        # Require the columns to appear in the order in which they are declared.
        ordered = True
        # Reject columns that are not declared in the schema.
        strict = True
Attributes
count: Series[int] = pa.Field(ge=0) class-attribute
sample: Series[pd.CategoricalDtype] = pa.Field() class-attribute
taxonomy_id: Series[pd.CategoricalDtype] = pa.Field() class-attribute
Classes
Config

Configure the schema model.

Source code in taxpasta/domain/model/tidy_observation_table.py
class Config:
    """Configure the schema model."""

    # Coerce columns to the declared dtypes during validation.
    coerce = True
    # Require the columns to appear in the order in which they are declared.
    ordered = True
    # Reject columns that are not declared in the schema.
    strict = True
Attributes
coerce = True class-attribute
ordered = True class-attribute
strict = True class-attribute
wide_observation_table

Provide a description of an observation matrix.

Classes
WideObservationTable

Bases: pa.SchemaModel

Define the observation matrix.

Source code in taxpasta/domain/model/wide_observation_table.py
class WideObservationTable(pa.SchemaModel):
    """
    Define the observation matrix.

    The schema declares a categorical taxonomy identifier column plus any number of
    integer columns with values that must not be negative.

    """

    # Taxonomy identifier, typed as a pandas categorical.
    taxonomy_id: Series[pd.CategoricalDtype] = pa.Field()
    # The alias is a regular expression (`regex=True`) whose negative lookahead
    # matches every column name except exactly `taxonomy_id`. Each matched column
    # must hold integers greater than or equal to zero (`ge=0`).
    any_samples: Series[int] = pa.Field(ge=0, alias="^(?!taxonomy_id$).*", regex=True)

    class Config:
        """Configure the schema model."""

        # Coerce columns to the declared dtypes during validation.
        coerce = True
        # Require the columns to appear in the order in which they are declared.
        ordered = True
Attributes
any_samples: Series[int] = pa.Field(ge=0, alias='^(?!taxonomy_id$).*', regex=True) class-attribute
taxonomy_id: Series[pd.CategoricalDtype] = pa.Field() class-attribute
Classes
Config

Configure the schema model.

Source code in taxpasta/domain/model/wide_observation_table.py
class Config:
    """Configure the schema model."""

    # Coerce columns to the declared dtypes during validation.
    coerce = True
    # Require the columns to appear in the order in which they are declared.
    ordered = True
Attributes
coerce = True class-attribute
ordered = True class-attribute

service

Classes

Modules

consensus_builder

Provide a builder service for creating a consensus between many profiles.

Classes
ConsensusBuilder

Bases: ABC

Define a builder service for creating a consensus between many profiles.

Source code in taxpasta/domain/service/consensus_builder.py
class ConsensusBuilder(ABC):
    """
    Define a builder service for creating a consensus between many profiles.

    This is an abstract base class; concrete subclasses have to implement both
    `add_sample` and `build`.

    """

    @abstractmethod
    def add_sample(self, sample: Sample) -> None:
        """
        Add a sample to the consensus builder.

        Args:
            sample: The sample to add.

        """

    @abstractmethod
    def build(self) -> pd.DataFrame:
        """
        Build the consensus between all added profiles.

        Returns:
            The consensus as a pandas data frame.

        """
Functions
add_sample(sample: Sample) -> None abstractmethod

Add a sample to the consensus builder.

Source code in taxpasta/domain/service/consensus_builder.py
@abstractmethod
def add_sample(self, sample: Sample) -> None:
    """
    Add a sample to the consensus builder.

    Args:
        sample: The sample to add.

    """
build() -> pd.DataFrame abstractmethod

Build the consensus between all added profiles.

Source code in taxpasta/domain/service/consensus_builder.py
@abstractmethod
def build(self) -> pd.DataFrame:
    """
    Build the consensus between all added profiles.

    Returns:
        The consensus as a pandas data frame.

    """
sample_merging_service

Provide a sample merging service that summarizes two or more samples.

Classes
SampleMergingService

Define a sample merging service that summarizes one or more samples.

Source code in taxpasta/domain/service/sample_merging_service.py
class SampleMergingService:
    """
    Define a service that merges sample profiles into a single table.

    Both a wide and a long (tidy) output format are available.

    """

    @classmethod
    @pa.check_types(lazy=True)
    def merge_wide(cls, samples: Iterable[Sample]) -> DataFrame[WideObservationTable]:
        """
        Combine sample profiles into a wide-format observation matrix.

        Args:
            samples: Two or more samples.

        Returns:
            A table with one row per taxon, consisting of the taxonomy identifier
            column and one column of abundance counts for each sample.

        """
        # Index every profile by taxonomy identifier and label its count column with
        # the sample name. `set_index` returns a new data frame, so the profiles
        # passed in stay untouched, at the price of holding a second copy in memory.
        columns = []
        for entry in samples:
            by_taxon = entry.profile.set_index(
                keys=StandardProfile.taxonomy_id, verify_integrity=True
            )
            columns.append(
                by_taxon.rename(columns={StandardProfile.count: entry.name})
            )
        # The outer join keeps every taxon that occurs in any sample; the gaps this
        # leaves for samples lacking a taxon are filled with a count of zero.
        merged = columns[0].join(columns[1:], how="outer")
        counts = merged.fillna(0).astype(int)
        # After indexing and `reset_index`, the `taxonomy_id` column is not
        # guaranteed to be categorical any more. Schema coercion converts it back
        # when pandera checks the return type.
        return counts.reset_index()

    @classmethod
    @pa.check_types(lazy=True)
    def merge_long(cls, samples: Iterable[Sample]) -> DataFrame[TidyObservationTable]:
        """
        Combine sample profiles into a tidy (long-format) observation table.

        Args:
            samples: Two or more samples.

        Returns:
            A table with three columns: taxonomy identifier, abundance count, and
            sample identifier.

        """
        # Tag every profile with the name of its sample. `assign` returns a new data
        # frame, so the profiles passed in stay untouched, at the price of holding a
        # second copy in memory.
        tagged = [entry.profile.assign(sample=entry.name) for entry in samples]
        # Neither `taxonomy_id` after concatenation nor the newly assigned `sample`
        # column is guaranteed to be categorical. Schema coercion converts both
        # when pandera checks the return type.
        return pd.concat(tagged, ignore_index=True, copy=False)
Functions
merge_long(samples: Iterable[Sample]) -> DataFrame[TidyObservationTable] classmethod

Merge two or more sample profiles into a tidy observation table.

Parameters:

Name Type Description Default
samples Iterable[Sample]

Two or more samples.

required

Returns:

Type Description
DataFrame[TidyObservationTable]

A single table containing three columns: taxonomy identifier, abundance count, and sample identifier.

Source code in taxpasta/domain/service/sample_merging_service.py
@classmethod
@pa.check_types(lazy=True)
def merge_long(cls, samples: Iterable[Sample]) -> DataFrame[TidyObservationTable]:
    """
    Combine sample profiles into a tidy (long-format) observation table.

    Args:
        samples: Two or more samples.

    Returns:
        A table with three columns: taxonomy identifier, abundance count, and
        sample identifier.

    """
    # Tag every profile with the name of its sample. `assign` returns a new data
    # frame, so the profiles passed in stay untouched, at the price of holding a
    # second copy in memory.
    tagged = [entry.profile.assign(sample=entry.name) for entry in samples]
    # Neither `taxonomy_id` after concatenation nor the newly assigned `sample`
    # column is guaranteed to be categorical. Schema coercion converts both
    # when pandera checks the return type.
    return pd.concat(tagged, ignore_index=True, copy=False)
merge_wide(samples: Iterable[Sample]) -> DataFrame[WideObservationTable] classmethod

Merge two or more sample profiles into a wide-format observation matrix.

Parameters:

Name Type Description Default
samples Iterable[Sample]

Two or more samples.

required

Returns:

Type Description
DataFrame[WideObservationTable]

A single table containing one row per taxon, one column for the taxonomy identifier, and one column per sample with abundance counts.

Source code in taxpasta/domain/service/sample_merging_service.py
@classmethod
@pa.check_types(lazy=True)
def merge_wide(cls, samples: Iterable[Sample]) -> DataFrame[WideObservationTable]:
    """
    Combine sample profiles into a wide-format observation matrix.

    Args:
        samples: Two or more samples.

    Returns:
        A table with one row per taxon, consisting of the taxonomy identifier
        column and one column of abundance counts for each sample.

    """
    # Index every profile by taxonomy identifier and label its count column with
    # the sample name. `set_index` returns a new data frame, so the profiles
    # passed in stay untouched, at the price of holding a second copy in memory.
    columns = []
    for entry in samples:
        by_taxon = entry.profile.set_index(
            keys=StandardProfile.taxonomy_id, verify_integrity=True
        )
        columns.append(by_taxon.rename(columns={StandardProfile.count: entry.name}))
    # The outer join keeps every taxon that occurs in any sample; the gaps this
    # leaves for samples lacking a taxon are filled with a count of zero.
    merged = columns[0].join(columns[1:], how="outer")
    counts = merged.fillna(0).astype(int)
    # After indexing and `reset_index`, the `taxonomy_id` column is not guaranteed
    # to be categorical any more. Schema coercion converts it back when pandera
    # checks the return type.
    return counts.reset_index()