Skip to content

Application

Classes

Modules

add_tax_info_command

Provide a command object for adding taxonomy information.

Attributes

Table = TypeVar('Table', DataFrame[TidyObservationTable], DataFrame[WideObservationTable], DataFrame[StandardProfile]) module-attribute

Classes

AddTaxInfoCommand dataclass

Define the command object for adding taxonomy information.

Source code in src/taxpasta/application/add_tax_info_command.py
@dataclass(frozen=True)
class AddTaxInfoCommand:
    """
    Bundle the taxonomy annotation options into one executable command.

    The boolean flags select which taxonomy service methods ``execute`` applies
    to a table. Every requested feature requires a taxonomy service, which is
    checked right after initialization.

    """

    taxonomy_service: Optional[TaxonomyService] = None
    summarise_at: Optional[str] = None
    add_name: bool = False
    add_rank: bool = False
    add_lineage: bool = False
    add_id_lineage: bool = False
    add_rank_lineage: bool = False

    def __post_init__(self) -> None:
        """
        Validate that every requested feature has a taxonomy service available.

        Raises:
            ValueError: If ``summarise_at`` or any of the ``add_*`` flags is set
                while no taxonomy service was given.

        """
        if self.taxonomy_service is not None:
            return
        if self.summarise_at is not None:
            raise ValueError(
                "The summarising feature '--summarise-at' requires a taxonomy. Please "
                "provide one using the option '--taxonomy'."
            )
        # Checked in this fixed order so that the first offending option is the
        # one that gets reported.
        requested = (
            (self.add_name, "--add-name"),
            (self.add_rank, "--add-rank"),
            (self.add_lineage, "--add-lineage"),
            (self.add_id_lineage, "--add-id-lineage"),
            (self.add_rank_lineage, "--add-rank-lineage"),
        )
        for enabled, option in requested:
            if enabled:
                raise ValueError(
                    f"The '{option}' option requires a taxonomy. Please "
                    "provide one using the option '--taxonomy'."
                )

    def execute(self, table: Table) -> Table:
        """
        Apply all enabled taxonomy annotations to the given table.

        Args:
            table: The table to annotate.

        Returns:
            The table as returned by the last applied taxonomy service method, or
            the given table itself when there is no taxonomy service or no flag
            is set.

        """
        service = self.taxonomy_service
        if service is None:
            return table
        # The order of these steps is chosen specifically to yield a pleasant
        # final output format. Methods are looked up by name only when enabled.
        steps = (
            (self.add_rank_lineage, "add_rank_lineage"),
            (self.add_id_lineage, "add_identifier_lineage"),
            (self.add_lineage, "add_name_lineage"),
            (self.add_rank, "add_rank"),
            (self.add_name, "add_name"),
        )
        annotated = table
        for enabled, method_name in steps:
            if enabled:
                annotated = getattr(service, method_name)(annotated)
        return annotated
Attributes
add_id_lineage: bool = False class-attribute instance-attribute
add_lineage: bool = False class-attribute instance-attribute
add_name: bool = False class-attribute instance-attribute
add_rank: bool = False class-attribute instance-attribute
add_rank_lineage: bool = False class-attribute instance-attribute
summarise_at: Optional[str] = None class-attribute instance-attribute
taxonomy_service: Optional[TaxonomyService] = None class-attribute instance-attribute
Functions
__init__(taxonomy_service: Optional[TaxonomyService] = None, summarise_at: Optional[str] = None, add_name: bool = False, add_rank: bool = False, add_lineage: bool = False, add_id_lineage: bool = False, add_rank_lineage: bool = False) -> None
__post_init__() -> None

Perform post initialization validation.

Source code in src/taxpasta/application/add_tax_info_command.py
def __post_init__(self) -> None:
    """
    Validate that every requested feature has a taxonomy service available.

    Raises:
        ValueError: If ``summarise_at`` or any of the ``add_*`` flags is set
            while no taxonomy service was given.

    """
    if self.taxonomy_service is not None:
        return
    if self.summarise_at is not None:
        raise ValueError(
            "The summarising feature '--summarise-at' requires a taxonomy. Please "
            "provide one using the option '--taxonomy'."
        )
    # Checked in this fixed order so that the first offending option is the one
    # that gets reported.
    requested = (
        (self.add_name, "--add-name"),
        (self.add_rank, "--add-rank"),
        (self.add_lineage, "--add-lineage"),
        (self.add_id_lineage, "--add-id-lineage"),
        (self.add_rank_lineage, "--add-rank-lineage"),
    )
    for enabled, option in requested:
        if enabled:
            raise ValueError(
                f"The '{option}' option requires a taxonomy. Please "
                "provide one using the option '--taxonomy'."
            )
execute(table: Table) -> Table

Execute the command to add taxonomy information.

Source code in src/taxpasta/application/add_tax_info_command.py
def execute(self, table: Table) -> Table:
    """
    Apply all enabled taxonomy annotations to the given table.

    Args:
        table: The table to annotate.

    Returns:
        The table as returned by the last applied taxonomy service method, or the
        given table itself when there is no taxonomy service or no flag is set.

    """
    service = self.taxonomy_service
    if service is None:
        return table
    # The order of these steps is chosen specifically to yield a pleasant final
    # output format. Methods are looked up by name only when enabled.
    steps = (
        (self.add_rank_lineage, "add_rank_lineage"),
        (self.add_id_lineage, "add_identifier_lineage"),
        (self.add_lineage, "add_name_lineage"),
        (self.add_rank, "add_rank"),
        (self.add_name, "add_name"),
    )
    annotated = table
    for enabled, method_name in steps:
        if enabled:
            annotated = getattr(service, method_name)(annotated)
    return annotated

consensus_application

Provide an abstract base class for consensus building applications.

Classes

ConsensusApplication

Bases: ABC

Define an abstract base class for consensus building applications.

Source code in src/taxpasta/application/consensus_application.py
class ConsensusApplication(ABC):
    """
    Define an abstract base class for consensus building applications.

    Concrete subclasses have to implement the ``run`` class method.

    """

    @classmethod
    @abstractmethod
    def run(
        cls, profiles: Iterable[DataFrame[StandardProfile]], taxonomy: Path
    ) -> DataFrame:
        """
        Build a consensus from two or more taxonomic profiles.

        Args:
            profiles: Standardized profiles.
            taxonomy: The path to a shared taxonomy.

        Returns:
            The consensus as a data frame. This interface does not fix its schema.

        """
Functions
run(profiles: Iterable[DataFrame[StandardProfile]], taxonomy: Path) -> DataFrame abstractmethod classmethod

Build a consensus from two or more taxonomic profiles.

Parameters:

Name Type Description Default
profiles Iterable[DataFrame[StandardProfile]]

Standardized profiles.

required
taxonomy Path

The path to a shared taxonomy.

required
Source code in src/taxpasta/application/consensus_application.py
@classmethod
@abstractmethod
def run(
    cls, profiles: Iterable[DataFrame[StandardProfile]], taxonomy: Path
) -> DataFrame:
    """
    Build a consensus from two or more taxonomic profiles.

    Args:
        profiles: Standardized profiles.
        taxonomy: The path to a shared taxonomy.

    Returns:
        The consensus as a data frame. This interface does not fix its schema.

    """

error

Classes

Modules

standardisation_error

Provide an exception for errors during profile extraction and transformation.

Classes
StandardisationError

Bases: TaxpastaError

Define an exception for errors during profile extraction and transformation.

Source code in src/taxpasta/application/error/standardisation_error.py
class StandardisationError(TaxpastaError):
    """Signal a failure during profile extraction and transformation."""

    def __init__(self, *, sample: str, profile: Path, message: str, **kwargs) -> None:
        """
        Record the context of a failed standardisation.

        Args:
            sample: The name of the affected sample.
            profile: The path of the profile that could not be standardised.
            message: A description of what went wrong.
            **kwargs: Passed on to the parent class initializer.

        """
        super().__init__(**kwargs)
        self.sample, self.profile, self.message = sample, profile, message
Attributes
message = message instance-attribute
profile = profile instance-attribute
sample = sample instance-attribute
Functions
__init__(*, sample: str, profile: Path, message: str, **kwargs) -> None

Initialize a taxpasta standardisation error.

Source code in src/taxpasta/application/error/standardisation_error.py
def __init__(self, *, sample: str, profile: Path, message: str, **kwargs) -> None:
    """
    Record the context of a failed standardisation.

    Args:
        sample: The name of the affected sample.
        profile: The path of the profile that could not be standardised.
        message: A description of what went wrong.
        **kwargs: Passed on to the parent class initializer.

    """
    super().__init__(**kwargs)
    self.sample, self.profile, self.message = sample, profile, message
taxpasta_error

Provide the base class for all taxpasta application errors.

Classes
TaxpastaError

Bases: Exception

Define the base class for all taxpasta application errors.

Source code in src/taxpasta/application/error/taxpasta_error.py
class TaxpastaError(Exception):
    """Define the base class for all taxpasta application errors."""

    def __init__(self, *args, **kwargs) -> None:
        """
        Initialize a base taxpasta error.

        Args:
            *args: Positional arguments passed on to ``Exception``, for example an
                error message. Accepting them keeps positional construction
                working exactly as it did before this initializer existed.
            **kwargs: Passed on for cooperative inheritance.

        """
        # This method used to be misspelled ``__int__``. It therefore never ran
        # as the initializer and instead acted as a broken ``int()`` conversion.
        super().__init__(*args, **kwargs)
Functions
__int__(**kwargs) -> None

Initialize a base taxpasta error.

Source code in src/taxpasta/application/error/taxpasta_error.py
def __init__(self, *args, **kwargs) -> None:
    """
    Initialize a base taxpasta error.

    Args:
        *args: Positional arguments passed on to ``Exception``, for example an
            error message. Accepting them keeps positional construction working
            exactly as it did before this initializer existed.
        **kwargs: Passed on for cooperative inheritance.

    """
    # This method used to be misspelled ``__int__``. It therefore never ran as
    # the initializer and instead acted as a broken ``int()`` conversion.
    super().__init__(*args, **kwargs)

sample_handling_application

Provide a sample handling application.

Attributes

logger = logging.getLogger(__name__) module-attribute

Classes

SampleHandlingApplication

Define the sample handling application.

Source code in src/taxpasta/application/sample_handling_application.py
class SampleHandlingApplication:
    """
    Define the sample handling application.

    It combines a profile reader and a profile standardisation service for one
    profile format, plus an optional taxonomy service, to load, summarise, and
    merge samples.

    """

    def __init__(
        self,
        *,
        profile_reader: Type[ProfileReader],
        profile_standardiser: Type[ProfileStandardisationService],
        taxonomy_service: Optional[TaxonomyService] = None,
        **kwargs: dict,
    ):
        """
        Initialize the sample handling application.

        Args:
            profile_reader: A profile reader for a specific taxonomic profile format.
            profile_standardiser: A profile standardisation service for a specific
                taxonomic profile format.
            taxonomy_service: A taxonomy service instance. It is assumed that all
                profiles to be handled in the application are based on the given
                taxonomy loaded in the service instance.
            **kwargs: Passed on for inheritance.

        """
        super().__init__(**kwargs)
        self.reader = profile_reader
        self.standardiser = profile_standardiser
        self.taxonomy_service = taxonomy_service

    def etl_sample(self, name: str, profile: Path) -> Sample:
        """
        Extract, transform, and load a profile into a sample.

        Args:
            name: A name for the sample.
            profile: The path to a taxonomic profile.

        Returns:
            A sample.

        Raises:
            StandardisationError: If the given profile does not match the validation
                schema or if reading or transforming it raises a ``ValueError``.

        """
        try:
            result = self.standardiser.transform(self.reader.read(profile))
        except SchemaErrors as errors:
            # An empty profile gets a short dedicated message; otherwise the
            # collected schema failure cases are reported.
            if errors.data.empty:
                raise StandardisationError(
                    sample=name, profile=profile, message="Profile is empty."
                ) from errors
            else:
                raise StandardisationError(
                    sample=name, profile=profile, message=str(errors.failure_cases)
                ) from errors
        except ValueError as error:
            raise StandardisationError(
                sample=name, profile=profile, message=str(error)
            ) from error

        return Sample(name=name, profile=result)

    def summarise_sample(self, sample: Sample, rank: str) -> Sample:
        """
        Summarise a sample at a higher taxonomic rank.

        Args:
            sample: The sample whose profile should be summarised.
            rank: The taxonomic rank to summarise at.

        Returns:
            A new sample with the same name and the summarised profile.

        """
        # Callers are expected to only request summarising when a taxonomy
        # service was configured.
        assert self.taxonomy_service is not None  # nosec assert_used
        return Sample(
            name=sample.name,
            profile=self.taxonomy_service.summarise_at(sample.profile, rank),
        )

    def merge_samples(
        self,
        samples: Iterable[Sample],
        wide_format: bool,
    ) -> DataFrame[WideObservationTable] | DataFrame[TidyObservationTable]:
        """
        Merge two or more samples into a single table.

        Args:
            samples: Two or more samples.
            wide_format: Whether to create wide or (tidy) long format output.

        Returns:
            A single table containing all samples in the desired format.

        """
        if not wide_format:
            return SampleMergingService.merge_long(samples)

        # The samples are traversed once for merging and a second time for the
        # check below. Materialise them first so that a one-shot iterator (for
        # example a generator) is not exhausted before the check runs.
        sample_list = list(samples)
        wide_table = SampleMergingService.merge_wide(sample_list)
        merged_ids = wide_table[WideObservationTable.taxonomy_id]
        # If any profile did not have all the same taxonomy IDs as the combined
        # table, additional zeroes were introduced.
        if any(
            not merged_ids.isin(sample.profile[StandardProfile.taxonomy_id]).all()
            for sample in sample_list
        ):
            logger.warning(
                "The merged profiles contained different taxa. Additional "
                "zeroes were introduced for missing taxa."
            )
        return wide_table
Attributes
reader = profile_reader instance-attribute
standardiser = profile_standardiser instance-attribute
taxonomy_service = taxonomy_service instance-attribute
Functions
__init__(*, profile_reader: Type[ProfileReader], profile_standardiser: Type[ProfileStandardisationService], taxonomy_service: Optional[TaxonomyService] = None, **kwargs: dict)

Initialize the sample handling application.

Parameters:

Name Type Description Default
profile_reader Type[ProfileReader]

A profile reader for a specific taxonomic profile format.

required
profile_standardiser Type[ProfileStandardisationService]

A profile standardisation service for a specific taxonomic profile format.

required
taxonomy_service Optional[TaxonomyService]

A taxonomy service instance. It is assumed that all profiles to be handled in the application are based on the given taxonomy loaded in the service instance.

None
**kwargs dict

Passed on for inheritance.

{}
Source code in src/taxpasta/application/sample_handling_application.py
def __init__(
    self,
    *,
    profile_reader: Type[ProfileReader],
    profile_standardiser: Type[ProfileStandardisationService],
    taxonomy_service: Optional[TaxonomyService] = None,
    **kwargs: dict,
):
    """
    Set up the sample handling application with its collaborators.

    Args:
        profile_reader: The profile reader for one specific taxonomic profile
            format.
        profile_standardiser: The profile standardisation service for the same
            taxonomic profile format.
        taxonomy_service: An optional taxonomy service instance. All profiles
            handled by the application are assumed to be based on the taxonomy
            loaded in that service instance.
        **kwargs: Passed on for inheritance.

    """
    super().__init__(**kwargs)
    self.reader, self.standardiser = profile_reader, profile_standardiser
    self.taxonomy_service = taxonomy_service
etl_sample(name: str, profile: Path) -> Sample

Extract, transform, and load a profile into a sample.

Parameters:

Name Type Description Default
name str

A name for the sample.

required
profile Path

The path to a taxonomic profile.

required

Returns:

Type Description
Sample

A sample.

Raises:

Type Description
StandardisationError

If the given profile does not match the validation schema.

Source code in src/taxpasta/application/sample_handling_application.py
def etl_sample(self, name: str, profile: Path) -> Sample:
    """
    Read a profile, standardise it, and wrap the result in a sample.

    Args:
        name: A name for the sample.
        profile: The path to a taxonomic profile.

    Returns:
        A sample holding the standardised profile under the given name.

    Raises:
        StandardisationError: If the given profile does not match the validation
            schema or if reading or transforming it raises a ``ValueError``.

    """
    try:
        standardised = self.standardiser.transform(self.reader.read(profile))
    except SchemaErrors as errors:
        # An empty profile gets a short dedicated message; otherwise the
        # collected schema failure cases are reported.
        reason = (
            "Profile is empty." if errors.data.empty else str(errors.failure_cases)
        )
        raise StandardisationError(
            sample=name, profile=profile, message=reason
        ) from errors
    except ValueError as error:
        raise StandardisationError(
            sample=name, profile=profile, message=str(error)
        ) from error

    return Sample(name=name, profile=standardised)
merge_samples(samples: Iterable[Sample], wide_format: bool) -> DataFrame[WideObservationTable] | DataFrame[TidyObservationTable]

Merge two or more samples into a single table.

Parameters:

Name Type Description Default
samples Iterable[Sample]

Two or more samples.

required
wide_format bool

Whether to create wide or (tidy) long format output.

required

Returns:

Type Description
DataFrame[WideObservationTable] | DataFrame[TidyObservationTable]

A single table containing all samples in the desired format.

Source code in src/taxpasta/application/sample_handling_application.py
def merge_samples(
    self,
    samples: Iterable[Sample],
    wide_format: bool,
) -> DataFrame[WideObservationTable] | DataFrame[TidyObservationTable]:
    """
    Merge two or more samples into a single table.

    Args:
        samples: Two or more samples.
        wide_format: Whether to create wide or (tidy) long format output.

    Returns:
        A single table containing all samples in the desired format.

    """
    if not wide_format:
        return SampleMergingService.merge_long(samples)

    # The samples are traversed once for merging and a second time for the check
    # below. Materialise them first so that a one-shot iterator (for example a
    # generator) is not exhausted before the check runs.
    sample_list = list(samples)
    wide_table = SampleMergingService.merge_wide(sample_list)
    merged_ids = wide_table[WideObservationTable.taxonomy_id]
    # If any profile did not have all the same taxonomy IDs as the combined
    # table, additional zeroes were introduced.
    if any(
        not merged_ids.isin(sample.profile[StandardProfile.taxonomy_id]).all()
        for sample in sample_list
    ):
        logger.warning(
            "The merged profiles contained different taxa. Additional "
            "zeroes were introduced for missing taxa."
        )
    return wide_table
summarise_sample(sample: Sample, rank: str) -> Sample

Summarise a sample at a higher taxonomic rank.

Source code in src/taxpasta/application/sample_handling_application.py
def summarise_sample(self, sample: Sample, rank: str) -> Sample:
    """
    Create a new sample whose profile is summarised at the given rank.

    Args:
        sample: The sample whose profile should be summarised.
        rank: The taxonomic rank to summarise at.

    Returns:
        A new sample with the same name and the summarised profile.

    """
    # Callers are expected to only request summarising when a taxonomy service
    # was configured.
    assert self.taxonomy_service is not None  # nosec assert_used
    sample_name = sample.name
    summarised = self.taxonomy_service.summarise_at(sample.profile, rank)
    return Sample(name=sample_name, profile=summarised)

service

Attributes

Classes

Modules

profile_reader

Provide an abstract base class for reading taxonomic profiles.

Attributes
Classes
ProfileReader

Bases: ABC

Define an abstract base class for reading taxonomic profiles.

Source code in src/taxpasta/application/service/profile_reader.py
class ProfileReader(ABC):
    """
    Define the abstract interface for reading taxonomic profiles.

    Concrete readers implement ``read``; ``_check_num_columns`` is a shared helper
    for validating the number of columns of a profile.

    """

    @classmethod
    @abstractmethod
    def read(cls, profile: BufferOrFilepath) -> pd.DataFrame:
        """Read a taxonomic profile from the given source."""

    @classmethod
    def _check_num_columns(
        cls, profile: pd.DataFrame, schema_model: Type[pa.DataFrameModel]
    ) -> None:
        """
        Ensure the profile has exactly as many columns as the schema defines.

        Args:
            profile: The profile whose columns are counted.
            schema_model: The schema model that defines the expected columns.

        Raises:
            ValueError: If the number of columns differs from the schema's.

        """
        expected = len(schema_model.to_schema().columns)
        observed = len(profile.columns)
        if observed == expected:
            return
        raise ValueError(
            f"Unexpected report format. It has {observed} columns but "
            f"only {expected} are expected."
        )
Functions
read(profile: BufferOrFilepath) -> pd.DataFrame abstractmethod classmethod

Read a taxonomic profile from the given source.

Source code in src/taxpasta/application/service/profile_reader.py
@classmethod
@abstractmethod
def read(cls, profile: BufferOrFilepath) -> pd.DataFrame:
    """
    Read a taxonomic profile from the given source.

    Args:
        profile: The buffer or file path to read the profile from.

    Returns:
        The profile as a pandas data frame.

    """
profile_standardisation_service

Provide an abstract base class for a profile standardisation service.

Classes
ProfileStandardisationService

Bases: ABC

Define an abstract base class for a profile standardisation service.

Source code in src/taxpasta/application/service/profile_standardisation_service.py
class ProfileStandardisationService(ABC):
    """
    Define an abstract base class for a profile standardisation service.

    Concrete subclasses have to implement the ``transform`` class method.

    """

    @classmethod
    @abstractmethod
    def transform(cls, profile: DataFrame) -> DataFrame[StandardProfile]:
        """
        Tidy up and standardize a given taxonomic profile.

        Args:
            profile: The taxonomic profile of a particular tool.

        Returns:
            A standardized profile.

        """
Functions
transform(profile: DataFrame) -> DataFrame[StandardProfile] abstractmethod classmethod

Tidy up and standardize a given taxonomic profile.

Parameters:

Name Type Description Default
profile DataFrame

The taxonomic profile of a particular tool.

required

Returns:

Type Description
DataFrame[StandardProfile]

A standardized profile.

Source code in src/taxpasta/application/service/profile_standardisation_service.py
@classmethod
@abstractmethod
def transform(cls, profile: DataFrame) -> DataFrame[StandardProfile]:
    """
    Tidy up and standardize a given taxonomic profile.

    Args:
        profile: The taxonomic profile of a particular tool as a data frame.

    Returns:
        A standardized profile.

    """
standard_profile_writer

Provide an abstract base class for writing a standardized profile.

Attributes
Classes
StandardProfileWriter

Bases: ABC

Define an abstract base class for writing a standardized profile.

Source code in src/taxpasta/application/service/standard_profile_writer.py
class StandardProfileWriter(ABC):
    """
    Define an abstract base class for writing a standardized profile.

    Concrete subclasses have to implement the ``write`` class method.

    """

    @classmethod
    @abstractmethod
    def write(
        cls, profile: DataFrame[StandardProfile], target: BufferOrFilepath, **kwargs
    ) -> None:
        """
        Write a standardized profile to the given buffer or file.

        Args:
            profile: The standardized profile to write.
            target: The buffer or file path to write to.
            **kwargs: Additional keyword arguments; their use is left to the
                concrete implementation.

        """
Functions
write(profile: DataFrame[StandardProfile], target: BufferOrFilepath, **kwargs) -> None abstractmethod classmethod

Write a standardized profile to a file.

Source code in src/taxpasta/application/service/standard_profile_writer.py
@classmethod
@abstractmethod
def write(
    cls, profile: DataFrame[StandardProfile], target: BufferOrFilepath, **kwargs
) -> None:
    """
    Write a standardized profile to the given buffer or file.

    Args:
        profile: The standardized profile to write.
        target: The buffer or file path to write to.
        **kwargs: Additional keyword arguments; their use is left to the concrete
            implementation.

    """
table_reader

Provide an abstract base class for reading tables.

Attributes
Classes
TableReader

Bases: ABC

Define an abstract base class for reading tables.

Source code in src/taxpasta/application/service/table_reader.py
class TableReader(ABC):
    """
    Define an abstract base class for reading tables.

    Concrete subclasses have to implement the ``read`` class method.

    """

    @classmethod
    @abstractmethod
    def read(cls, source: BufferOrFilepath, **kwargs) -> pd.DataFrame:
        """
        Read a table from the given source.

        Args:
            source: The buffer or file path to read the table from.
            **kwargs: Additional keyword arguments; their use is left to the
                concrete implementation.

        Returns:
            The table as a pandas data frame.

        """
Functions
read(source: BufferOrFilepath, **kwargs) -> pd.DataFrame abstractmethod classmethod

Read a table from the given source.

Source code in src/taxpasta/application/service/table_reader.py
@classmethod
@abstractmethod
def read(cls, source: BufferOrFilepath, **kwargs) -> pd.DataFrame:
    """
    Read a table from the given source.

    Args:
        source: The buffer or file path to read the table from.
        **kwargs: Additional keyword arguments; their use is left to the concrete
            implementation.

    Returns:
        The table as a pandas data frame.

    """
tidy_observation_table_writer

Provide an abstract base class for writing tidy observation tables.

Attributes
Classes
TidyObservationTableWriter

Bases: ABC

Define an abstract base class for writing tidy observation tables.

Source code in src/taxpasta/application/service/tidy_observation_table_writer.py
class TidyObservationTableWriter(ABC):
    """
    Define an abstract base class for writing tidy observation tables.

    Concrete subclasses have to implement the ``write`` class method.

    """

    @classmethod
    @abstractmethod
    def write(
        cls, table: DataFrame[TidyObservationTable], target: BufferOrFilepath, **kwargs
    ) -> None:
        """
        Write a tidy observation table to the given buffer or file.

        Args:
            table: The tidy observation table to write.
            target: The buffer or file path to write to.
            **kwargs: Additional keyword arguments; their use is left to the
                concrete implementation.

        """
Functions
write(table: DataFrame[TidyObservationTable], target: BufferOrFilepath, **kwargs) -> None abstractmethod classmethod

Write a tidy observation table to the given buffer or file.

Source code in src/taxpasta/application/service/tidy_observation_table_writer.py
@classmethod
@abstractmethod
def write(
    cls, table: DataFrame[TidyObservationTable], target: BufferOrFilepath, **kwargs
) -> None:
    """
    Write a tidy observation table to the given buffer or file.

    Args:
        table: The tidy observation table to write.
        target: The buffer or file path to write to.
        **kwargs: Additional keyword arguments; their use is left to the concrete
            implementation.

    """
wide_observation_table_writer

Provide an abstract base class for writing observation matrices.

Attributes
Classes
WideObservationTableWriter

Bases: ABC

Define an abstract base class for writing observation matrices.

Source code in src/taxpasta/application/service/wide_observation_table_writer.py
class WideObservationTableWriter(ABC):
    """
    Define an abstract base class for writing observation matrices.

    Concrete subclasses have to implement the ``write`` class method.

    """

    @classmethod
    @abstractmethod
    def write(
        cls, matrix: DataFrame[WideObservationTable], target: BufferOrFilepath, **kwargs
    ) -> None:
        """
        Write an observation matrix to the given buffer or file.

        Args:
            matrix: The observation matrix (wide observation table) to write.
            target: The buffer or file path to write to.
            **kwargs: Additional keyword arguments; their use is left to the
                concrete implementation.

        """
Functions
write(matrix: DataFrame[WideObservationTable], target: BufferOrFilepath, **kwargs) -> None abstractmethod classmethod

Write an observation matrix to the given buffer or file.

Source code in src/taxpasta/application/service/wide_observation_table_writer.py
@classmethod
@abstractmethod
def write(
    cls, matrix: DataFrame[WideObservationTable], target: BufferOrFilepath, **kwargs
) -> None:
    """
    Write an observation matrix to the given buffer or file.

    Args:
        matrix: The observation matrix (wide observation table) to write.
        target: The buffer or file path to write to.
        **kwargs: Additional keyword arguments; their use is left to the concrete
            implementation.

    """