-
Notifications
You must be signed in to change notification settings - Fork 1
Add more Py_AASequence Convenience functions #25
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 6 commits
ebb5cdf
af516c2
4167d76
1d0d031
70eb83d
d9d6c47
266a926
f2fa5e8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,13 +2,13 @@ | |
|
|
||
| from __future__ import annotations | ||
|
|
||
| from typing import Optional | ||
| from typing import Optional, Literal | ||
| import pyopenms as oms | ||
|
|
||
|
|
||
| class Py_AASequence: | ||
| """ | ||
| A Pythonic wrapper around pyOpenMS AASequence. | ||
| A Pythonic, immutable wrapper around pyOpenMS AASequence. | ||
|
|
||
| This class provides intuitive properties and methods for working with | ||
| amino acid sequences, including common operations like reversing and | ||
|
|
@@ -40,7 +40,7 @@ def __init__(self, native_sequence: Optional[oms.AASequence] = None): | |
| @classmethod | ||
| def from_string(cls, sequence_str: str) -> Py_AASequence: | ||
| """ | ||
| Create AASequence from string representation. | ||
| Create Py_AASequence from string representation. | ||
|
|
||
| Args: | ||
| sequence_str: String representation of the amino acid sequence. | ||
|
|
@@ -57,6 +57,20 @@ def from_string(cls, sequence_str: str) -> Py_AASequence: | |
|
|
||
| # ==================== Pythonic Properties ==================== | ||
|
|
||
| @classmethod | ||
| def from_native(cls, native_sequence: oms.AASequence) -> Py_AASequence: | ||
| """ | ||
| Creates Py_AASequence from native pyOpenMS AASequence. | ||
|
|
||
| Args: | ||
| native_sequence (oms.AASequence): | ||
|
|
||
| Returns: | ||
| Py_AASequence: New wrapped opject | ||
|
|
||
| """ | ||
| return cls(native_sequence) | ||
|
|
||
| @property | ||
| def native(self) -> oms.AASequence: | ||
| """Return the underlying pyOpenMS AASequence.""" | ||
|
|
@@ -204,26 +218,126 @@ def __eq__(self, other: object) -> bool: | |
| return False | ||
| return self.sequence == other.sequence | ||
|
|
||
| def __getitem__(self, index: int) -> str: | ||
| def __getitem__(self, index): | ||
| """ | ||
| Get residue at position. | ||
| Get residue(s) at position(s). | ||
|
|
||
| Supports both single indexing and slicing, returning Py_AASequence objects. | ||
|
|
||
| Args: | ||
| index: Position in the sequence (0-based). | ||
| index: Integer for single residue, or slice object for subsequence. | ||
|
|
||
| Returns: | ||
| str: Single letter amino acid code. | ||
| Py_AASequence: Wrapped residue or subsequence. | ||
|
|
||
| Example: | ||
| >>> seq = Py_AASequence.from_string("PEPTIDE") | ||
| >>> seq[1] # Returns Py_AASequence("E") | ||
| >>> seq[1:4] # Returns Py_AASequence("EPT") | ||
| >>> seq[-1] # Returns Py_AASequence("E") | ||
| """ | ||
| if index < 0 or index >= len(self): | ||
| raise IndexError(f"Index {index} out of range for sequence of length {len(self)}") | ||
| residue = self._sequence.getResidue(index) | ||
| return residue.getOneLetterCode() | ||
| if isinstance(index, slice): | ||
| start, stop, step = index.indices(len(self)) | ||
| if step != 1: | ||
| raise ValueError("Step slicing is not supported for amino acid sequences") | ||
| return Py_AASequence.from_native(self._sequence.getSubsequence(start, stop - start)) | ||
| else: | ||
| # Handle negative indices | ||
| if index < 0: | ||
| index = len(self) + index | ||
| if index >= len(self): | ||
| raise IndexError(f"Index {index} out of range for sequence of length {len(self)}") | ||
| residue = self._sequence.getResidue(index) | ||
| residue_char = residue.getOneLetterCode() | ||
| return Py_AASequence.from_string(residue_char) | ||
jcharkow marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
| def __iter__(self): | ||
| """Iterate over residues.""" | ||
| for i in range(len(self)): | ||
| yield self[i] | ||
| def __add__(self, other: Py_AASequence | str) -> Py_AASequence: | ||
| """ | ||
| Concatenate sequences. | ||
|
|
||
| Args: | ||
| other: Py_AASequence or string to append. | ||
|
|
||
| Returns: | ||
| Py_AASequence: New concatenated sequence. | ||
|
|
||
| Example: | ||
| >>> seq1 = Py_AASequence.from_string("PEP") | ||
| >>> seq2 = Py_AASequence.from_string("TIDE") | ||
| >>> combined = seq1 + seq2 | ||
| >>> print(combined.sequence) | ||
| PEPTIDE | ||
| >>> combined2 = seq1 + "TIDE" | ||
| >>> print(combined2.sequence) | ||
| PEPTIDE | ||
| """ | ||
| if isinstance(other, Py_AASequence): | ||
| combined_str = self.sequence + other.sequence | ||
| elif isinstance(other, str): | ||
| combined_str = self.sequence + other | ||
| else: | ||
| return NotImplemented | ||
| return Py_AASequence.from_string(combined_str) | ||
|
|
||
| def __radd__(self, other: str) -> Py_AASequence: | ||
| """ | ||
| Support string + Py_AASequence. | ||
|
|
||
| Example: | ||
| >>> seq = Py_AASequence.from_string("TIDE") | ||
| >>> combined = "PEP" + seq | ||
| >>> print(combined.sequence) | ||
| PEPTIDE | ||
| """ | ||
| if isinstance(other, str): | ||
| combined_str = other + self.sequence | ||
| return Py_AASequence.from_string(combined_str) | ||
| return NotImplemented | ||
|
|
||
| def __mul__(self, times: int) -> Py_AASequence: | ||
| """ | ||
| Repeat sequence. | ||
|
|
||
| Args: | ||
| times: Number of times to repeat (must be >= 0). | ||
|
|
||
| Returns: | ||
| Py_AASequence: New repeated sequence. | ||
|
|
||
| Example: | ||
| >>> seq = Py_AASequence.from_string("PEP") | ||
| >>> repeated = seq * 3 | ||
| >>> print(repeated.sequence) | ||
| PEPPEPPEP | ||
| """ | ||
| if not isinstance(times, int) or times < 0: | ||
| return NotImplemented | ||
| return Py_AASequence.from_string(self.sequence * times) | ||
|
|
||
| def __rmul__(self, times: int) -> Py_AASequence: | ||
| """Support int * Py_AASequence.""" | ||
| return self.__mul__(times) | ||
| def __contains__(self, substring: str) -> bool: | ||
| """Check if substring is in sequence.""" | ||
| return self.has_substring(substring) | ||
|
|
||
| def __hash__(self) -> int: | ||
| """Make sequences hashable for use in sets/dicts.""" | ||
| return hash(self.sequence) | ||
|
|
||
| def __lt__(self, other: Py_AASequence) -> bool: | ||
| """Lexicographic comparison by sequence.""" | ||
| if not isinstance(other, Py_AASequence): | ||
| return NotImplemented | ||
| return self.sequence < other.sequence | ||
| def count(self, residue: str) -> int: | ||
| """Count occurrences of a residue, to be consistent with str.count().""" | ||
| return self._sequence.count(residue) | ||
|
||
|
|
||
| # ==================== Additional Utilities ==================== | ||
|
|
||
| def get_mz(self, charge: int) -> float: | ||
|
|
@@ -277,4 +391,37 @@ def has_suffix(self, suffix: str) -> bool: | |
| bool: True if sequence ends with suffix. | ||
| """ | ||
| return self._sequence.hasSuffix(oms.AASequence.fromString(suffix)) | ||
|
|
||
|
|
||
| # ===================== Exporting ======================= | ||
| def to_string(self, modified=True, mod_format: Literal['default', 'unimod', 'bracket'] = 'default') -> str: | ||
| """ | ||
| Get string representation of the sequence. | ||
|
|
||
| Args: | ||
| modified (bool): Whether to include modifications in the string. | ||
| mod_format (Optional[Literal['default', 'unimod', 'bracket']]): Format for modifications. | ||
| 'default' for OpenMS format, | ||
| 'unimod' for UniMod format, | ||
| 'bracket' for bracket notation. | ||
| Default is 'default'. | ||
|
|
||
| Returns: | ||
| str: Amino acid sequence as string. | ||
|
|
||
| Example: | ||
| >>> seq = Py_AASequence.from_string("PEPTIDE") | ||
| >>> seq_str = seq.to_string() | ||
| """ | ||
| if not modified: | ||
| return self.unmodified_sequence | ||
|
|
||
| else: | ||
| if mod_format == 'default': | ||
| return self._sequence.toString() | ||
| elif mod_format == 'unimod': | ||
| return self._sequence.toUniModString() | ||
| elif mod_format == 'bracket': | ||
| return self._sequence.toBracketString() | ||
| else: | ||
| raise ValueError(f"Unsupported mod_format: {mod_format}, supported are 'default', 'unimod' and 'bracket'") | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fix typos and complete docstring in `from_native`.

The docstring has two issues:
- The `native_sequence (oms.AASequence):` entry under `Args` has no description.
- "opject" in the `Returns` section is a typo for "object".

Apply this diff:

```diff
 @classmethod
 def from_native(cls, native_sequence: oms.AASequence) -> Py_AASequence:
     """
     Creates Py_AASequence from native pyOpenMS AASequence.

     Args:
-        native_sequence (oms.AASequence):
+        native_sequence (oms.AASequence): Native pyOpenMS AASequence object to wrap.

     Returns:
-        Py_AASequence: New wrapped opject
+        Py_AASequence: New wrapped object.

     """
     return cls(native_sequence)
```

🤖 Prompt for AI Agents