Source code for maspy.sil

"""
The class LabelDescriptor allows the specification of a labeling strategy with
stable isotopic labels. It can then be used to determine the labeling state of
a given peptide and calculate the expected mass of alternative labeling
states.
"""

#  Copyright 2015-2017 David M. Hollenstein, Jakob J. Hollenstein
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.

######################## Python 2 and 3 compatibility #########################
from __future__ import absolute_import, division, print_function
from __future__ import unicode_literals
from future.utils import viewitems, viewkeys, viewvalues, listitems, listvalues

try:
    #python 2.7
    from itertools import izip as zip
except ImportError:
    #python 3 series
    pass
################################################################################

import itertools
import warnings

import maspy.auxiliary as aux
import maspy.constants
import maspy.peptidemethods

#TODO: review for compatibility to current maspy version
# Emitted as a module-level statement, i.e. whenever this module is first
# imported, to tell users that maspy.sil is not up to date.
# NOTE(review): ImportWarning is ignored by Python's default warning filters,
# so this message is normally invisible unless warnings are enabled (e.g.
# ``-W default``) -- confirm that this category is intended.
warnings.warn('Module maspy.sil is not up to date and may change in the future',
              ImportWarning)

# --- Functions dealing with isotopic labels --- #
class LabelDescriptor(object):
    """Describes a MS1 stable isotope label setup for quantification.

    :ivar labels: Contains a dictionary with all possible label states, keys
        are increasing integers starting from 0, which correspond to the
        different label states. Each value is a dictionary with the keys
        ``'aminoAcidLabels'`` and ``'excludingModifications'``, holding the
        arguments passed to :meth:`addLabel`.
    :ivar excludingModifictions: bool, True if any label has specified
        excludingModifications. The misspelled attribute name is kept for
        backward compatibility; :attr:`excludingModifications` is a correctly
        spelled read-only alias.
    """
    def __init__(self):
        self.labels = dict()
        self.excludingModifictions = False
        # Key that the next call to addLabel() will use in self.labels.
        self._labelCounter = 0

    @property
    def excludingModifications(self):
        """bool, read-only alias of :attr:`excludingModifictions`."""
        return self.excludingModifictions

    def addLabel(self, aminoAcidLabels, excludingModifications=None):
        """Adds a new labelstate.

        :param aminoAcidLabels: Describes which amino acids can bear which
            labels. Possible keys are the amino acids in one letter code and
            'nTerm', 'cTerm'. Possible values are the modifications ids from
            :attr:`maspy.constants.aaModMass` as strings or a list of strings.
            An example for one expected label at the n-terminus and two
            expected labels at each Lysine:
            ``{'nTerm': 'u:188', 'K': ['u:188', 'u:188']}``
        :param excludingModifications: optional, A dictionary that describes
            which modifications can prevent the addition of labels. Keys and
            values have to be the modifications ids from
            :attr:`maspy.constants.aaModMass`. The key specifies the
            modification that prevents the label modification specified by the
            value. For example for each modification 'u:1' that is present at
            an amino acid or terminus of a peptide the number of expected
            labels at this position is reduced by one: ``{'u:1':'u:188'}``
        """
        if excludingModifications is not None:
            # The flag is sticky: once any label state defines excluding
            # modifications it stays True for the whole descriptor.
            self.excludingModifictions = True

        # The passed dictionaries are stored by reference, not copied.
        labelEntry = {'aminoAcidLabels': aminoAcidLabels,
                      'excludingModifications': excludingModifications
                      }
        self.labels[self._labelCounter] = labelEntry
        self._labelCounter += 1
def returnLabelStateMassDifferences(peptide, labelDescriptor, labelState=None,
                                    sequence=None):
    """Calculates the mass difference for alternative possible label states of
    a given peptide. See also :class:`LabelDescriptor`,
    :func:`returnLabelState()`

    :param peptide: Peptide to calculate alternative label states
    :param labelDescriptor: :class:`LabelDescriptor` describes the label setup
        of an experiment
    :param labelState: label state of the peptide, if None it is calculated by
        :func:`returnLabelState()`
    :param sequence: unmodified amino acid sequence of the "peptide", if None
        it is generated by :func:`maspy.peptidemethods.removeModifications()`

    :returns: {alternativeLabelState: massDifference, ...} or {} if the
        peptide label state is negative, i.e. the peptide could not be
        assigned to a label state.

    .. note:: The massDifference plus the peptide mass is the expected mass of
        an alternatively labeled peptide
    """
    if labelState is None:
        labelState = returnLabelState(peptide, labelDescriptor)
    if sequence is None:
        sequence = maspy.peptidemethods.removeModifications(peptide)

    if labelState < 0:
        # Negative values are the "no usable label state" codes of
        # returnLabelState(); there is no reference state to compare against.
        return dict()

    # Combined label mass of the peptide in its current label state
    labelMass = _combinedLabelMass(peptide, labelDescriptor.labels[labelState],
                                   sequence)

    # Mass differences to all other possible label states
    labelStateMassDifferences = dict()
    for possibleLabelState, labelStateInfo in viewitems(labelDescriptor.labels):
        if possibleLabelState == labelState:
            continue
        possibleLabelMass = _combinedLabelMass(peptide, labelStateInfo,
                                               sequence)
        labelStateMassDifferences[possibleLabelState] = (possibleLabelMass
                                                         - labelMass)
    return labelStateMassDifferences


def _combinedLabelMass(peptide, labelStateInfo, sequence):
    """Returns the summed mass of all labels expected on "peptide" for the
    label state described by "labelStateInfo".

    :param peptide: Peptide for which the expected labels are determined
    :param labelStateInfo: An entry of :attr:`LabelDescriptor.labels`
    :param sequence: unmodified amino acid sequence of "peptide"

    :returns: sum of ``maspy.constants.aaModMass[label] * count`` over all
        expected labels, 0 if no label is expected
    """
    # Count how often each label modification is expected on the peptide
    labelModNumbers = dict()
    _positions = expectedLabelPosition(peptide, labelStateInfo,
                                       sequence=sequence)
    for labelStateModList in viewvalues(_positions):
        for labelMod in labelStateModList:
            labelModNumbers[labelMod] = labelModNumbers.get(labelMod, 0) + 1

    labelMass = 0
    for labelMod, modCounts in viewitems(labelModNumbers):
        labelMass += maspy.constants.aaModMass[labelMod] * modCounts
    return labelMass
def returnLabelState(peptide, labelDescriptor, labelSymbols=None,
                     labelAminoacids=None):
    """Determines which label state of "labelDescriptor" a peptide is in.

    :param peptide: peptide whose label state should be calculated
    :param labelDescriptor: :class:`LabelDescriptor`, describes the label
        setup of an experiment.
    :param labelSymbols: modifications that show a label, as returned by
        :func:`modSymbolsFromLabelInfo`. Generated from "labelDescriptor" if
        None.
    :param labelAminoacids: amino acids that can bear a label, as returned by
        :func:`modAminoacidsFromLabelInfo`. Generated from "labelDescriptor"
        if None.

    :returns: integer that shows the label state:

        * >=0: predicted label state of the peptide
        * -1: peptide sequence can't bear any labelState modifications
        * -2: peptide modifications don't fit to any predicted labelState
        * -3: peptide modifications fit to a predicted labelState, but not
          all predicted labelStates are distinguishable
    """
    if labelSymbols is None:
        labelSymbols = modSymbolsFromLabelInfo(labelDescriptor)
    if labelAminoacids is None:
        labelAminoacids = modAminoacidsFromLabelInfo(labelDescriptor)

    sequence = maspy.peptidemethods.removeModifications(peptide)
    modPositions = maspy.peptidemethods.returnModPositions(
        peptide, indexStart=0, removeModString=False
    )

    # A peptide cannot bear a label if none of the labelable amino acids occur
    # in its sequence and the label setup defines no terminal labels.
    # Note: at the moment presence of excluding modifications are ignored
    labelState = None
    residueFound = any(sequence.find(aa) != -1 for aa in labelAminoacids)
    if (not residueFound and 'nTerm' not in labelAminoacids
            and 'cTerm' not in labelAminoacids):
        labelState = -1

    if labelState is None:
        # Collect the label modifications actually present on the peptide,
        # grouped by sequence position and sorted for comparison.
        observedLabels = dict()
        for labelSymbol in labelSymbols:
            if labelSymbol not in modPositions:
                continue
            for position in modPositions[labelSymbol]:
                observedLabels.setdefault(position, []).append(labelSymbol)
        for symbolsAtPosition in viewvalues(observedLabels):
            symbolsAtPosition.sort()

        # Compare against the expectation of every label state. If several
        # states match, the one iterated last is kept; such ambiguity is
        # reported as -3 by the distinguishability check below.
        predictedLabelStates = dict()
        for stateId, stateInfo in viewitems(labelDescriptor.labels):
            expectedLabels = expectedLabelPosition(peptide, stateInfo,
                                                   sequence=sequence,
                                                   modPositions=modPositions)
            predictedLabelStates[stateId] = expectedLabels
            if observedLabels == expectedLabels:
                labelState = stateId

    if labelState is None:
        # Peptide modifications don't fit to any predicted label state
        labelState = -2
    elif labelState != -1:
        # Label state keys are consecutive integers starting at 0, so every
        # pair of states can be addressed through range().
        stateIndices = range(len(predictedLabelStates))
        for first, second in itertools.combinations(stateIndices, 2):
            if predictedLabelStates[first] == predictedLabelStates[second]:
                labelState = -3
                break

    return labelState
def modSymbolsFromLabelInfo(labelDescriptor):
    """Collects every modification symbol that is used as a label in any label
    state of "labelDescriptor".

    :param labelDescriptor: :class:`LabelDescriptor` describes the label setup
        of an experiment

    :returns: set of modification symbols taken from the values of the
        'aminoAcidLabels' entries; empty strings are not included
    """
    modSymbols = set()
    for stateEntry in viewvalues(labelDescriptor.labels):
        for positionLabels in viewvalues(stateEntry['aminoAcidLabels']):
            # A position may carry a single symbol or several of them.
            modSymbols.update(aux.toList(positionLabels))
    # An empty string stands for "no label" and is not a modification symbol.
    modSymbols.discard('')
    return modSymbols
def modAminoacidsFromLabelInfo(labelDescriptor):
    """Collects every amino acid and terminus that can bear a label in any
    label state of "labelDescriptor".

    :param labelDescriptor: :class:`LabelDescriptor` describes the label setup
        of an experiment

    :returns: set of the keys used in the 'aminoAcidLabels' entries of all
        label states; empty strings are not included
    """
    modAminoacids = set()
    for stateEntry in viewvalues(labelDescriptor.labels):
        for labelPosition in viewkeys(stateEntry['aminoAcidLabels']):
            modAminoacids.update(aux.toList(labelPosition))
    modAminoacids.discard('')
    return modAminoacids
def expectedLabelPosition(peptide, labelStateInfo, sequence=None,
                          modPositions=None):
    """Predicts which label modifications a peptide carries in one label state.

    :param peptide: Peptide sequence used to calculate the expected label
        state modifications
    :param labelStateInfo: An entry of :attr:`LabelDescriptor.labels` that
        describes a label state
    :param sequence: unmodified amino acid sequence of "peptide", if None it
        is generated by :func:`maspy.peptidemethods.removeModifications()`
    :param modPositions: dictionary describing the modification state of
        "peptide", if None it is generated by
        :func:`maspy.peptidemethods.returnModPositions()`

    :returns: {sequence position: sorted list of expected label modifications
        on that position, ...}
    """
    if modPositions is None:
        # NOTE(review): returnLabelState() additionally passes
        # removeModString=False when generating modPositions -- confirm that
        # relying on the default here is intended.
        modPositions = maspy.peptidemethods.returnModPositions(peptide,
                                                               indexStart=0)
    if sequence is None:
        sequence = maspy.peptidemethods.removeModifications(peptide)

    # Step 1: place every label on the positions where it is expected.
    expectedMods = dict()
    for labelPosition, labelSymbols in viewitems(
            labelStateInfo['aminoAcidLabels']):
        labelSymbols = aux.toList(labelSymbols)
        if labelSymbols == ['']:
            # An empty string means this position is unlabeled in this state.
            continue
        if labelPosition == 'nTerm':
            # N-terminal labels are assigned to sequence position 0.
            targetPositions = [0]
        else:
            # Every other key, including 'cTerm', is searched for in the
            # sequence like an amino acid.
            # NOTE(review): presumably 'cTerm' never matches, so C-terminal
            # labels are not placed -- confirm.
            targetPositions = aux.findAllSubstrings(sequence, labelPosition)
        for position in targetPositions:
            expectedMods.setdefault(position, []).extend(labelSymbols)

    # Step 2: each excluding modification present on the peptide removes one
    # expected label of the given kind from the position it occupies.
    excludingModifications = labelStateInfo['excludingModifications']
    if excludingModifications is not None:
        for excludingMod, excludedLabel in viewitems(excludingModifications):
            if excludingMod not in modPositions:
                continue
            for position in modPositions[excludingMod]:
                modsAtPosition = expectedMods.get(position)
                if modsAtPosition is None:
                    continue
                if excludedLabel not in modsAtPosition:
                    continue
                # Drop the first occurrence only; positions left without any
                # expected label are removed from the result entirely.
                modsAtPosition.remove(excludedLabel)
                if not modsAtPosition:
                    del expectedMods[position]

    # Step 3: sort so that results can be compared independent of the order in
    # which labels were added.
    for modsAtPosition in viewvalues(expectedMods):
        modsAtPosition.sort()
    return expectedMods