"""
The class LabelDescriptor allows the specification of a labeling strategy with
stable isotopic labels. It can then be used to determine the labeling state of
a given peptide and calculate the expected mass of alternative labeling
states.
"""
# Copyright 2015-2017 David M. Hollenstein, Jakob J. Hollenstein
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
######################## Python 2 and 3 compatibility #########################
from __future__ import absolute_import, division, print_function
from __future__ import unicode_literals
from future.utils import viewitems, viewkeys, viewvalues, listitems, listvalues
try:
#python 2.7
from itertools import izip as zip
except ImportError:
#python 3 series
pass
################################################################################
import itertools
import warnings
import maspy.auxiliary as aux
import maspy.constants
import maspy.peptidemethods
#TODO: review for compatibility to current maspy version
# Import-time side effect: issues a warning that this module is not up to date.
# NOTE(review): ImportWarning is hidden by Python's default warning filters, so
# users will typically not see this message unless they enable it (e.g. with
# ``-W default``) -- confirm that this warning category is intended.
warnings.warn('Module maspy.sil is not up to date and may change in the future',
ImportWarning)
# --- Functions dealing with isotopic labels --- #
class LabelDescriptor(object):
    """Describes a MS1 stable isotope label setup for quantification.

    :ivar labels: Contains a dictionary with all possible label states, keys
        are increasing integers starting from 0, which correspond to the
        different label states. Each value is a dictionary with the two keys
        ``'aminoAcidLabels'`` and ``'excludingModifications'`` as passed to
        :meth:`addLabel`.
    :ivar excludingModifictions: bool, True if any label has specified
        excludingModifications. (The attribute name is misspelled; it is kept
        as is because it is part of the public interface.)
    """

    def __init__(self):
        self.labels = dict()
        self.excludingModifictions = False
        # Key that the next call of addLabel() assigns to the new label state.
        self._labelCounter = 0

    def addLabel(self, aminoAcidLabels, excludingModifications=None):
        """Adds a new labelstate.

        The passed dictionaries are copied, so modifying them after the call
        (for example to reuse them for the next label state) does not alter
        label states that were already added.

        :param aminoAcidLabels: Describes which amino acids can bear which
            labels. Possible keys are the amino acids in one letter code and
            'nTerm', 'cTerm'. Possible values are the modifications ids from
            :attr:`maspy.constants.aaModMass` as strings or a list of strings.
            An example for one expected label at the n-terminus and two
            expected labels at each Lysine:
            ``{'nTerm': 'u:188', 'K': ['u:188', 'u:188']}``
        :param excludingModifications: optional, a dictionary that describes
            which modifications can prevent the addition of labels. Keys and
            values have to be the modifications ids from
            :attr:`maspy.constants.aaModMass`. The key specifies the
            modification that prevents the label modification specified by the
            value. For example for each modification 'u:1' that is present at
            an amino acid or terminus of a peptide the number of expected
            labels at this position is reduced by one: ``{'u:1':'u:188'}``
        """
        # Local import keeps this block self-contained; "copy" is stdlib.
        import copy

        if excludingModifications is not None:
            self.excludingModifictions = True

        # Deep copies decouple the stored label state from the caller's
        # objects, including the per-position lists of modification ids.
        labelEntry = {
            'aminoAcidLabels': copy.deepcopy(aminoAcidLabels),
            'excludingModifications': copy.deepcopy(excludingModifications),
        }
        self.labels[self._labelCounter] = labelEntry
        self._labelCounter += 1
def returnLabelStateMassDifferences(peptide, labelDescriptor, labelState=None,
                                    sequence=None):
    """Calculates the mass difference for alternative possible label states of
    a given peptide. See also :class:`LabelDescriptor`,
    :func:`returnLabelState()`

    :param peptide: Peptide to calculate alternative label states
    :param labelDescriptor: :class:`LabelDescriptor` describes the label setup
        of an experiment
    :param labelState: label state of the peptide, if None it is calculated by
        :func:`returnLabelState()`
    :param sequence: unmodified amino acid sequence of the "peptide", if None
        it is generated by :func:`maspy.peptidemethods.removeModifications()`

    :returns: {alternativeLabelState: massDifference, ...} or {} if the
        peptide label state is negative (no unambiguous label state).

    .. note:: The massDifference plus the peptide mass is the expected mass of
        an alternatively labeled peptide
    """
    if labelState is None:
        labelState = returnLabelState(peptide, labelDescriptor)
    if labelState < 0:
        # Negative values are status codes, not label states; there is no
        # reference state to calculate differences from.
        return dict()
    if sequence is None:
        sequence = maspy.peptidemethods.removeModifications(peptide)

    # Combined mass of the labels of the peptide in its own label state.
    labelMass = _combinedLabelMass(peptide, labelDescriptor.labels[labelState],
                                   sequence)

    # Calculate mass differences to all other possible label states.
    labelStateMassDifferences = dict()
    for possibleLabelState, labelStateInfo in viewitems(labelDescriptor.labels):
        if possibleLabelState == labelState:
            continue
        possibleLabelMass = _combinedLabelMass(peptide, labelStateInfo,
                                               sequence)
        labelStateMassDifferences[possibleLabelState] = (possibleLabelMass
                                                         - labelMass)
    return labelStateMassDifferences


def _combinedLabelMass(peptide, labelStateInfo, sequence):
    """Returns the summed mass of all labels expected for one label state.

    :param peptide: Peptide for which the expected labels are determined
    :param labelStateInfo: An entry of :attr:`LabelDescriptor.labels`
    :param sequence: unmodified amino acid sequence of "peptide"

    :returns: sum of ``maspy.constants.aaModMass[label] * count`` over all
        label modifications returned by :func:`expectedLabelPosition()`
    """
    # Count how often each label modification is expected on the peptide.
    labelModNumbers = dict()
    expectedPositions = expectedLabelPosition(peptide, labelStateInfo,
                                              sequence=sequence)
    for labelMods in viewvalues(expectedPositions):
        for labelMod in labelMods:
            labelModNumbers[labelMod] = labelModNumbers.get(labelMod, 0) + 1

    labelMass = 0
    for labelMod, modCounts in viewitems(labelModNumbers):
        labelMass += maspy.constants.aaModMass[labelMod] * modCounts
    return labelMass
def returnLabelState(peptide, labelDescriptor, labelSymbols=None,
                     labelAminoacids=None):
    """Calculates the label state of a given peptide for the label setup
    described in labelDescriptor

    :param peptide: peptide which label state should be calculated
    :param labelDescriptor: :class:`LabelDescriptor`, describes the label setup
        of an experiment.
    :param labelSymbols: modifications that show a label, as returned by
        :func:`modSymbolsFromLabelInfo`.
    :param labelAminoacids: amino acids that can bear a label, as returned by
        :func:`modAminoacidsFromLabelInfo`.

    :returns: integer that shows the label state:
        >=0: predicted label state of the peptide
         -1: peptide sequence can't bear any labelState modifications
         -2: peptide modifications don't fit to any predicted labelState
         -3: peptide modifications fit to a predicted labelState, but not all
             predicted labelStates are distinguishable
    """
    if labelSymbols is None:
        labelSymbols = modSymbolsFromLabelInfo(labelDescriptor)
    if labelAminoacids is None:
        labelAminoacids = modAminoacidsFromLabelInfo(labelDescriptor)

    sequence = maspy.peptidemethods.removeModifications(peptide)
    modPositions = maspy.peptidemethods.returnModPositions(
        peptide, indexStart=0, removeModString=False
    )

    # The peptide can bear a label if a terminal label is specified by the
    # labelDescriptor or if the sequence contains a labelable amino acid.
    # Note: at the moment presence of excluding modifications are ignored
    canBearLabel = (
        'nTerm' in labelAminoacids
        or 'cTerm' in labelAminoacids
        or any(aminoAcid in sequence for aminoAcid in labelAminoacids)
    )
    if not canBearLabel:
        return -1

    # Collect the label modifications actually present on the peptide as
    # {sequence position: sorted list of label symbols}.
    peptideLabelPositions = dict()
    for labelSymbol in labelSymbols:
        if labelSymbol not in modPositions:
            continue
        for sequencePosition in modPositions[labelSymbol]:
            peptideLabelPositions.setdefault(sequencePosition, list())
            peptideLabelPositions[sequencePosition].append(labelSymbol)
    for presentLabels in viewvalues(peptideLabelPositions):
        presentLabels.sort()

    # Check if the peptide modifications fit to any predicted label state.
    labelState = None
    predictedLabelStates = dict()
    for predictedLabelState, labelStateInfo in viewitems(labelDescriptor.labels):
        expectedLabelMods = expectedLabelPosition(peptide, labelStateInfo,
                                                  sequence=sequence,
                                                  modPositions=modPositions)
        predictedLabelStates[predictedLabelState] = expectedLabelMods
        if peptideLabelPositions == expectedLabelMods:
            # If several label states match, the last one is kept here; such
            # states are identical to each other and therefore reported as
            # indistinguishable (-3) by the check below.
            labelState = predictedLabelState

    if labelState is None:
        # Peptide modifications don't fit to any predicted label state
        return -2

    # Check if all predicted label states are distinguishable. The pairs are
    # built from the actual keys of the label states, so no assumption about
    # the keys being the contiguous integers 0..n-1 is needed.
    for state1, state2 in itertools.combinations(predictedLabelStates, 2):
        if predictedLabelStates[state1] == predictedLabelStates[state2]:
            return -3
    return labelState
def modSymbolsFromLabelInfo(labelDescriptor):
    """Returns a set of all modification symbols which were used in the
    labelDescriptor

    :param labelDescriptor: :class:`LabelDescriptor` describes the label setup
        of an experiment

    :returns: set of all non-empty modification symbols that occur as values
        of the 'aminoAcidLabels' entries of any label state
    """
    return {
        symbol
        for stateInfo in viewvalues(labelDescriptor.labels)
        for positionLabels in viewvalues(stateInfo['aminoAcidLabels'])
        # A position can carry a single symbol or a list of symbols.
        for symbol in aux.toList(positionLabels)
        if symbol != ''
    }
def modAminoacidsFromLabelInfo(labelDescriptor):
    """Returns a set of all amino acids and termini which can bear a label, as
    described in "labelDescriptor".

    :param labelDescriptor: :class:`LabelDescriptor` describes the label setup
        of an experiment

    :returns: set of all non-empty keys of the 'aminoAcidLabels' entries of
        any label state (amino acids in one letter code, 'nTerm', 'cTerm')
    """
    return {
        labelTarget
        for stateInfo in viewvalues(labelDescriptor.labels)
        for positionKey in viewkeys(stateInfo['aminoAcidLabels'])
        for labelTarget in aux.toList(positionKey)
        if labelTarget != ''
    }
def expectedLabelPosition(peptide, labelStateInfo, sequence=None,
                          modPositions=None):
    """Returns a modification description of a certain label state of a peptide.

    :param peptide: Peptide sequence used to calculate the expected label state
        modifications
    :param labelStateInfo: An entry of :attr:`LabelDescriptor.labels` that
        describes a label state
    :param sequence: unmodified amino acid sequence of "peptide", if None
        it is generated by :func:`maspy.peptidemethods.removeModifications()`
    :param modPositions: dictionary describing the modification state of
        "peptide", if None it is generated by
        :func:`maspy.peptidemethods.returnModPositions()`

    :returns: {sequence position: sorted list of expected label modifications
                  on that position, ...
               }

    .. note:: Only 'nTerm' is treated as a terminus (mapped to position 0).
        A 'cTerm' key is passed to ``aux.findAllSubstrings()`` like an amino
        acid; presumably this yields no positions for a plain amino acid
        sequence -- TODO confirm whether C-terminal labels are meant to be
        supported.
    """
    if modPositions is None:
        modPositions = maspy.peptidemethods.returnModPositions(peptide,
                                                               indexStart=0)
    if sequence is None:
        sequence = maspy.peptidemethods.removeModifications(peptide)

    # Step 1: collect all labels the label state places on the sequence.
    expected = dict()
    for target, symbols in viewitems(labelStateInfo['aminoAcidLabels']):
        symbols = aux.toList(symbols)
        if symbols == ['']:
            # An empty symbol means "no label" for this target.
            continue
        if target == 'nTerm':
            positions = [0]
        else:
            positions = aux.findAllSubstrings(sequence, target)
        for position in positions:
            expected.setdefault(position, list()).extend(symbols)

    # Step 2: every excluding modification present on the peptide removes one
    # occurrence of the label it excludes from the same position.
    exclusions = labelStateInfo['excludingModifications']
    if exclusions is not None:
        for blockingMod, blockedSymbol in viewitems(exclusions):
            if blockingMod not in modPositions:
                continue
            for position in modPositions[blockingMod]:
                if position not in expected:
                    continue
                labelsAtPosition = expected[position]
                if blockedSymbol not in labelsAtPosition:
                    continue
                labelsAtPosition.remove(blockedSymbol)
                if not labelsAtPosition:
                    # No labels left, the position is not expected at all.
                    del expected[position]

    # Step 3: sort the labels of each position to allow direct comparison.
    return {position: sorted(labelsAtPosition)
            for position, labelsAtPosition in viewitems(expected)}