diff --git a/music21/features/base.py b/music21/features/base.py index a22b81161..f26214c57 100644 --- a/music21/features/base.py +++ b/music21/features/base.py @@ -11,12 +11,14 @@ from __future__ import annotations from collections import Counter -from collections.abc import KeysView +from collections.abc import Collection, Iterable, KeysView, Sequence import os import pathlib import pickle +import typing as t import unittest +from music21 import chord from music21 import common from music21.common.parallel import safeToParallize from music21.common.types import StreamType @@ -25,12 +27,27 @@ from music21 import environment from music21 import exceptions21 from music21 import note +from music21 import pitch from music21 import stream from music21 import text -from music21.metadata.bundles import MetadataEntry +from music21.metadata.bundles import MetadataBundle, MetadataEntry + +if t.TYPE_CHECKING: + from music21.features import outputFormats + from music21.stream.base import SecondsMapEntry environLocal = environment.Environment('features.base') + +# A Stream or a path/reference that can be parsed into one. +type StreamOrPath = stream.Stream|MetadataEntry|str|pathlib.Path +# A single datum that a DataSet/DataInstance can ingest. +type DataSource = StreamOrPath|DataInstance +# A concrete class label or id value: a plain scalar. +type ClassValue = str|float|int +# A class value or id: either a fixed value or a pickleable function of the +# parsed Stream that produces one (evaluated lazily after parsing). +type ValueOrFunction = ClassValue|t.Callable[[stream.Stream], ClassValue] # ------------------------------------------------------------------------------ @@ -46,6 +63,8 @@ class Feature: Feature objects are simple. It is FeatureExtractors that store all metadata and processing routines for creating Feature objects. Normally you wouldn't create one of these yourself. + * Changed in v11: the `.vector` starts out as an empty list rather than None. + >>> myFeature = features.Feature() >>> myFeature.dimensions = 3 >>> myFeature.name = 'Random arguments' @@ -55,12 +74,12 @@ class Feature: >>> myFeature.discrete = False - The .vector is the most important part of the feature, and it starts out as None. + The .vector is the most important part of the feature, and it starts out empty. - >>> myFeature.vector is None - True + >>> myFeature.vector + [] - Calling .prepareVector() gives it a list of Zeros of the length of dimensions. + Calling .prepareVectors() gives it a list of zeros of the length of the dimensions. >>> myFeature.prepareVectors() @@ -88,30 +107,30 @@ class Feature: def __init__(self) -> None: # these values will be filled by the extractor self.dimensions: int = 1 # number of dimensions - # data storage; possibly use numpy array - self.vector = None + # data storage; possibly use numpy array. Populated by prepareVectors(). + self.vector: list[int|float] = [] - # consider not storing this values, as may not be necessary - self.name = None # string name representation - self.description = None # string description - self.isSequential = None # True or False - self.discrete = None # is discrete or continuous + # consider not storing these values, as they may not be necessary + self.name: str = '' # string name representation + self.description: str = '' # string description + self.isSequential: bool = True # True or False + self.discrete: bool = True # is discrete or continuous def _getVectors(self) -> list[int|float]: ''' - Prepare a vector of appropriate size and return + Prepare a vector of appropriate size and return it. ''' return [0] * self.dimensions - def prepareVectors(self): + def prepareVectors(self) -> None: ''' Prepare the vector stored in this feature. ''' self.vector = self._getVectors() - def normalize(self): + def normalize(self) -> None: ''' - Normalizes the vector so that the sum of its elements is 1. + Normalize the vector so that the sum of its elements is 1. ''' s = sum(self.vector) try: @@ -134,31 +153,34 @@ class FeatureExtractor: All Streams are internally converted to a DataInstance if necessary. Usage of a DataInstance offers significant performance advantages, as common forms of the Stream are cached for easy processing. + + * Changed in v11: `dimensions` now defaults to 1, so single-dimension + extractors no longer need to set it. `name`, `description`, + `isSequential`, `discrete`, and `normalize` are now class-level + attributes with non-None defaults; subclasses override them directly. + + This module's type annotations were added with AI assistance (Claude). ''' + # these class-level attributes are overridden by subclasses + id: str = '' # string identifier + name: str = '' # string name representation + description: str = '' # string description + isSequential: bool = True # True or False + dimensions: int = 1 # number of dimensions + discrete: bool = True # is discrete or continuous + normalize: bool = False # whether the feature vector is normalized + def __init__(self, - dataOrStream=None, + dataOrStream: stream.Stream|DataInstance|None = None, **keywords ) -> None: - self.stream = None # the original Stream, or None + self.stream: stream.Stream|None = None # the original Stream, or None self.data: DataInstance|None = None # a DataInstance object: use to get data self.setData(dataOrStream) - self.feature = None # Feature object that results from processing + self.feature: Feature|None = None # Feature object that results from processing - if not hasattr(self, 'name'): - self.name = None # string name representation - if not hasattr(self, 'description'): - self.description = None # string description - if not hasattr(self, 'isSequential'): - self.isSequential = None # True or False - if not hasattr(self, 'dimensions'): - self.dimensions = None # number of dimensions - if not hasattr(self, 'discrete'): - self.discrete = True # default - if not hasattr(self, 'normalize'): - self.normalize = False # default is no - - def setData(self, dataOrStream): + def setData(self, dataOrStream: stream.Stream|DataInstance|None) -> None: ''' Set the data that this FeatureExtractor will process. Either a Stream or a DataInstance object can be provided. @@ -177,9 +199,9 @@ def setData(self, dataOrStream): self.stream = None self.data = dataOrStream - def getAttributeLabels(self): + def getAttributeLabels(self) -> list[str]: ''' - Return a list of string in a form that is appropriate for data storage. + Return a list of strings in a form that is appropriate for data storage. >>> fe = features.jSymbolic.AmountOfArpeggiationFeature() >>> fe.getAttributeLabels() @@ -193,7 +215,7 @@ def getAttributeLabels(self): 'Fifths_Pitch_Histogram_9', 'Fifths_Pitch_Histogram_10', 'Fifths_Pitch_Histogram_11'] ''' - post = [] + post: list[str] = [] if self.dimensions == 1: post.append(self.name.replace(' ', '_')) else: @@ -201,7 +223,7 @@ def getAttributeLabels(self): post.append(f"{self.name.replace(' ', '_')}_{i}") return post - def fillFeatureAttributes(self, feature=None): + def fillFeatureAttributes(self, feature: Feature|None = None) -> Feature: # noinspection GrazieInspection ''' Fill the attributes of a Feature with the descriptors in the FeatureExtractor. @@ -209,6 +231,8 @@ def fillFeatureAttributes(self, feature=None): # operate on passed-in feature or self.feature if feature is None: feature = self.feature + if feature is None: # pragma: no cover + raise FeatureException('cannot fill attributes without a feature') feature.name = self.name feature.description = self.description feature.isSequential = self.isSequential @@ -216,7 +240,7 @@ def fillFeatureAttributes(self, feature=None): feature.discrete = self.discrete return feature - def prepareFeature(self): + def prepareFeature(self) -> None: ''' Prepare a new Feature object for data acquisition. @@ -234,14 +258,14 @@ def prepareFeature(self): self.fillFeatureAttributes() # will fill self.feature self.feature.prepareVectors() # will vector with necessary zeros - def process(self): + def process(self) -> None: ''' Do processing necessary, storing result in _feature. ''' # do work in subclass, calling on self.data pass - def extract(self, source=None): + def extract(self, source: stream.Stream|None = None) -> Feature: ''' Extract the feature and return the result. ''' @@ -250,13 +274,15 @@ def extract(self, source=None): # preparing the feature always sets self.feature to a new instance self.prepareFeature() self.process() # will set Feature object to _feature + if self.feature is None: # pragma: no cover + raise FeatureException('process() failed to produce a feature') if self.normalize: self.feature.normalize() return self.feature - def getBlankFeature(self): + def getBlankFeature(self) -> Feature: ''' - Return a properly configured plain feature as a placeholder + Return a properly configured plain feature as a placeholder. >>> fe = features.jSymbolic.InitialTimeSignatureFeature() >>> fe.name @@ -294,7 +320,7 @@ class StreamForms: it simple to add additional feature extractors at low additional time cost. ''' - def __init__(self, streamObj: stream.Stream, prepareStream=True): + def __init__(self, streamObj: stream.Stream, prepareStream: bool = True) -> None: self.stream = streamObj if self.stream is not None: if prepareStream: @@ -386,18 +412,18 @@ def _getIntervalHistogram(self, algorithm='midi') -> list[int]: return histo # ---------------------------------------------------------------------------- - def formPartitionByInstrument(self, prepared: stream.Stream): + def formPartitionByInstrument(self, prepared: stream.Stream) -> stream.Stream: from music21 import instrument return instrument.partitionByInstrument(prepared) - def formSetClassHistogram(self, prepared): + def formSetClassHistogram(self, prepared: stream.Stream) -> Counter[str]: return Counter([c.forteClassTnI for c in prepared]) - def formPitchClassSetHistogram(self, prepared): + def formPitchClassSetHistogram(self, prepared: stream.Stream) -> Counter[str]: return Counter([c.orderedPitchClassesString for c in prepared]) - def formTypesHistogram(self, prepared): - histo = {} + def formTypesHistogram(self, prepared: stream.Stream) -> dict[str, int]: + histo: dict[str, int] = {} # keys are methods on Chord keys = ['isTriad', 'isSeventh', 'isMajorTriad', 'isMinorTriad', @@ -414,7 +440,8 @@ def formTypesHistogram(self, prepared): histo[thisKey] += 1 return histo - def formGetElementsByClassMeasure(self, prepared): + def formGetElementsByClassMeasure(self, prepared: stream.Stream) -> stream.Stream: + post: stream.Stream if isinstance(prepared, stream.Score): post = stream.Stream() for p in prepared.parts: @@ -422,10 +449,10 @@ def formGetElementsByClassMeasure(self, prepared): for m in p.getElementsByClass(stream.Measure): post.insert(m.getOffsetBySite(p), m) else: - post = prepared.getElementsByClass(stream.Measure) + post = prepared.getElementsByClass(stream.Measure).stream() return post - def formChordify(self, prepared): + def formChordify(self, prepared: stream.Stream) -> stream.Stream: if isinstance(prepared, stream.Score): # options here permit getting part information out # of chordified representation @@ -436,28 +463,29 @@ def formChordify(self, prepared): # in the part? return prepared - def formQuarterLengthHistogram(self, prepared): + def formQuarterLengthHistogram(self, prepared: stream.Stream) -> Counter[float]: return Counter([float(n.quarterLength) for n in prepared]) - def formMidiPitchHistogram(self, pitches): + def formMidiPitchHistogram(self, pitches: Iterable[pitch.Pitch]) -> Counter[int]: return Counter([p.midi for p in pitches]) - def formPitchClassHistogram(self, pitches): + def formPitchClassHistogram(self, pitches: Iterable[pitch.Pitch]) -> list[int]: cc = Counter([p.pitchClass for p in pitches]) histo = [0] * 12 for k in cc: histo[k] = cc[k] return histo - def formMidiIntervalHistogram(self, unused): + def formMidiIntervalHistogram(self, unused: stream.Stream) -> list[int]: return self._getIntervalHistogram('midi') - def formContourList(self, prepared): + def formContourList(self, prepared: stream.Stream) -> list[int]: # list of all directed half steps - cList = [] + cList: list[int] = [] # if we have parts, must add one at a time + parts: list[stream.Stream] if prepared.hasPartLikeStreams(): - parts = prepared.parts + parts = list(t.cast('stream.Score', prepared).parts) else: parts = [prepared] # emulate a list @@ -474,21 +502,21 @@ def formContourList(self, prepared): iNext = i + 1 nNext = post[iNext] - if n.isChord: + if isinstance(n, chord.Chord): ps = n.sortDiatonicAscending().pitches[-1].midi else: # normal note - ps = n.pitch.midi - if nNext.isChord: + ps = t.cast('note.Note', n).pitch.midi + if isinstance(nNext, chord.Chord): psNext = nNext.sortDiatonicAscending().pitches[-1].midi else: # normal note - psNext = nNext.pitch.midi + psNext = t.cast('note.Note', nNext).pitch.midi cList.append(psNext - ps) # environLocal.printDebug(['contourList', cList]) return cList - def formSecondsMap(self, prepared): - post = [] + def formSecondsMap(self, prepared: stream.Stream) -> list[SecondsMapEntry]: + post: list[SecondsMapEntry] = [] secondsMap = prepared.secondsMap # filter only notes; all elements would otherwise be gathered for bundle in secondsMap: @@ -496,7 +524,7 @@ def formSecondsMap(self, prepared): post.append(bundle) return post - def formBeatHistogram(self, secondsMap): + def formBeatHistogram(self, secondsMap: Iterable[SecondsMapEntry]) -> list[int]: secondsList = [d['durationSeconds'] for d in secondsMap] bpmList = [round(60.0 / d) for d in secondsList] histogram = [0] * 200 @@ -541,7 +569,9 @@ class DataInstance: ''' # pylint: disable=redefined-builtin # noinspection PyShadowingBuiltins - def __init__(self, streamOrPath=None, id=None): + def __init__(self, streamOrPath: StreamOrPath|None = None, + id: ValueOrFunction|None = None) -> None: + self.stream: stream.Stream|None if isinstance(streamOrPath, stream.Stream): self.stream = streamOrPath self.streamPath = None @@ -551,6 +581,7 @@ def __init__(self, streamOrPath=None, id=None): # store an id for the source stream: file path url, corpus url # or metadata title + self._id: ValueOrFunction if id is not None: self._id = id elif ((s := self.stream) is not None @@ -570,24 +601,24 @@ def __init__(self, streamOrPath=None, id=None): self._id = '' # the attribute name in the data set for this label - self.classLabel = None + self.classLabel: str = '' # store the class value for this data instance - self._classValue = None + self._classValue: ValueOrFunction|None = None self.partsCount = 0 - self.forms = None + self.forms: StreamForms|None = None # store a list of voices, extracted from each part, - self.formsByVoice = [] + self.formsByVoice: list[StreamForms] = [] # if parts exist, store a forms for each - self.formsByPart = [] + self.formsByPart: list[StreamForms] = [] - self.featureExtractorClassesForParallelRunning = [] + self.featureExtractorClassesForParallelRunning: list[type[FeatureExtractor]] = [] if self.stream is not None: self.setupPostStreamParse() - def setupPostStreamParse(self): + def setupPostStreamParse(self) -> None: ''' Set up the StreamForms objects and other things that need to be done after a Stream is passed in but before @@ -598,12 +629,15 @@ def setupPostStreamParse(self): # perform basic operations that are performed on all # streams + if self.stream is None: # pragma: no cover + return + # store a dictionary of StreamForms self.forms = StreamForms(self.stream) # if parts exist, store a forms for each self.formsByPart = [] - if hasattr(self.stream, 'parts'): + if isinstance(self.stream, stream.Score): self.partsCount = len(self.stream.parts) for p in self.stream.parts: # note that this will join ties and expand rests again @@ -614,7 +648,7 @@ def setupPostStreamParse(self): for v in self.stream[stream.Voice]: self.formsByPart.append(StreamForms(v)) - def setClassLabel(self, classLabel, classValue=None): + def setClassLabel(self, classLabel: str, classValue: ValueOrFunction|None = None) -> None: ''' Set the class label, as well as the class value if known. The class label is the attribute name used to define the class of this data instance. @@ -627,29 +661,30 @@ def setClassLabel(self, classLabel, classValue=None): self.classLabel = classLabel self._classValue = classValue - def getClassValue(self): - if self._classValue is None or callable(self._classValue) and self.stream is None: - return '' - - if callable(self._classValue) and self.stream is not None: - self._classValue = self._classValue(self.stream) - - return self._classValue - - def getId(self): - if self._id is None or callable(self._id) and self.stream is None: + def getClassValue(self) -> ClassValue: + classValue = self._classValue + if classValue is None: return '' - - if callable(self._id) and self.stream is not None: - self._id = self._id(self.stream) - - # make sure there are no spaces - try: - return self._id.replace(' ', '_') - except AttributeError as e: - raise AttributeError(str(self._id)) from e - - def parseStream(self): + if callable(classValue): + if self.stream is None: + return '' + classValue = classValue(self.stream) + self._classValue = classValue + return classValue + + def getId(self) -> str: + idValue = self._id + if callable(idValue): + if self.stream is None: + return '' + idValue = idValue(self.stream) + self._id = idValue + # make sure there are no spaces; ids that are not strings are an error + if isinstance(idValue, str): + return idValue.replace(' ', '_') + raise AttributeError(str(idValue)) + + def parseStream(self) -> None: ''' If a path to a Stream has been passed in at creation, then this will parse it (whether it's a corpus string, @@ -679,7 +714,10 @@ def parseStream(self): self.stream = s self.setupPostStreamParse() - def __getitem__(self, key): + def __getitem__(self, key: str): + # the return is deliberately left untyped: a "form" can be a Stream, + # a Counter, a list, a float, etc., depending on the key, and any + # concrete annotation would be a lie that breaks the many call sites. ''' Get a form of this Stream, using a cached version if available. @@ -704,6 +742,8 @@ def __getitem__(self, key): return self.formsByVoice # try to create by calling the attribute # will raise an attribute error if there is a problem + if self.forms is None: # pragma: no cover + raise FeatureException('cannot get a form from an unparsed DataInstance') return self.forms[key] @@ -742,17 +782,18 @@ class DataSet: Set ds.quiet = False to print them regardless of debug mode. ''' - def __init__(self, classLabel=None, featureExtractors=()): + def __init__(self, classLabel: str|None = None, + featureExtractors: Collection[type[FeatureExtractor]] = ()) -> None: # assume a two dimensional array - self.dataInstances = [] + self.dataInstances: list[DataInstance] = [] # order of feature extractors is the order used in the presentations - self._featureExtractors = [] - self._instantiatedFeatureExtractors = [] + self._featureExtractors: list[type[FeatureExtractor]] = [] + self._instantiatedFeatureExtractors: list[FeatureExtractor] = [] # the label of the class self._classLabel = classLabel # store a multidimensional storage of all features - self.features = [] + self.features: list[list[Feature]] = [] self.failFast = False self.quiet = True @@ -761,24 +802,27 @@ def __init__(self, classLabel=None, featureExtractors=()): # set extractors self.addFeatureExtractors(featureExtractors) - def getClassLabel(self): + def getClassLabel(self) -> str|None: return self._classLabel - def addFeatureExtractors(self, values): + def addFeatureExtractors( + self, + values: type[FeatureExtractor]|Collection[type[FeatureExtractor]] + ) -> None: ''' Add one or more FeatureExtractor objects, either as a list or as an individual object. ''' # features are instantiated here # however, they do not have a data assignment - if not common.isIterable(values): + if isinstance(values, type): # a single FeatureExtractor subclass values = [values] # need to create instances for sub in values: self._featureExtractors.append(sub) self._instantiatedFeatureExtractors.append(sub()) - def getAttributeLabels(self, includeClassLabel=True, - includeId=True): + def getAttributeLabels(self, includeClassLabel: bool = True, + includeId: bool = True) -> list[str]: ''' Return a list of all attribute labels. Optionally add a class label field and/or an id field. @@ -805,7 +849,8 @@ def getAttributeLabels(self, includeClassLabel=True, post.append(self._classLabel.replace(' ', '_')) return post - def getDiscreteLabels(self, includeClassLabel=True, includeId=True): + def getDiscreteLabels(self, includeClassLabel: bool = True, + includeId: bool = True) -> list[bool|None]: ''' Return column labels for discrete status. @@ -816,7 +861,7 @@ def getDiscreteLabels(self, includeClassLabel=True, includeId=True): [None, False, False, False, False, False, False, False, False, False, False, False, False, True, True] ''' - post = [] + post: list[bool|None] = [] if includeId: post.append(None) # just a spacer for fe in self._instantiatedFeatureExtractors: @@ -827,9 +872,9 @@ def getDiscreteLabels(self, includeClassLabel=True, includeId=True): post.append(True) return post - def getClassPositionLabels(self, includeId=True): + def getClassPositionLabels(self, includeId: bool = True) -> list[bool|None]: ''' - Return column labels for the presence of a class definition + Return column labels for the presence of a class definition. >>> f = [features.jSymbolic.PitchClassDistributionFeature, ... features.jSymbolic.ChangesOfMeterFeature] @@ -838,7 +883,7 @@ def getClassPositionLabels(self, includeId=True): [None, False, False, False, False, False, False, False, False, False, False, False, False, False, True] ''' - post = [] + post: list[bool|None] = [] if includeId: post.append(None) # just a spacer for fe in self._instantiatedFeatureExtractors: @@ -849,13 +894,18 @@ def getClassPositionLabels(self, includeId=True): post.append(True) return post - def addMultipleData(self, dataList, classValues, ids=None): + def addMultipleData( + self, + dataList: Sequence[DataSource]|MetadataBundle, + classValues: Sequence[ValueOrFunction]|t.Callable[[stream.Stream], ClassValue], + ids: Sequence[ValueOrFunction|None]|t.Callable[[stream.Stream], str]|None = None, + ) -> None: ''' - add multiple data points at the same time. + Add multiple data points at the same time. - Requires an iterable (including MetadataBundle) for dataList holding - types that can be passed to addData, and an equally sized list of dataValues - and an equally sized list of ids (or None) + Requires a sequence (including MetadataBundle) for dataList holding + types that can be passed to addData, an equally sized sequence of + classValues, and an equally sized sequence of ids (or None). classValues can also be a pickleable function that will be called on each instance after parsing, as can ids. @@ -870,35 +920,38 @@ def addMultipleData(self, dataList, classValues, ids=None): raise DataSetException( 'If ids is not a function or None, it must have the same length as dataList') + classValueList: Sequence[ValueOrFunction|None] if callable(classValues): try: pickle.dumps(classValues) except pickle.PicklingError: raise DataSetException('classValues if a function must be pickleable. ' + 'Lambda and some other functions are not.') + classValueList = [classValues] * len(dataList) + else: + classValueList = classValues - classValues = [classValues] * len(dataList) - + idList: Sequence[ValueOrFunction|None] if callable(ids): try: pickle.dumps(ids) except pickle.PicklingError: raise DataSetException('ids if a function must be pickleable. ' + 'Lambda and some other functions are not.') - - ids = [ids] * len(dataList) + idList = [ids] * len(dataList) elif ids is None: - ids = [None] * len(dataList) + idList = [None] * len(dataList) + else: + idList = ids for i in range(len(dataList)): - d = dataList[i] - cv = classValues[i] - thisId = ids[i] - self.addData(d, cv, thisId) + self.addData(dataList[i], classValueList[i], idList[i]) # pylint: disable=redefined-builtin # noinspection PyShadowingBuiltins - def addData(self, dataOrStreamOrPath, classValue=None, id=None): + def addData(self, dataOrStreamOrPath: DataSource, + classValue: ValueOrFunction|None = None, + id: ValueOrFunction|None = None) -> None: ''' Add a Stream, DataInstance, MetadataEntry, or path (Posix or str) to a corpus or local file to this data set. @@ -910,31 +963,25 @@ def addData(self, dataOrStreamOrPath, classValue=None, id=None): raise DataSetException( 'cannot add data unless a class label for this DataSet has been set.') - s = None if isinstance(dataOrStreamOrPath, DataInstance): di = dataOrStreamOrPath - s = di.stream - if s is None: - s = di.streamPath else: - # all else are stored directly - s = dataOrStreamOrPath di = DataInstance(dataOrStreamOrPath, id=id) di.setClassLabel(self._classLabel, classValue) self.dataInstances.append(di) - def process(self): + def process(self) -> None: ''' Process all Data with all FeatureExtractors. Processed data is stored internally as numerous Feature objects. ''' if self.runParallel and safeToParallize(): - return self._processParallel() + self._processParallel() else: - return self._processNonParallel() + self._processNonParallel() - def _processParallel(self): + def _processParallel(self) -> None: ''' Run a set of processes in parallel. ''' @@ -957,7 +1004,7 @@ def _processParallel(self): environLocal.printDebug(e) else: environLocal.warn(e) - self.features = featureData + self.features = list(featureData) for i, di in enumerate(self.dataInstances): if callable(di._classValue): @@ -965,9 +1012,9 @@ def _processParallel(self): if callable(di._id): di._id = ids[i] - def _processNonParallel(self): + def _processNonParallel(self) -> None: ''' - The traditional way: run non-parallel + The traditional way: run non-parallel. ''' # clear features self.features = [] @@ -994,14 +1041,15 @@ def _processNonParallel(self): # rows will align with data the order of DataInstances self.features.append(row) - def getFeaturesAsList(self, includeClassLabel=True, includeId=True, concatenateLists=True): + def getFeaturesAsList(self, includeClassLabel: bool = True, includeId: bool = True, + concatenateLists: bool = True) -> list: ''' Get processed data as a list of lists, merging any sub-lists in multidimensional features. ''' - post = [] + post: list = [] for i, row in enumerate(self.features): - v = [] + v: list = [] di = self.dataInstances[i] if includeId: @@ -1020,20 +1068,21 @@ def getFeaturesAsList(self, includeClassLabel=True, includeId=True, concatenateL else: return post - def getUniqueClassValues(self): + def getUniqueClassValues(self) -> list[ClassValue]: ''' Return a list of unique class values. ''' - post = [] + post: list[ClassValue] = [] for di in self.dataInstances: v = di.getClassValue() if v not in post: post.append(v) return post - def _getOutputFormat(self, featureFormat): + def _getOutputFormat(self, featureFormat: str) -> outputFormats.OutputFormat|None: from music21.features import outputFormats - if featureFormat.lower() in ['tab', 'orange', 'taborange', None]: + outputFormat: outputFormats.OutputFormat + if featureFormat.lower() in ['tab', 'orange', 'taborange']: outputFormat = outputFormats.OutputTabOrange(dataSet=self) elif featureFormat.lower() in ['csv', 'comma']: outputFormat = outputFormats.OutputCSV(dataSet=self) @@ -1043,7 +1092,7 @@ def _getOutputFormat(self, featureFormat): return None return outputFormat - def _getOutputFormatFromFilePath(self, fp): + def _getOutputFormatFromFilePath(self, fp: str|pathlib.Path) -> outputFormats.OutputFormat|None: ''' Get an output format from a file path if possible, otherwise return None. @@ -1056,22 +1105,25 @@ def _getOutputFormatFromFilePath(self, fp): True ''' # get format from fp if possible + fp = str(fp) of = None if '.' in fp: - if self._getOutputFormat(fp.split('.')[-1]) is not None: - of = self._getOutputFormat(fp.split('.')[-1]) + of = self._getOutputFormat(fp.rsplit('.', maxsplit=1)[-1]) return of - def getString(self, outputFmt='tab'): + def getString(self, outputFmt: str = 'tab') -> str: ''' Get a string representation of the data set in a specific format. ''' # pass reference to self to output outputFormat = self._getOutputFormat(outputFmt) + if outputFormat is None: # pragma: no cover + raise DataSetException(f'no output format could be defined from {outputFmt}') return outputFormat.getString() # pylint: disable=redefined-builtin - def write(self, fp=None, format=None, includeClassLabel=True): + def write(self, fp: str|pathlib.Path|None = None, format: str|None = None, + includeClassLabel: bool = True) -> str|pathlib.Path: ''' Set the output format object. ''' @@ -1089,9 +1141,11 @@ def write(self, fp=None, format=None, includeClassLabel=True): return outputFormat.write(fp=fp, includeClassLabel=includeClassLabel) -def _dataSetParallelSubprocess(dataInstance, failFast): - row = [] - errors = [] +def _dataSetParallelSubprocess( + dataInstance: DataInstance, failFast: bool +) -> tuple[list[Feature], list[str], ClassValue, str]: + row: list[Feature] = [] + errors: list[str] = [] # howBigWeCopied = len(pickle.dumps(dataInstance)) # print('Starting ', dataInstance, ' Size: ', howBigWeCopied) for feClass in dataInstance.featureExtractorClassesForParallelRunning: @@ -1113,12 +1167,12 @@ def _dataSetParallelSubprocess(dataInstance, failFast): return row, errors, dataInstance.getClassValue(), dataInstance.getId() -def allFeaturesAsList(streamInput): +def allFeaturesAsList(streamInput: DataSource) -> list: # noinspection PyShadowingNames ''' - returns a list containing ALL currently implemented feature extractors + Returns a list containing ALL currently implemented feature extractors. - streamInput can be a Stream, DataInstance, or path to a corpus or local + `streamInput` can be a Stream, DataInstance, or path to a corpus or local file to this data set. >>> s = converter.parse('tinynotation: 4/4 c4 d e2') @@ -1142,10 +1196,12 @@ def allFeaturesAsList(streamInput): # ------------------------------------------------------------------------------ -def extractorsById(idOrList, library=('jSymbolic', 'native')): +def extractorsById(idOrList: str|Iterable[str], + library: str|Iterable[str] = ('jSymbolic', 'native') + ) -> list[type[FeatureExtractor]]: ''' Given one or more :class:`~music21.features.FeatureExtractor` ids, return the - appropriate subclass. An optional `library` argument can be added to define which + appropriate subclass. An optional `library` argument can be added to define which module is used. Current options are jSymbolic and native. >>> features.extractorsById('p20') @@ -1156,10 +1212,12 @@ def extractorsById(idOrList, library=('jSymbolic', 'native')): >>> [x.id for x in features.extractorsById(['p19', 'p20'])] ['P19', 'P20'] - Normalizes case: + Normalizes case, and strips hyphens and spaces from the given ids: >>> [x.id for x in features.extractorsById(['r31', 'r32', 'r33', 'r34', 'r35', 'p1', 'p2'])] ['R31', 'R32', 'R33', 'R34', 'R35', 'P1', 'P2'] + >>> [x.id for x in features.extractorsById(['p-20', 'p 21'])] + ['P20', 'P21'] Get all feature extractors from all libraries: @@ -1171,27 +1229,25 @@ def extractorsById(idOrList, library=('jSymbolic', 'native')): from music21.features import jSymbolic from music21.features import native - if not common.isIterable(library): - library = [library] + # a bare string is a single library/id, not an iterable of them + libraries: Iterable[str] = [library] if isinstance(library, str) else library - featureExtractors = [] - for lib in library: + featureExtractors: list[type[FeatureExtractor]] = [] + for lib in libraries: if lib.lower() in ['jsymbolic', 'all']: featureExtractors += jSymbolic.featureExtractors elif lib.lower() in ['native', 'all']: featureExtractors += native.featureExtractors - if not common.isIterable(idOrList): - idOrList = [idOrList] + ids: Iterable[str] = [idOrList] if isinstance(idOrList, str) else idOrList - flatIds = [] - for featureId in idOrList: + flatIds: list[str] = [] + for featureId in ids: featureId = featureId.strip().lower() - featureId.replace('-', '') - featureId.replace(' ', '') + featureId = featureId.replace('-', '').replace(' ', '') flatIds.append(featureId) - post = [] + post: list[type[FeatureExtractor]] = [] if not flatIds: return post @@ -1201,7 +1257,9 @@ def extractorsById(idOrList, library=('jSymbolic', 'native')): return post -def extractorById(idOrList, library=('jSymbolic', 'native')): +def extractorById(idOrList: str|Iterable[str], + library: str|Iterable[str] = ('jSymbolic', 'native') + ) -> type[FeatureExtractor]|None: ''' Get the first feature matched by extractorsById(). @@ -1218,29 +1276,31 @@ def extractorById(idOrList, library=('jSymbolic', 'native')): return None # no match -def vectorById(streamObj, vectorId, library=('jSymbolic', 'native')): +def vectorById(streamObj: stream.Stream|DataInstance, vectorId: str|Iterable[str], + library: str|Iterable[str] = ('jSymbolic', 'native')) -> list[int|float]|None: ''' - Utility function to get a vector from an extractor + Utility function to get a vector from an extractor. >>> s = stream.Stream() >>> s.append(note.Note('A4')) >>> features.vectorById(s, 'p20') [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] ''' - fe = extractorById(vectorId, library)(streamObj) # call class with stream - if fe is None: + extractorClass = extractorById(vectorId, library) + if extractorClass is None: return None # could raise exception + fe = extractorClass(streamObj) # call class with stream return fe.extract().vector -def getIndex(featureString, extractorType=None): +def getIndex(featureString: str, extractorType: str|None = None) -> tuple[int, str]|None: ''' Returns the list index of the given feature extractor and the feature extractor - category (jsymbolic or native). If feature extractor string is not in either - jsymbolic or native feature extractors, returns None + category (jsymbolic or native). If the feature extractor string is not in either + the jsymbolic or native feature extractors, returns None. - optionally include the extractorType ('jsymbolic' or 'native') if known - and searching will be made more efficient + Optionally include the extractorType ('jsymbolic' or 'native') if known + and searching will be made more efficient. >>> features.getIndex('Range') (61, 'jsymbolic') @@ -1860,9 +1920,9 @@ def testParallelRun(self): # ''').strip()) -def _pickleFunctionNumPitches(bachStream): +def _pickleFunctionNumPitches(bachStream) -> int: ''' - A function for documentation testing of a pickleable function + A function for documentation testing of a pickleable function. ''' return len(bachStream.pitches) diff --git a/music21/features/jSymbolic.py b/music21/features/jSymbolic.py index 22e022e8b..dc48ea6ab 100644 --- a/music21/features/jSymbolic.py +++ b/music21/features/jSymbolic.py @@ -9,11 +9,14 @@ # ------------------------------------------------------------------------------ ''' The features implemented here are based on those found in jSymbolic and -defined in Cory McKay's MA Thesis, "Automatic Genre Classification of MIDI Recordings" +defined in Cory McKay's MA Thesis, "Automatic Genre Classification of MIDI Recordings". + +Type annotations in this module were added with AI assistance (Claude). ''' from __future__ import annotations from collections import OrderedDict +from collections.abc import Sequence import copy import math from math import isclose @@ -50,21 +53,18 @@ class MelodicIntervalHistogramFeature(featuresModule.FeatureExtractor): [0.144..., 0.220..., 0.364..., 0.062..., 0.050...] ''' id = 'M1' + name = 'Melodic Interval Histogram' + description = ('A features array with bins corresponding to ' + 'the values of the melodic interval histogram.') + dimensions = 128 + normalize = True - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Melodic Interval Histogram' - self.description = ('A features array with bins corresponding to ' - 'the values of the melodic interval histogram.') - self.isSequential = True - self.dimensions = 128 - self.normalize = True - - def process(self): + def process(self) -> None: ''' Do processing necessary, storing result in feature. ''' + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') for i, value in enumerate(self.data['midiIntervalHistogram']): self.feature.vector[i] = value @@ -80,19 +80,15 @@ class AverageMelodicIntervalFeature(featuresModule.FeatureExtractor): [2.44...] ''' id = 'M2' + name = 'Average Melodic Interval' + description = 'Average melodic interval (in semitones).' - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Average Melodic Interval' - self.description = 'Average melodic interval (in semitones).' - self.isSequential = True - self.dimensions = 1 - - def process(self): + def process(self) -> None: ''' Do processing necessary, storing result in feature. ''' + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') values = [] # already summed by part if parts exist histo = self.data['midiIntervalHistogram'] @@ -114,19 +110,15 @@ class MostCommonMelodicIntervalFeature(featuresModule.FeatureExtractor): [2] ''' id = 'M3' + name = 'Most Common Melodic Interval' + description = 'Melodic interval with the highest frequency.' - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Most Common Melodic Interval' - self.description = 'Melodic interval with the highest frequency.' - self.isSequential = True - self.dimensions = 1 - - def process(self): + def process(self) -> None: ''' Do processing necessary, storing result in feature. ''' + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') # already summed by part if parts exist histo = self.data['midiIntervalHistogram'] maxValue = max(histo) @@ -145,21 +137,17 @@ class DistanceBetweenMostCommonMelodicIntervalsFeature( [1] ''' id = 'M4' + name = 'Distance Between Most Common Melodic Intervals' + description = ('Absolute value of the difference between the ' + 'most common melodic interval and the second most ' + 'common melodic interval.') - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Distance Between Most Common Melodic Intervals' - self.description = ('Absolute value of the difference between the ' - 'most common melodic interval and the second most ' - 'common melodic interval.') - self.isSequential = True - self.dimensions = 1 - - def process(self): + def process(self) -> None: ''' Do processing necessary, storing result in feature. ''' + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') # copy b/c will manipulate histo = copy.deepcopy(self.data['midiIntervalHistogram']) maxValue = max(histo) @@ -183,19 +171,15 @@ class MostCommonMelodicIntervalPrevalenceFeature( [0.364...] ''' id = 'M5' + name = 'Most Common Melodic Interval Prevalence' + description = 'Fraction of melodic intervals that belong to the most common interval.' - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Most Common Melodic Interval Prevalence' - self.description = 'Fraction of melodic intervals that belong to the most common interval.' - self.isSequential = True - self.dimensions = 1 - - def process(self): + def process(self) -> None: ''' Do processing necessary, storing result in feature. ''' + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') # copy b/c will manipulate histo = copy.deepcopy(self.data['midiIntervalHistogram']) maxValue = max(histo) @@ -216,21 +200,17 @@ class RelativeStrengthOfMostCommonIntervalsFeature( [0.603...] ''' id = 'M6' + name = 'Relative Strength of Most Common Intervals' + description = ('Fraction of melodic intervals that belong ' + 'to the second most common interval divided by the ' + 'fraction of melodic intervals belonging to the most common interval.') - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Relative Strength of Most Common Intervals' - self.description = ('Fraction of melodic intervals that belong ' - 'to the second most common interval divided by the ' - 'fraction of melodic intervals belonging to the most common interval.') - self.isSequential = True - self.dimensions = 1 - - def process(self): + def process(self) -> None: ''' Do processing necessary, storing result in feature. ''' + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') # copy b/c will manipulate histo = copy.deepcopy(self.data['midiIntervalHistogram']) count = sum(histo) @@ -255,20 +235,16 @@ class NumberOfCommonMelodicIntervalsFeature(featuresModule.FeatureExtractor): [3] ''' id = 'M7' + name = 'Number of Common Melodic Intervals' + description = ('Number of melodic intervals that represent ' + 'at least 9% of all melodic intervals.') - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Number of Common Melodic Intervals' - self.description = ('Number of melodic intervals that represent ' - 'at least 9% of all melodic intervals.') - self.isSequential = True - self.dimensions = 1 - - def process(self): + def process(self) -> None: ''' Do processing necessary, storing result in feature. ''' + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') histo = self.data['midiIntervalHistogram'] total = sum(histo) if not total: @@ -294,21 +270,17 @@ class AmountOfArpeggiationFeature(featuresModule.FeatureExtractor): [0.333...] ''' id = 'M8' + name = 'Amount of Arpeggiation' + description = ('Fraction of horizontal intervals that are repeated notes, ' + 'minor thirds, major thirds, perfect fifths, minor sevenths, ' + 'major sevenths, octaves, minor tenths or major tenths.') - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Amount of Arpeggiation' - self.description = ('Fraction of horizontal intervals that are repeated notes, ' - 'minor thirds, major thirds, perfect fifths, minor sevenths, ' - 'major sevenths, octaves, minor tenths or major tenths.') - self.isSequential = True - self.dimensions = 1 - - def process(self): + def process(self) -> None: ''' Do processing necessary, storing result in feature. ''' + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') histo = self.data['midiIntervalHistogram'] total = sum(histo) if total == 0: @@ -326,7 +298,7 @@ def process(self): class RepeatedNotesFeature(featuresModule.FeatureExtractor): ''' - Fraction of notes that are repeated melodically + Fraction of notes that are repeated melodically. >>> s = corpus.parse('bwv66.6') >>> fe = features.jSymbolic.RepeatedNotesFeature(s) @@ -335,19 +307,15 @@ class RepeatedNotesFeature(featuresModule.FeatureExtractor): [0.144...] ''' id = 'M9' + name = 'Repeated Notes' + description = 'Fraction of notes that are repeated melodically.' - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Repeated Notes' - self.description = 'Fraction of notes that are repeated melodically.' - self.isSequential = True - self.dimensions = 1 - - def process(self): + def process(self) -> None: ''' Do processing necessary, storing result in feature. ''' + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') histo = self.data['midiIntervalHistogram'] total = sum(histo) if total == 0: @@ -374,19 +342,15 @@ class ChromaticMotionFeature(featuresModule.FeatureExtractor): [0.220...] ''' id = 'm10' + name = 'Chromatic Motion' + description = 'Fraction of melodic intervals corresponding to a semi-tone.' - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Chromatic Motion' - self.description = 'Fraction of melodic intervals corresponding to a semi-tone.' - self.isSequential = True - self.dimensions = 1 - - def process(self): + def process(self) -> None: ''' Do processing necessary, storing result in feature. ''' + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') histo = self.data['midiIntervalHistogram'] total = sum(histo) if not total: @@ -401,7 +365,7 @@ def process(self): class StepwiseMotionFeature(featuresModule.FeatureExtractor): ''' - Fraction of melodic intervals that corresponded to a minor or major second + Fraction of melodic intervals that correspond to a minor or major second. >>> s = corpus.parse('bwv66.6') >>> fe = features.jSymbolic.StepwiseMotionFeature(s) @@ -410,20 +374,16 @@ class StepwiseMotionFeature(featuresModule.FeatureExtractor): [0.584...] ''' id = 'M11' + name = 'Stepwise Motion' + description = ('Fraction of melodic intervals that corresponded ' + 'to a minor or major second.') - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Stepwise Motion' - self.description = ('Fraction of melodic intervals that corresponded ' - 'to a minor or major second.') - self.isSequential = True - self.dimensions = 1 - - def process(self): + def process(self) -> None: ''' Do processing necessary, storing result in feature. ''' + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') histo = self.data['midiIntervalHistogram'] total = sum(histo) if not total: @@ -438,7 +398,7 @@ def process(self): class MelodicThirdsFeature(featuresModule.FeatureExtractor): ''' - Fraction of melodic intervals that are major or minor thirds + Fraction of melodic intervals that are major or minor thirds. >>> s = corpus.parse('bwv66.6') >>> fe = features.jSymbolic.MelodicThirdsFeature(s) @@ -447,19 +407,15 @@ class MelodicThirdsFeature(featuresModule.FeatureExtractor): [0.113...] ''' id = 'M12' + name = 'Melodic Thirds' + description = 'Fraction of melodic intervals that are major or minor thirds.' - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Melodic Thirds' - self.description = 'Fraction of melodic intervals that are major or minor thirds.' - self.isSequential = True - self.dimensions = 1 - - def process(self): + def process(self) -> None: ''' Do processing necessary, storing result in feature. ''' + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') histo = self.data['midiIntervalHistogram'] total = sum(histo) if not total: @@ -474,7 +430,7 @@ def process(self): class MelodicFifthsFeature(featuresModule.FeatureExtractor): ''' - Fraction of melodic intervals that are perfect fifths + Fraction of melodic intervals that are perfect fifths. >>> s = corpus.parse('bwv66.6') >>> fe = features.jSymbolic.MelodicFifthsFeature(s) @@ -483,19 +439,15 @@ class MelodicFifthsFeature(featuresModule.FeatureExtractor): [0.056...] ''' id = 'M13' + name = 'Melodic Fifths' + description = 'Fraction of melodic intervals that are perfect fifths.' - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Melodic Fifths' - self.description = 'Fraction of melodic intervals that are perfect fifths.' - self.isSequential = True - self.dimensions = 1 - - def process(self): + def process(self) -> None: ''' Do processing necessary, storing result in feature. ''' + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') histo = self.data['midiIntervalHistogram'] total = sum(histo) if not total: @@ -510,7 +462,7 @@ def process(self): class MelodicTritonesFeature(featuresModule.FeatureExtractor): ''' - Fraction of melodic intervals that are tritones + Fraction of melodic intervals that are tritones. >>> s = corpus.parse('bwv66.6') >>> fe = features.jSymbolic.MelodicTritonesFeature(s) @@ -519,19 +471,15 @@ class MelodicTritonesFeature(featuresModule.FeatureExtractor): [0.012...] ''' id = 'M14' + name = 'Melodic Tritones' + description = 'Fraction of melodic intervals that are tritones.' - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Melodic Tritones' - self.description = 'Fraction of melodic intervals that are tritones.' - self.isSequential = True - self.dimensions = 1 - - def process(self): + def process(self) -> None: ''' Do processing necessary, storing result in feature. ''' + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') histo = self.data['midiIntervalHistogram'] total = sum(histo) if not total: @@ -546,7 +494,7 @@ def process(self): class MelodicOctavesFeature(featuresModule.FeatureExtractor): ''' - Fraction of melodic intervals that are octaves + Fraction of melodic intervals that are octaves. >>> s = corpus.parse('bwv66.6') >>> fe = features.jSymbolic.MelodicOctavesFeature(s) @@ -555,19 +503,15 @@ class MelodicOctavesFeature(featuresModule.FeatureExtractor): [0.018...] ''' id = 'M15' + name = 'Melodic Octaves' + description = 'Fraction of melodic intervals that are octaves.' - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Melodic Octaves' - self.description = 'Fraction of melodic intervals that are octaves.' - self.isSequential = True - self.dimensions = 1 - - def process(self): + def process(self) -> None: ''' Do processing necessary, storing result in feature. ''' + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') histo = self.data['midiIntervalHistogram'] total = sum(histo) if not total: @@ -592,19 +536,15 @@ class DirectionOfMotionFeature(featuresModule.FeatureExtractor): [0.470...] ''' id = 'm17' + name = 'Direction of Motion' + description = 'Fraction of melodic intervals that are rising rather than falling.' - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Direction of Motion' - self.description = 'Fraction of melodic intervals that are rising rather than falling.' - self.isSequential = True - self.dimensions = 1 - - def process(self): + def process(self) -> None: ''' Do processing necessary, storing result in feature. ''' + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') rising = 0 falling = 0 cBundle = [] @@ -651,22 +591,18 @@ class DurationOfMelodicArcsFeature(featuresModule.FeatureExtractor): [1.74...] ''' id = 'M18' + name = 'Duration of Melodic Arcs' + description = ('Average number of notes that separate melodic ' + 'peaks and troughs in any part. This is calculated as the ' + 'total number of intervals (not counting unisons) divided ' + 'by the number of times the melody changes direction.') - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Duration of Melodic Arcs' - self.description = ('Average number of notes that separate melodic ' - 'peaks and troughs in any part. This is calculated as the ' - 'total number of intervals (not counting unisons) divided ' - 'by the number of times the melody changes direction.') - self.isSequential = True - self.dimensions = 1 - - def process(self): + def process(self) -> None: ''' Do processing necessary, storing result in feature. ''' + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') # `cList` contains a list of melodic intervals in a part. # For example, C4 E4 G4 E4 C4 results in a cList of [4, 3, -3, -4]. # Each part is encoded in a separate cList; cBundle contains all @@ -706,6 +642,7 @@ def process(self): elif interval < 0: current_direction = DESCENDING # Duration of melodic arcs is 0 if it never changes direction + duration_of_melodic_arcs: float if direction_changes == 0: duration_of_melodic_arcs = 0 else: @@ -742,25 +679,21 @@ class SizeOfMelodicArcsFeature(featuresModule.FeatureExtractor): [4.84...] ''' id = 'M19' - - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Size of Melodic Arcs' - self.description = ('Average span (in semitones) between melodic peaks ' - 'and troughs in any part. Each time the melody changes ' - 'direction begins a new arc. The average size of' - 'melodic arcs is defined as the total size of melodic' - 'intervals between changes of directions - or between' - 'the start of the melody and the first change of' - 'direction - divided by the number of direction changes.') - self.isSequential = True - self.dimensions = 1 - - def process(self): + name = 'Size of Melodic Arcs' + description = ('Average span (in semitones) between melodic peaks ' + 'and troughs in any part. Each time the melody changes ' + 'direction begins a new arc. The average size of' + 'melodic arcs is defined as the total size of melodic' + 'intervals between changes of directions - or between' + 'the start of the melody and the first change of' + 'direction - divided by the number of direction changes.') + + def process(self) -> None: ''' Do processing necessary, storing result in feature. ''' + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') # `cList` contains a list of melodic intervals in a part. # For example, C4 E4 G4 E4 C4 results in a cList of [4, 3, -3, -4]. # Each part is encoded in a separate cList; cBundle contains all @@ -814,6 +747,7 @@ def process(self): this_arc_interval += abs(interval) # If it never changes direction, the size of melodic arcs is defined to be 0 + size_of_melodic_arcs: float if direction_changes == 0: size_of_melodic_arcs = 0 else: @@ -835,20 +769,16 @@ class MostCommonPitchPrevalenceFeature(featuresModule.FeatureExtractor): 0.116... ''' id = 'P1' + name = 'Most Common Pitch Prevalence' + description = 'Fraction of Note Ons corresponding to the most common pitch.' + discrete = False - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Most Common Pitch Prevalence' - self.description = 'Fraction of Note Ons corresponding to the most common pitch.' - self.isSequential = True - self.dimensions = 1 - self.discrete = False - - def process(self): + def process(self) -> None: ''' Do processing necessary, storing result in feature. ''' + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') histo = self.data['pitches.midiPitchHistogram'] if not histo: raise JSymbolicFeatureException('input lacks notes') @@ -870,20 +800,16 @@ class MostCommonPitchClassPrevalenceFeature(featuresModule.FeatureExtractor): [0.196...] ''' id = 'P2' + name = 'Most Common Pitch Class Prevalence' + description = 'Fraction of Note Ons corresponding to the most common pitch class.' + discrete = False - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Most Common Pitch Class Prevalence' - self.description = 'Fraction of Note Ons corresponding to the most common pitch class.' - self.isSequential = True - self.dimensions = 1 - self.discrete = False - - def process(self): + def process(self) -> None: ''' Do processing necessary, storing result in feature. ''' + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') histo = self.data['pitches.pitchClassHistogram'] # if a tie this will return the first # if all zeros will return zero @@ -905,21 +831,17 @@ class RelativeStrengthOfTopPitchesFeature(featuresModule.FeatureExtractor): [0.947...] ''' id = 'P3' + name = 'Relative Strength of Top Pitches' + description = ('The frequency of the 2nd most common pitch ' + 'divided by the frequency of the most common pitch.') + discrete = False - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Relative Strength of Top Pitches' - self.description = ('The frequency of the 2nd most common pitch ' - 'divided by the frequency of the most common pitch.') - self.isSequential = True - self.dimensions = 1 - self.discrete = False - - def process(self): + def process(self) -> None: ''' Do processing necessary, storing result in feature. ''' + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') histo = self.data['pitches.midiPitchHistogram'] # if a tie this will return the first # if all zeros will return zero @@ -941,21 +863,17 @@ class RelativeStrengthOfTopPitchClassesFeature(featuresModule.FeatureExtractor): [0.906...] ''' id = 'P4' + name = 'Relative Strength of Top Pitch Classes' + description = ('The frequency of the 2nd most common pitch class ' + 'divided by the frequency of the most common pitch class.') + discrete = False - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Relative Strength of Top Pitch Classes' - self.description = ('The frequency of the 2nd most common pitch class ' - 'divided by the frequency of the most common pitch class.') - self.isSequential = True - self.dimensions = 1 - self.discrete = False - - def process(self): + def process(self) -> None: ''' Do processing necessary, storing result in feature. ''' + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') # copy b/c will edit histo = copy.deepcopy(self.data['pitches.pitchClassHistogram']) # if a tie this will return the first @@ -982,20 +900,16 @@ class IntervalBetweenStrongestPitchesFeature(featuresModule.FeatureExtractor): [5] ''' id = 'P5' + name = 'Interval Between Strongest Pitches' + description = ('Absolute value of the difference between ' + 'the pitches of the two most common MIDI pitches.') - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Interval Between Strongest Pitches' - self.description = ('Absolute value of the difference between ' - 'the pitches of the two most common MIDI pitches.') - self.isSequential = True - self.dimensions = 1 - - def process(self): + def process(self) -> None: ''' Do processing necessary, storing result in feature. ''' + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') histo = self.data['pitches.midiPitchHistogram'] # if a tie this will return the first # if all zeros will return zero @@ -1017,20 +931,16 @@ class IntervalBetweenStrongestPitchClassesFeature( [5] ''' id = 'P6' + name = 'Interval Between Strongest Pitch Classes' + description = ('Absolute value of the difference between the pitch ' + 'classes of the two most common MIDI pitch classes.') - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Interval Between Strongest Pitch Classes' - self.description = ('Absolute value of the difference between the pitch ' - 'classes of the two most common MIDI pitch classes.') - self.isSequential = True - self.dimensions = 1 - - def process(self): + def process(self) -> None: ''' Do processing necessary, storing result in feature. ''' + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') histo = copy.deepcopy(self.data['pitches.pitchClassHistogram']) # if a tie this will return the first # if all zeros will return zero @@ -1053,20 +963,16 @@ class NumberOfCommonPitchesFeature(featuresModule.FeatureExtractor): [3] ''' id = 'P7' + name = 'Number of Common Pitches' + description = ('Number of pitches that account individually ' + 'for at least 9% of all notes.') - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Number of Common Pitches' - self.description = ('Number of pitches that account individually ' - 'for at least 9% of all notes.') - self.isSequential = True - self.dimensions = 1 - - def process(self): + def process(self) -> None: ''' Do processing necessary, storing result in feature. ''' + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') histo = self.data['pitches.midiPitchHistogram'] total = sum(histo.values()) post = 0 @@ -1086,19 +992,15 @@ class PitchVarietyFeature(featuresModule.FeatureExtractor): [24] ''' id = 'P8' + name = 'Pitch Variety' + description = 'Number of pitches used at least once.' - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Pitch Variety' - self.description = 'Number of pitches used at least once.' - self.isSequential = True - self.dimensions = 1 - - def process(self): + def process(self) -> None: ''' Do processing necessary, storing result in feature. ''' + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') histo = self.data['pitches.midiPitchHistogram'] post = 0 for i, count in enumerate(histo): @@ -1117,19 +1019,15 @@ class PitchClassVarietyFeature(featuresModule.FeatureExtractor): [10] ''' id = 'P9' + name = 'Pitch Class Variety' + description = 'Number of pitch classes used at least once.' - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Pitch Class Variety' - self.description = 'Number of pitch classes used at least once.' - self.isSequential = True - self.dimensions = 1 - - def process(self): + def process(self) -> None: ''' Do processing necessary, storing result in feature. ''' + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') histo = self.data['pitches.pitchClassHistogram'] post = 0 for i, count in enumerate(histo): @@ -1140,7 +1038,7 @@ def process(self): class RangeFeature(featuresModule.FeatureExtractor): ''' - Difference between highest and lowest pitches. In semitones + Difference between highest and lowest pitches, in semitones. >>> s = corpus.parse('bwv66.6') >>> fe = features.jSymbolic.RangeFeature(s) @@ -1148,19 +1046,15 @@ class RangeFeature(featuresModule.FeatureExtractor): [34] ''' id = 'P10' + name = 'Range' + description = 'Difference between highest and lowest pitches.' - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Range' - self.description = 'Difference between highest and lowest pitches.' - self.isSequential = True - self.dimensions = 1 - - def process(self): + def process(self) -> None: ''' Do processing necessary, storing result in feature. ''' + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') histo = self.data['pitches.midiPitchHistogram'] if not histo: raise JSymbolicFeatureException('input lacks notes') @@ -1180,20 +1074,16 @@ class MostCommonPitchFeature(featuresModule.FeatureExtractor): [61] ''' id = 'P11' + name = 'Most Common Pitch' + description = 'Bin label of the most common pitch.' + discrete = False - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Most Common Pitch' - self.description = 'Bin label of the most common pitch.' - self.isSequential = True - self.dimensions = 1 - self.discrete = False - - def process(self): + def process(self) -> None: ''' Do processing necessary, storing result in feature. ''' + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') histo = self.data['pitches.midiPitchHistogram'] try: pNumberMax = histo.most_common(1)[0][0] @@ -1212,20 +1102,16 @@ class PrimaryRegisterFeature(featuresModule.FeatureExtractor): [61.12...] ''' id = 'P12' + name = 'Primary Register' + description = 'Average MIDI pitch.' + discrete = False - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Primary Register' - self.description = 'Average MIDI pitch.' - self.isSequential = True - self.dimensions = 1 - self.discrete = False - - def process(self): + def process(self) -> None: ''' Do processing necessary, storing result in feature. ''' + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') histo = self.data['pitches'] if not histo: raise JSymbolicFeatureException('input lacks notes') @@ -1242,20 +1128,16 @@ class ImportanceOfBassRegisterFeature(featuresModule.FeatureExtractor): [0.184...] ''' id = 'P13' + name = 'Importance of Bass Register' + description = 'Fraction of Note Ons between MIDI pitches 0 and 54.' + discrete = False - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Importance of Bass Register' - self.description = 'Fraction of Note Ons between MIDI pitches 0 and 54.' - self.isSequential = True - self.dimensions = 1 - self.discrete = False - - def process(self): + def process(self) -> None: ''' Do processing necessary, storing result in feature. ''' + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') histo = self.data['pitches.midiPitchHistogram'] if not histo: raise JSymbolicFeatureException('input lacks notes') @@ -1271,7 +1153,7 @@ def process(self): class ImportanceOfMiddleRegisterFeature(featuresModule.FeatureExtractor): ''' - Fraction of Notes between MIDI pitches 55 and 72 + Fraction of Notes between MIDI pitches 55 and 72. >>> s = corpus.parse('bwv66.6') >>> fe = features.jSymbolic.ImportanceOfMiddleRegisterFeature(s) @@ -1279,20 +1161,16 @@ class ImportanceOfMiddleRegisterFeature(featuresModule.FeatureExtractor): [0.766...] ''' id = 'P14' + name = 'Importance of Middle Register' + description = 'Fraction of Note Ons between MIDI pitches 55 and 72.' + discrete = False - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Importance of Middle Register' - self.description = 'Fraction of Note Ons between MIDI pitches 55 and 72.' - self.isSequential = True - self.dimensions = 1 - self.discrete = False - - def process(self): + def process(self) -> None: ''' Do processing necessary, storing result in feature. ''' + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') histo = self.data['pitches.midiPitchHistogram'] if not histo: raise JSymbolicFeatureException('input lacks notes') @@ -1316,20 +1194,16 @@ class ImportanceOfHighRegisterFeature(featuresModule.FeatureExtractor): [0.049...] ''' id = 'P15' + name = 'Importance of High Register' + description = 'Fraction of Note Ons between MIDI pitches 73 and 127.' + discrete = False - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Importance of High Register' - self.description = 'Fraction of Note Ons between MIDI pitches 73 and 127.' - self.isSequential = True - self.dimensions = 1 - self.discrete = False - - def process(self): + def process(self) -> None: ''' Do processing necessary, storing result in feature. ''' + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') histo = self.data['pitches.midiPitchHistogram'] if not histo: raise JSymbolicFeatureException('input lacks notes') @@ -1353,19 +1227,15 @@ class MostCommonPitchClassFeature(featuresModule.FeatureExtractor): [1] ''' id = 'P16' + name = 'Most Common Pitch Class' + description = 'Bin label of the most common pitch class.' - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Most Common Pitch Class' - self.description = 'Bin label of the most common pitch class.' - self.isSequential = True - self.dimensions = 1 - - def process(self): + def process(self) -> None: ''' Do processing necessary, storing result in feature. ''' + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') histo = self.data['pitches.pitchClassHistogram'] pIndexMax = histo.index(max(histo)) self.feature.vector[0] = pIndexMax @@ -1373,46 +1243,38 @@ def process(self): class DominantSpreadFeature(featuresModule.FeatureExtractor): ''' - Not implemented + Not implemented. Largest number of consecutive pitch classes separated by perfect 5ths that accounted for at least 9% each of the notes. ''' id = 'P17' + name = 'Dominant Spread' + description = ('Largest number of consecutive pitch classes separated by ' + 'perfect 5ths that accounted for at least 9% each of the notes.') - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Dominant Spread' - self.description = ('Largest number of consecutive pitch classes separated by ' - 'perfect 5ths that accounted for at least 9% each of the notes.') - self.isSequential = True - self.dimensions = 1 - - def process(self): + def process(self) -> None: + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') raise JSymbolicFeatureException('not yet implemented') # TODO: implement class StrongTonalCentresFeature(featuresModule.FeatureExtractor): ''' - Not implemented + Not implemented. Number of peaks in the fifths pitch histogram that each account for at least 9% of all Note Ons. ''' id = 'P18' + name = 'Strong Tonal Centres' + description = ('Number of peaks in the fifths pitch histogram that each account ' + 'for at least 9% of all Note Ons.') - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Strong Tonal Centres' - self.description = ('Number of peaks in the fifths pitch histogram that each account ' - 'for at least 9% of all Note Ons.') - self.isSequential = True - self.dimensions = 1 - - def process(self): + def process(self) -> None: + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') raise JSymbolicFeatureException('not yet implemented') # TODO: implement @@ -1440,21 +1302,18 @@ class BasicPitchHistogramFeature(featuresModule.FeatureExtractor): 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] ''' id = 'P19' + name = 'Basic Pitch Histogram' + description = ('A features array with bins corresponding to the ' + 'values of the basic pitch histogram.') + dimensions = 128 + normalize = True - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Basic Pitch Histogram' - self.description = ('A features array with bins corresponding to the ' - 'values of the basic pitch histogram.') - self.isSequential = True - self.dimensions = 128 - self.normalize = True - - def process(self): + def process(self) -> None: ''' Do processing necessary, storing result in feature. ''' + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') for i, count in self.data['pitches.midiPitchHistogram'].items(): self.feature.vector[i] = count @@ -1474,24 +1333,21 @@ class PitchClassDistributionFeature(featuresModule.FeatureExtractor): 0.085..., 0.134..., 0.018..., 0.171..., 0.0] ''' id = 'P20' - - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Pitch Class Distribution' - self.description = ('A feature array with 12 entries where the first holds ' - 'the frequency of the bin of the pitch class histogram with ' - 'the highest frequency, and the following entries holding ' - 'the successive bins of the histogram, wrapping around if necessary.') - self.isSequential = True - self.dimensions = 12 - self.discrete = False - self.normalize = True - - def process(self): + name = 'Pitch Class Distribution' + description = ('A feature array with 12 entries where the first holds ' + 'the frequency of the bin of the pitch class histogram with ' + 'the highest frequency, and the following entries holding ' + 'the successive bins of the histogram, wrapping around if necessary.') + dimensions = 12 + discrete = False + normalize = True + + def process(self) -> None: ''' Do processing necessary, storing result in feature. ''' + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') # Create vector with [C, C#, D, etc.] temp = [0] * self.dimensions for i, count in enumerate(self.data['pitches.pitchClassHistogram']): @@ -1521,26 +1377,20 @@ class FifthsPitchHistogramFeature(featuresModule.FeatureExtractor): 0.085..., 0.006..., 0.018..., 0.036...] ''' id = 'P21' - - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Fifths Pitch Histogram' - self.description = ('A feature array with bins corresponding to the values of the ' - '5ths pitch class histogram.') - self.isSequential = True - self.dimensions = 12 - self.normalize = True - - # create pc to index mapping - self._mapping = {} - for i in range(12): - self._mapping[i] = (7 * i) % 12 - - def process(self): + name = 'Fifths Pitch Histogram' + description = ('A feature array with bins corresponding to the values of the ' + '5ths pitch class histogram.') + dimensions = 12 + normalize = True + # pitch-class to circle-of-fifths index mapping + _mapping = {i: (7 * i) % 12 for i in range(12)} + + def process(self) -> None: ''' Do processing necessary, storing result in feature. ''' + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') for i, count in enumerate(self.data['pitches.pitchClassHistogram']): self.feature.vector[self._mapping[i]] = count @@ -1568,23 +1418,19 @@ class QualityFeature(featuresModule.FeatureExtractor): [1] ''' id = 'P22' - - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Quality' - self.description = ''' + name = 'Quality' + description = ''' Set to 0 if the key signature indicates that a recording is major, set to 1 if it indicates that it is minor and set to 0 if key signature is unknown. ''' - self.isSequential = True - self.dimensions = 1 - def process(self): + def process(self) -> None: ''' Do processing necessary, storing result in feature. ''' + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') allKeys = self.data['flat.getElementsByClass(Key)'] keyFeature = None for x in allKeys: @@ -1602,30 +1448,26 @@ def process(self): class GlissandoPrevalenceFeature(featuresModule.FeatureExtractor): ''' - Not yet implemented in music21 + Not yet implemented in music21. Number of Note Ons that have at least one MIDI Pitch Bend associated with them divided by total number of pitched Note Ons. ''' id = 'P23' + name = 'Glissando Prevalence' + description = ('Number of Note Ons that have at least one MIDI Pitch Bend ' + 'associated with them divided by total number of pitched Note Ons.') - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Glissando Prevalence' - self.description = ('Number of Note Ons that have at least one MIDI Pitch Bend ' - 'associated with them divided by total number of pitched Note Ons.') - self.isSequential = True - self.dimensions = 1 - - def process(self): + def process(self) -> None: + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') raise JSymbolicFeatureException('not yet implemented') # TODO: implement class AverageRangeOfGlissandosFeature(featuresModule.FeatureExtractor): ''' - Not yet implemented in music21 + Not yet implemented in music21. Average range of MIDI Pitch Bends, where "range" is defined as the greatest value of the absolute difference between 64 and the @@ -1633,70 +1475,57 @@ class AverageRangeOfGlissandosFeature(featuresModule.FeatureExtractor): Note On and Note Off messages of any note ''' id = 'P24' - - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Average Range Of Glissandos' - self.description = ('Average range of MIDI Pitch Bends, where "range" is ' - 'defined as the greatest value of the absolute difference ' - 'between 64 and the second data byte of all MIDI Pitch Bend ' - 'messages falling between the Note On and Note Off messages ' - 'of any note.') - self.isSequential = True - self.dimensions = 1 - - def process(self): + name = 'Average Range Of Glissandos' + description = ('Average range of MIDI Pitch Bends, where "range" is ' + 'defined as the greatest value of the absolute difference ' + 'between 64 and the second data byte of all MIDI Pitch Bend ' + 'messages falling between the Note On and Note Off messages ' + 'of any note.') + + def process(self) -> None: + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') raise JSymbolicFeatureException('not yet implemented') # TODO: implement class VibratoPrevalenceFeature(featuresModule.FeatureExtractor): ''' - Not yet implemented in music21 + Not yet implemented in music21. Number of notes for which Pitch Bend messages change direction at least twice divided by total number of notes that have Pitch Bend messages associated with them. ''' id = 'P25' - - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Vibrato Prevalence' - self.description = ('Number of notes for which Pitch Bend messages change ' - 'direction at least twice divided by total number of notes ' - 'that have Pitch Bend messages associated with them.') - self.isSequential = True - self.dimensions = 1 - - def process(self): + name = 'Vibrato Prevalence' + description = ('Number of notes for which Pitch Bend messages change ' + 'direction at least twice divided by total number of notes ' + 'that have Pitch Bend messages associated with them.') + + def process(self) -> None: + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') raise JSymbolicFeatureException('not yet implemented') # TODO: implement class PrevalenceOfMicrotonesFeature(featuresModule.FeatureExtractor): ''' - not yet implemented + Not yet implemented. Number of Note Ons that are preceded by isolated MIDI Pitch Bend messages as a fraction of the total number of Note Ons.' ''' id = 'P26' + name = 'Prevalence Of Microtones' + description = ('Number of Note Ons that are preceded by isolated MIDI Pitch ' + 'Bend messages as a fraction of the total number of Note Ons.') - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, - **keywords) - - self.name = 'Prevalence Of Microtones' - self.description = ('Number of Note Ons that are preceded by isolated MIDI Pitch ' - 'Bend messages as a fraction of the total number of Note Ons.') - self.isSequential = True - self.dimensions = 1 - - def process(self): + def process(self) -> None: + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') raise JSymbolicFeatureException('not yet implemented') # TODO: implement @@ -1724,16 +1553,12 @@ class StrongestRhythmicPulseFeature(featuresModule.FeatureExtractor): ''' id = 'R1' + name = 'Strongest Rhythmic Pulse' + description = 'Bin label of the beat bin with the highest frequency.' - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Strongest Rhythmic Pulse' - self.description = 'Bin label of the beat bin with the highest frequency.' - self.isSequential = True - self.dimensions = 1 - - def process(self): + def process(self) -> None: + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') beatHisto = self.data['flat.secondsMap.beatHistogram'] self.feature.vector[0] = beatHisto.index(max(beatHisto)) @@ -1758,17 +1583,13 @@ class SecondStrongestRhythmicPulseFeature(featuresModule.FeatureExtractor): ''' id = 'R2' + name = 'Second Strongest Rhythmic Pulse' + description = ('Bin label of the beat bin of the peak ' + 'with the second highest frequency.') - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Second Strongest Rhythmic Pulse' - self.description = ('Bin label of the beat bin of the peak ' - 'with the second highest frequency.') - self.isSequential = True - self.dimensions = 1 - - def process(self): + def process(self) -> None: + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') beatHisto = copy.copy(self.data['flat.secondsMap.beatHistogram']) highestIndex = beatHisto.index(max(beatHisto)) beatHisto[highestIndex] = 0 @@ -1798,18 +1619,14 @@ class HarmonicityOfTwoStrongestRhythmicPulsesFeature( ''' id = 'R3' - - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Harmonicity of Two Strongest Rhythmic Pulses' - self.description = ('The bin label of the higher (in terms of bin label) of the ' - 'two beat bins of the peaks with the highest frequency ' - 'divided by the bin label of the lower.') - self.isSequential = True - self.dimensions = 1 - - def process(self): + name = 'Harmonicity of Two Strongest Rhythmic Pulses' + description = ('The bin label of the higher (in terms of bin label) of the ' + 'two beat bins of the peaks with the highest frequency ' + 'divided by the bin label of the lower.') + + def process(self) -> None: + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') beatHisto = copy.copy(self.data['flat.secondsMap.beatHistogram']) highestIndex = beatHisto.index(max(beatHisto)) beatHisto[highestIndex] = 0 @@ -1830,16 +1647,12 @@ class StrengthOfStrongestRhythmicPulseFeature(featuresModule.FeatureExtractor): 0.853... ''' id = 'R4' + name = 'Strength of Strongest Rhythmic Pulse' + description = 'Frequency of the beat bin with the highest frequency.' - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Strength of Strongest Rhythmic Pulse' - self.description = 'Frequency of the beat bin with the highest frequency.' - self.isSequential = True - self.dimensions = 1 - - def process(self): + def process(self) -> None: + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') beatHisto = self.data['flat.secondsMap.beatHistogram'] self.feature.vector[0] = max(beatHisto) / sum(beatHisto) @@ -1857,17 +1670,13 @@ class StrengthOfSecondStrongestRhythmicPulseFeature( 0.121... ''' id = 'R5' + name = 'Strength of Second Strongest Rhythmic Pulse' + description = ('Frequency of the beat bin of the peak ' + 'with the second highest frequency.') - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Strength of Second Strongest Rhythmic Pulse' - self.description = ('Frequency of the beat bin of the peak ' - 'with the second highest frequency.') - self.isSequential = True - self.dimensions = 1 - - def process(self): + def process(self) -> None: + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') beatHisto = copy.copy(self.data['flat.secondsMap.beatHistogram']) sumHisto = sum(beatHisto) @@ -1893,18 +1702,14 @@ class StrengthRatioOfTwoStrongestRhythmicPulsesFeature( ''' id = 'R6' - - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Strength Ratio of Two Strongest Rhythmic Pulses' - self.description = ('The frequency of the higher (in terms of frequency) of the two ' - 'beat bins corresponding to the peaks with the highest ' - 'frequency divided by the frequency of the lower.') - self.isSequential = True - self.dimensions = 1 - - def process(self): + name = 'Strength Ratio of Two Strongest Rhythmic Pulses' + description = ('The frequency of the higher (in terms of frequency) of the two ' + 'beat bins corresponding to the peaks with the highest ' + 'frequency divided by the frequency of the lower.') + + def process(self) -> None: + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') beatHisto = copy.copy(self.data['flat.secondsMap.beatHistogram']) theHighest = max(beatHisto) @@ -1928,17 +1733,13 @@ class CombinedStrengthOfTwoStrongestRhythmicPulsesFeature( 0.975... ''' id = 'R7' + name = 'Combined Strength of Two Strongest Rhythmic Pulses' + description = ('The sum of the frequencies of the two beat bins ' + 'of the peaks with the highest frequencies.') - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Combined Strength of Two Strongest Rhythmic Pulses' - self.description = ('The sum of the frequencies of the two beat bins ' - 'of the peaks with the highest frequencies.') - self.isSequential = True - self.dimensions = 1 - - def process(self): + def process(self) -> None: + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') beatHisto = copy.copy(self.data['flat.secondsMap.beatHistogram']) sumHisto = sum(beatHisto) @@ -1952,69 +1753,55 @@ def process(self): class NumberOfStrongPulsesFeature(featuresModule.FeatureExtractor): ''' - Not yet implemented + Not yet implemented. Number of beat peaks with normalized frequencies over 0.1. ''' id = 'R8' + name = 'Number of Strong Pulses' + description = 'Number of beat peaks with normalized frequencies over 0.1.' - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Number of Strong Pulses' - self.description = 'Number of beat peaks with normalized frequencies over 0.1.' - self.isSequential = True - self.dimensions = 1 - - def process(self): + def process(self) -> None: + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') raise JSymbolicFeatureException('not yet implemented') # TODO: implement class NumberOfModeratePulsesFeature(featuresModule.FeatureExtractor): ''' - Not yet implemented + Not yet implemented. Number of beat peaks with normalized frequencies over 0.01. ''' id = 'R9' + name = 'Number of Moderate Pulses' + description = 'Number of beat peaks with normalized frequencies over 0.01.' - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Number of Moderate Pulses' - self.description = 'Number of beat peaks with normalized frequencies over 0.01.' - self.isSequential = True - self.dimensions = 1 - - def process(self): + def process(self) -> None: + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') raise JSymbolicFeatureException('not yet implemented') # TODO: implement class NumberOfRelativelyStrongPulsesFeature(featuresModule.FeatureExtractor): ''' - not yet implemented + Not yet implemented. Number of beat peaks with frequencies at least 30% as high as the frequency of the bin with the highest frequency. ''' id = 'R10' - - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Number of Relatively Strong Pulses' - self.description = ('Number of beat peaks with frequencies at least 30% as high as ' - 'the frequency of the bin with the highest frequency.') - self.isSequential = True - self.dimensions = 1 + name = 'Number of Relatively Strong Pulses' + description = ('Number of beat peaks with frequencies at least 30% as high as ' + 'the frequency of the bin with the highest frequency.') class RhythmicLoosenessFeature(featuresModule.FeatureExtractor): ''' - not yet implemented + Not yet implemented. Average width of beat histogram peaks (in beats per minute). Width is measured for all peaks with frequencies at least 30% as high as the highest peak, @@ -2022,27 +1809,23 @@ class RhythmicLoosenessFeature(featuresModule.FeatureExtractor): 30% of the height of the peak. ''' id = 'R11' - - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Rhythmic Looseness' - self.description = dedent(''' + name = 'Rhythmic Looseness' + description = dedent(''' Average width of beat histogram peaks (in beats per minute). Width is measured for all peaks with frequencies at least 30% as high as the highest peak, and is defined by the distance between the points on the peak in question that are 30% of the height of the peak.''') - self.isSequential = True - self.dimensions = 1 - def process(self): + def process(self) -> None: + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') raise JSymbolicFeatureException('not yet implemented') # TODO: implement class PolyrhythmsFeature(featuresModule.FeatureExtractor): ''' - Not yet implemented + Not yet implemented. Number of beat peaks with frequencies at least 30% of the highest frequency whose bin labels are not integer multiples or factors @@ -2052,71 +1835,58 @@ class PolyrhythmsFeature(featuresModule.FeatureExtractor): over 30% of the highest frequency. ''' id = 'R12' - - def __init__(self, dataOrStream=None, **keywords): - featuresModule.FeatureExtractor.__init__(self, - dataOrStream=dataOrStream, - **keywords) - - self.name = 'Polyrhythms' - self.description = ''' + name = 'Polyrhythms' + description = ''' Number of beat peaks with frequencies at least 30% of the highest frequency whose bin labels are not integer multiples or factors (using only multipliers of 1, 2, 3, 4, 6 and 8) (with an accepted error of +/- 3 bins) of the bin label of the peak with the highest frequency. This number is then divided by the total number of beat bins with frequencies over 30% of the highest frequency.''' - self.isSequential = True - self.dimensions = 1 - def process(self): + def process(self) -> None: + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') raise JSymbolicFeatureException('not yet implemented') # TODO: implement class RhythmicVariabilityFeature(featuresModule.FeatureExtractor): ''' - Not yet implemented + Not yet implemented. Standard deviation of the bin values (except the first 40 empty ones). ''' id = 'R13' + name = 'Rhythmic Variability' + description = 'Standard deviation of the bin values (except the first 40 empty ones).' - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Rhythmic Variability' - self.description = 'Standard deviation of the bin values (except the first 40 empty ones).' - self.isSequential = True - self.dimensions = 1 - - def process(self): + def process(self) -> None: + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') raise JSymbolicFeatureException('not yet implemented') # TODO: implement class BeatHistogramFeature(featuresModule.FeatureExtractor): ''' - Not yet implemented + Not yet implemented. A feature extractor that finds a feature array with entries corresponding to the frequency values of each of the bins of the beat histogram (except the first 40 empty ones). ''' id = 'R14' - - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Beat Histogram' - self.description = ('A feature array with entries corresponding to the ' - 'frequency values of each of the bins of the beat histogram ' - '(except the first 40 empty ones).') - self.isSequential = True - self.dimensions = 161 - self.discrete = False - - def process(self): + name = 'Beat Histogram' + description = ('A feature array with entries corresponding to the ' + 'frequency values of each of the bins of the beat histogram ' + '(except the first 40 empty ones).') + dimensions = 161 + discrete = False + + def process(self) -> None: + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') raise JSymbolicFeatureException('not yet implemented') # TODO: implement @@ -2136,16 +1906,12 @@ class NoteDensityFeature(featuresModule.FeatureExtractor): [7.244...] ''' id = 'R15' + name = 'Note Density' + description = 'Average number of notes per second.' - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Note Density' - self.description = 'Average number of notes per second.' - self.isSequential = True - self.dimensions = 1 - - def process(self): + def process(self) -> None: + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') secondsMap = self.data['flat.secondsMap'] # The average number of notes per second in the piece is calculated # by taking the total number of notes in the piece and dividing by @@ -2177,16 +1943,12 @@ class AverageNoteDurationFeature(featuresModule.FeatureExtractor): ''' id = 'R17' + name = 'Average Note Duration' + description = 'Average duration of notes in seconds.' - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Average Note Duration' - self.description = 'Average duration of notes in seconds.' - self.isSequential = True - self.dimensions = 1 - - def process(self): + def process(self) -> None: + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') secondsMap = self.data['flat.secondsMap'] if not secondsMap: raise JSymbolicFeatureException('input lacks notes') @@ -2217,16 +1979,12 @@ class VariabilityOfNoteDurationFeature(featuresModule.FeatureExtractor): 0.178... ''' id = 'R18' + name = 'Variability of Note Duration' + description = 'Standard deviation of note durations in seconds.' - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Variability of Note Duration' - self.description = 'Standard deviation of note durations in seconds.' - self.isSequential = True - self.dimensions = 1 - - def process(self): + def process(self) -> None: + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') secondsMap = self.data['flat.secondsMap'] if not secondsMap: raise JSymbolicFeatureException('input lacks notes') @@ -2248,16 +2006,12 @@ class MaximumNoteDurationFeature(featuresModule.FeatureExtractor): ''' id = 'R19' + name = 'Maximum Note Duration' + description = 'Duration of the longest note (in seconds).' - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Maximum Note Duration' - self.description = 'Duration of the longest note (in seconds).' - self.isSequential = True - self.dimensions = 1 - - def process(self): + def process(self) -> None: + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') secondsMap = self.data['flat.secondsMap'] if not secondsMap: raise JSymbolicFeatureException('input lacks notes') @@ -2279,16 +2033,12 @@ class MinimumNoteDurationFeature(featuresModule.FeatureExtractor): [0.3125] ''' id = 'R20' + name = 'Minimum Note Duration' + description = 'Duration of the shortest note (in seconds).' - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Minimum Note Duration' - self.description = 'Duration of the shortest note (in seconds).' - self.isSequential = True - self.dimensions = 1 - - def process(self): + def process(self) -> None: + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') secondsMap = self.data['flat.secondsMap'] if not secondsMap: raise JSymbolicFeatureException('input lacks notes') @@ -2313,17 +2063,13 @@ class StaccatoIncidenceFeature(featuresModule.FeatureExtractor): ''' id = 'R21' + name = 'Staccato Incidence' + description = ('Number of notes with durations of less than a 10th ' + 'of a second divided by the total number of notes in the recording.') - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Staccato Incidence' - self.description = ('Number of notes with durations of less than a 10th ' - 'of a second divided by the total number of notes in the recording.') - self.isSequential = True - self.dimensions = 1 - - def process(self): + def process(self) -> None: + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') secondsMap = self.data['flat.secondsMap'] if not secondsMap: raise JSymbolicFeatureException('input lacks notes') @@ -2348,16 +2094,12 @@ class AverageTimeBetweenAttacksFeature(featuresModule.FeatureExtractor): ''' id = 'R22' + name = 'Average Time Between Attacks' + description = 'Average time in seconds between Note On events (regardless of channel).' - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Average Time Between Attacks' - self.description = 'Average time in seconds between Note On events (regardless of channel).' - self.isSequential = True - self.dimensions = 1 - - def process(self): + def process(self) -> None: + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') secondsMap = self.data['flat.secondsMap'] # Get a list of note onset times onsets = [bundle['offsetSeconds'] for bundle in secondsMap] @@ -2389,17 +2131,13 @@ class VariabilityOfTimeBetweenAttacksFeature(featuresModule.FeatureExtractor): ''' id = 'R23' + name = 'Variability of Time Between Attacks' + description = ('Standard deviation of the times, in seconds, ' + 'between Note On events (regardless of channel).') - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Variability of Time Between Attacks' - self.description = ('Standard deviation of the times, in seconds, ' - 'between Note On events (regardless of channel).') - self.isSequential = True - self.dimensions = 1 - - def process(self): + def process(self) -> None: + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') secondsMap = self.data['flat.secondsMap'] # Create a list of difference in time offset between consecutive notes onsets = [bundle['offsetSeconds'] for bundle in secondsMap] @@ -2433,17 +2171,13 @@ class AverageTimeBetweenAttacksForEachVoiceFeature( 0.442... ''' id = 'R24' + name = 'Average Time Between Attacks For Each Voice' + description = ('Average of average times in seconds between Note On events ' + 'on individual channels that contain at least one note.') - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Average Time Between Attacks For Each Voice' - self.description = ('Average of average times in seconds between Note On events ' - 'on individual channels that contain at least one note.') - self.isSequential = True - self.dimensions = 1 - - def process(self): + def process(self) -> None: + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') onsetsByPart = [] avgByPart = [] if self.data.partsCount > 0: @@ -2491,18 +2225,14 @@ class AverageVariabilityOfTimeBetweenAttacksForEachVoiceFeature( ''' id = 'R25' - - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Average Variability of Time Between Attacks For Each Voice' - self.description = ('Average standard deviation, in seconds, of time between ' - 'Note On events on individual channels that contain ' - 'at least one note.') - self.isSequential = True - self.dimensions = 1 - - def process(self): + name = 'Average Variability of Time Between Attacks For Each Voice' + description = ('Average standard deviation, in seconds, of time between ' + 'Note On events on individual channels that contain ' + 'at least one note.') + + def process(self) -> None: + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') onsetsByPart = [] stdDeviationByPart = [] @@ -2610,16 +2340,12 @@ class InitialTempoFeature(featuresModule.FeatureExtractor): ''' id = 'R30' + name = 'Initial Tempo' + description = 'Tempo in beats per minute at the start of the recording.' - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Initial Tempo' - self.description = 'Tempo in beats per minute at the start of the recording.' - self.isSequential = True - self.dimensions = 1 - - def process(self): + def process(self) -> None: + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') triples = self.data['metronomeMarkBoundaries'] # the first is the default, if necessary; also provides start/end time mm = triples[0][2] @@ -2647,19 +2373,16 @@ class InitialTimeSignatureFeature(featuresModule.FeatureExtractor): ''' id = 'R31' - - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Initial Time Signature' - self.description = ('A feature array with two elements. ' - 'The first is the numerator of the first occurring time signature ' - 'and the second is the denominator of the first occurring time ' - 'signature. Both are set to 0 if no time signature is present.') - self.isSequential = True - self.dimensions = 2 - - def process(self): + name = 'Initial Time Signature' + description = ('A feature array with two elements. ' + 'The first is the numerator of the first occurring time signature ' + 'and the second is the denominator of the first occurring time ' + 'signature. Both are set to 0 if no time signature is present.') + dimensions = 2 + + def process(self) -> None: + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') elements = self.data['flat.getElementsByClass(TimeSignature)'] if not elements: return # vector already zero @@ -2695,19 +2418,15 @@ class CompoundOrSimpleMeterFeature(featuresModule.FeatureExtractor): ''' id = 'R32' - - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Compound Or Simple Meter' - self.description = ('Set to 1 if the initial meter is compound ' - '(numerator of time signature is greater than or equal to 6 ' - 'and is evenly divisible by 3) and to 0 if it is simple ' - '(if the above condition is not fulfilled).') - self.isSequential = True - self.dimensions = 1 - - def process(self): + name = 'Compound Or Simple Meter' + description = ('Set to 1 if the initial meter is compound ' + '(numerator of time signature is greater than or equal to 6 ' + 'and is evenly divisible by 3) and to 0 if it is simple ' + '(if the above condition is not fulfilled).') + + def process(self) -> None: + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') elements = self.data['flat.getElementsByClass(TimeSignature)'] if elements: @@ -2743,17 +2462,13 @@ class TripleMeterFeature(featuresModule.FeatureExtractor): ''' id = 'R33' + name = 'Triple Meter' + description = ('Set to 1 if numerator of initial time signature is 3, ' + 'set to 0 otherwise.') - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Triple Meter' - self.description = ('Set to 1 if numerator of initial time signature is 3, ' - 'set to 0 otherwise.') - self.isSequential = True - self.dimensions = 1 - - def process(self): + def process(self) -> None: + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') elements = self.data['flat.getElementsByClass(TimeSignature)'] # not: not looking at other triple meters if elements and elements[0].numerator == 3: @@ -2784,17 +2499,13 @@ class QuintupleMeterFeature(featuresModule.FeatureExtractor): ''' id = 'R34' + name = 'Quintuple Meter' + description = ('Set to 1 if numerator of initial time signature is 5, ' + 'set to 0 otherwise.') - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Quintuple Meter' - self.description = ('Set to 1 if numerator of initial time signature is 5, ' - 'set to 0 otherwise.') - self.isSequential = True - self.dimensions = 1 - - def process(self): + def process(self) -> None: + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') elements = self.data['flat.getElementsByClass(TimeSignature)'] if elements and elements[0].numerator == 5: self.feature.vector[0] = 1 @@ -2825,17 +2536,13 @@ class ChangesOfMeterFeature(featuresModule.FeatureExtractor): [0] ''' id = 'R35' + name = 'Changes of Meter' + description = ('Set to 1 if the time signature is changed one or more ' + 'times during the recording.') - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Changes of Meter' - self.description = ('Set to 1 if the time signature is changed one or more ' - 'times during the recording') - self.isSequential = True - self.dimensions = 1 - - def process(self): + def process(self) -> None: + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') elements = self.data['flat.getElementsByClass(TimeSignature)'] if len(elements) <= 1: return # vector already zero @@ -2859,17 +2566,14 @@ class DurationFeature(featuresModule.FeatureExtractor): 18.0 ''' id = 'R36' - - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Duration' - self.description = 'The total duration in seconds of the music.' - self.isSequential = False # this is the only jSymbolic non seq feature - self.dimensions = 1 - self.discrete = False - - def process(self): + name = 'Duration' + description = 'The total duration in seconds of the music.' + isSequential = False # this is the only jSymbolic non seq feature + discrete = False + + def process(self) -> None: + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') secondsMap = self.data['flat.secondsMap'] if not secondsMap: raise JSymbolicFeatureException('input lacks duration') @@ -2886,26 +2590,20 @@ def process(self): class OverallDynamicRangeFeature(featuresModule.FeatureExtractor): ''' - Not implemented + Not implemented. The maximum loudness minus the minimum loudness value. TODO: implement ''' id = 'D1' - - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Overall Dynamic Range' - self.description = 'The maximum loudness minus the minimum loudness value.' - self.isSequential = True - self.dimensions = 1 + name = 'Overall Dynamic Range' + description = 'The maximum loudness minus the minimum loudness value.' class VariationOfDynamicsFeature(featuresModule.FeatureExtractor): ''' - Not implemented + Not implemented. Standard deviation of loudness levels of all notes. @@ -2913,19 +2611,13 @@ class VariationOfDynamicsFeature(featuresModule.FeatureExtractor): ''' id = 'D2' - - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Variation of Dynamics' - self.description = 'Standard deviation of loudness levels of all notes.' - self.isSequential = True - self.dimensions = 1 + name = 'Variation of Dynamics' + description = 'Standard deviation of loudness levels of all notes.' class VariationOfDynamicsInEachVoiceFeature(featuresModule.FeatureExtractor): ''' - Not implemented + Not implemented. The average of the standard deviations of loudness levels within each channel that contains at least one note. @@ -2934,20 +2626,14 @@ class VariationOfDynamicsInEachVoiceFeature(featuresModule.FeatureExtractor): ''' id = 'D3' - - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Variation of Dynamics In Each Voice' - self.description = ('The average of the standard deviations of loudness ' - 'levels within each channel that contains at least one note.') - self.isSequential = True - self.dimensions = 1 + name = 'Variation of Dynamics In Each Voice' + description = ('The average of the standard deviations of loudness ' + 'levels within each channel that contains at least one note.') class AverageNoteToNoteDynamicsChangeFeature(featuresModule.FeatureExtractor): ''' - Not implemented + Not implemented. Average change of loudness from one note to the next note in the same channel (in MIDI velocity units). @@ -2957,15 +2643,9 @@ class AverageNoteToNoteDynamicsChangeFeature(featuresModule.FeatureExtractor): ''' id = 'D4' - - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Average Note To Note Dynamics Change' - self.description = ('Average change of loudness from one note to the next note ' - 'in the same channel (in MIDI velocity units).') - self.isSequential = True - self.dimensions = 1 + name = 'Average Note To Note Dynamics Change' + description = ('Average change of loudness from one note to the next note ' + 'in the same channel (in MIDI velocity units).') # ------------------------------------------------------------------------------ @@ -2991,17 +2671,13 @@ class MaximumNumberOfIndependentVoicesFeature(featuresModule.FeatureExtractor): ''' id = 'T1' + name = 'Maximum Number of Independent Voices' + description = ('Maximum number of different channels in which notes ' + 'have sounded simultaneously. Here, Parts are treated as channels.') - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Maximum Number of Independent Voices' - self.description = ('Maximum number of different channels in which notes ' - 'have sounded simultaneously. Here, Parts are treated as channels.') - self.isSequential = True - self.dimensions = 1 - - def process(self): + def process(self) -> None: + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') # for each chordify, find the largest number different groups found = 0 for c in self.data['chordify.flat.getElementsByClass(Chord)']: @@ -3018,7 +2694,7 @@ def process(self): class AverageNumberOfIndependentVoicesFeature(featuresModule.FeatureExtractor): ''' Average number of different channels in which notes have sounded simultaneously. - Rests are not included in this calculation. Here, Parts are treated as voices + Rests are not included in this calculation. Here, Parts are treated as voices. >>> s = corpus.parse('handel/rinaldo/lascia_chio_pianga') >>> fe = features.jSymbolic.AverageNumberOfIndependentVoicesFeature(s) @@ -3033,18 +2709,14 @@ class AverageNumberOfIndependentVoicesFeature(featuresModule.FeatureExtractor): [3.90...] ''' id = 'T2' - - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Average Number of Independent Voices' - self.description = ('Average number of different channels in which notes have ' - 'sounded simultaneously. Rests are not included in this ' - 'calculation. Here, Parts are treated as voices') - self.isSequential = True - self.dimensions = 1 - - def process(self): + name = 'Average Number of Independent Voices' + description = ('Average number of different channels in which notes have ' + 'sounded simultaneously. Rests are not included in this ' + 'calculation. Here, Parts are treated as voices.') + + def process(self) -> None: + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') # for each chordify, find the largest number different groups found = [] for c in self.data['chordify.flat.getElementsByClass(Chord)']: @@ -3073,18 +2745,14 @@ class VariabilityOfNumberOfIndependentVoicesFeature( [0.449...] ''' id = 'T3' - - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Variability of Number of Independent Voices' - self.description = ('Standard deviation of number of different channels ' - 'in which notes have sounded simultaneously. Rests are ' - 'not included in this calculation.') - self.isSequential = True - self.dimensions = 1 - - def process(self): + name = 'Variability of Number of Independent Voices' + description = ('Standard deviation of number of different channels ' + 'in which notes have sounded simultaneously. Rests are ' + 'not included in this calculation.') + + def process(self) -> None: + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') # for each chordify, find the largest number different groups found = [] for c in self.data['chordify.flat.getElementsByClass(Chord)']: @@ -3103,7 +2771,7 @@ def process(self): class VoiceEqualityNumberOfNotesFeature(featuresModule.FeatureExtractor): ''' - Not implemented + Not implemented. TODO: implement @@ -3111,216 +2779,150 @@ class VoiceEqualityNumberOfNotesFeature(featuresModule.FeatureExtractor): that contains at least one note. ''' id = 'T4' - - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Voice Equality - Number of Notes' - self.description = ('Standard deviation of the total number of Note Ons ' - 'in each channel that contains at least one note.') - self.isSequential = True - self.dimensions = 1 + name = 'Voice Equality - Number of Notes' + description = ('Standard deviation of the total number of Note Ons ' + 'in each channel that contains at least one note.') class VoiceEqualityNoteDurationFeature(featuresModule.FeatureExtractor): ''' - Not implemented + Not implemented. TODO: implement ''' id = 'T5' - - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Voice Equality - Note Duration' - self.description = ('Standard deviation of the total duration of notes in seconds ' - 'in each channel that contains at least one note.') - self.isSequential = True - self.dimensions = 1 + name = 'Voice Equality - Note Duration' + description = ('Standard deviation of the total duration of notes in seconds ' + 'in each channel that contains at least one note.') class VoiceEqualityDynamicsFeature(featuresModule.FeatureExtractor): ''' - Not implemented + Not implemented. TODO: implement ''' id = 'T6' - - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Voice Equality - Dynamics' - self.description = ('Standard deviation of the average volume of notes ' - 'in each channel that contains at least one note.') - self.isSequential = True - self.dimensions = 1 + name = 'Voice Equality - Dynamics' + description = ('Standard deviation of the average volume of notes ' + 'in each channel that contains at least one note.') class VoiceEqualityMelodicLeapsFeature(featuresModule.FeatureExtractor): ''' - Not implemented + Not implemented. TODO: implement ''' id = 'T7' - - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Voice Equality - Melodic Leaps' - self.description = dedent(''' + name = 'Voice Equality - Melodic Leaps' + description = dedent(''' Standard deviation of the average melodic leap in MIDI pitches for each channel that contains at least one note.''') - self.isSequential = True - self.dimensions = 1 class VoiceEqualityRangeFeature(featuresModule.FeatureExtractor): ''' - Not implemented + Not implemented. Standard deviation of the differences between the highest and lowest pitches in each channel that contains at least one note. ''' id = 'T8' - - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Voice Equality - Range' - self.description = dedent(''' + name = 'Voice Equality - Range' + description = dedent(''' Standard deviation of the differences between the highest and lowest pitches in each channel that contains at least one note.''') - self.isSequential = True - self.dimensions = 1 class ImportanceOfLoudestVoiceFeature(featuresModule.FeatureExtractor): ''' - Not implemented + Not implemented. TODO: implement ''' id = 'T9' - - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Importance of Loudest Voice' - self.description = dedent(''' + name = 'Importance of Loudest Voice' + description = dedent(''' Difference between the average loudness of the loudest channel and the average loudness of the other channels that contain at least one note.''') - self.isSequential = True - self.dimensions = 1 class RelativeRangeOfLoudestVoiceFeature(featuresModule.FeatureExtractor): ''' - Not implemented + Not implemented. TODO: implement ''' id = 'T10' - - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Relative Range of Loudest Voice' - self.description = dedent(''' + name = 'Relative Range of Loudest Voice' + description = dedent(''' Difference between the highest note and the lowest note played in the channel with the highest average loudness divided by the difference between the highest note and the lowest note overall in the piece.''') - self.isSequential = True - self.dimensions = 1 class RangeOfHighestLineFeature(featuresModule.FeatureExtractor): ''' - Not implemented + Not implemented. TODO: implement ''' id = 'T12' - - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Range of Highest Line' - self.description = dedent(''' + name = 'Range of Highest Line' + description = dedent(''' Difference between the highest note and the lowest note played in the channel with the highest average pitch divided by the difference between the highest note and the lowest note in the piece.''') - self.isSequential = True - self.dimensions = 1 class RelativeNoteDensityOfHighestLineFeature(featuresModule.FeatureExtractor): ''' - Not implemented + Not implemented. TODO: implement ''' id = 'T13' - - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Relative Note Density of Highest Line' - self.description = dedent(''' + name = 'Relative Note Density of Highest Line' + description = dedent(''' Number of Note Ons in the channel with the highest average pitch divided by the average number of Note Ons in all channels that contain at least one note.''') - self.isSequential = True - self.dimensions = 1 class MelodicIntervalsInLowestLineFeature(featuresModule.FeatureExtractor): ''' - Not implemented + Not implemented. TODO: implement ''' id = 'T15' - - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Melodic Intervals in Lowest Line' - self.description = dedent(''' + name = 'Melodic Intervals in Lowest Line' + description = dedent(''' Average melodic interval in semitones of the channel with the lowest average pitch divided by the average melodic interval of all channels that contain at least two notes.''') - self.isSequential = True - self.dimensions = 1 class VoiceSeparationFeature(featuresModule.FeatureExtractor): ''' - Not implemented + Not implemented. Average separation in semitones between the average pitches of consecutive channels (after sorting based/non-average pitch) that contain at least one note. ''' id = 'T20' - - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Voice Separation' - self.description = dedent(''' + name = 'Voice Separation' + description = dedent(''' Average separation in semi-tones between the average pitches of consecutive channels (after sorting based/non average pitch) that contain at least one note.''') - self.isSequential = True - self.dimensions = 1 # ------------------------------------------------------------------------------ @@ -3362,23 +2964,20 @@ class PitchedInstrumentsPresentFeature(featuresModule.FeatureExtractor): lacks a midiProgram ''' id = 'I1' - - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Pitched Instruments Present' - self.description = dedent(''' + name = 'Pitched Instruments Present' + description = dedent(''' Which pitched General MIDI Instruments are present. There is one entry for each instrument, which is set to 1.0 if there is at least one Note On in the recording corresponding to the instrument and to 0.0 if there is not.''') - self.isSequential = True - self.dimensions = 128 + dimensions = 128 - def process(self): + def process(self) -> None: ''' Do processing necessary, storing result in feature. ''' + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') s = self.data['partitionByInstrument'] # each part has content for each instrument # count = 0 @@ -3396,7 +2995,7 @@ def process(self): class UnpitchedInstrumentsPresentFeature(featuresModule.FeatureExtractor): ''' - Not yet implemented + Not yet implemented. Which unpitched MIDI Percussion Key Map instruments are present. There is one entry for each instrument, which is set to 1.0 if there is @@ -3409,21 +3008,18 @@ class UnpitchedInstrumentsPresentFeature(featuresModule.FeatureExtractor): # values in for events on midi program channel 10 id = 'I2' - - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Unpitched Instruments Present' - self.description = dedent(''' + name = 'Unpitched Instruments Present' + description = dedent(''' Which unpitched MIDI Percussion Key Map instruments are present. There is one entry for each instrument, which is set to 1.0 if there is at least one Note On in the recording corresponding to the instrument and to 0.0 if there is not. It should be noted that only instruments 35 to 81 are included here, as they are the ones that meet the official standard. They are numbered in this array from 0 to 46.''') - self.isSequential = True - self.dimensions = 47 + dimensions = 47 - def process(self): + def process(self) -> None: + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') raise JSymbolicFeatureException('not yet implemented') # TODO: implement @@ -3456,23 +3052,20 @@ class NotePrevalenceOfPitchedInstrumentsFeature( music21.features.jSymbolic.JSymbolicFeatureException: Acoustic Guitar lacks a midiProgram ''' id = 'I3' - - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Note Prevalence of Pitched Instruments' - self.description = ('The fraction of (pitched) notes played by each ' - 'General MIDI Instrument. There is one entry for ' - 'each instrument, which is set to the number of ' - 'Note Ons played using the corresponding MIDI patch ' - 'divided by the total number of Note Ons in the recording.') - self.isSequential = True - self.dimensions = 128 - - def process(self): + name = 'Note Prevalence of Pitched Instruments' + description = ('The fraction of (pitched) notes played by each ' + 'General MIDI Instrument. There is one entry for ' + 'each instrument, which is set to the number of ' + 'Note Ons played using the corresponding MIDI patch ' + 'divided by the total number of Note Ons in the recording.') + dimensions = 128 + + def process(self) -> None: ''' Do processing necessary, storing result in feature. ''' + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') s = self.data['partitionByInstrument'] total = sum(self.data['pitches.pitchClassHistogram']) # each part has content for each instrument @@ -3493,26 +3086,21 @@ def process(self): class NotePrevalenceOfUnpitchedInstrumentsFeature( featuresModule.FeatureExtractor): ''' - Not implemented + Not implemented. TODO: implement ''' id = 'I4' - - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Note Prevalence of Unpitched Instruments' - self.description = dedent(''' + name = 'Note Prevalence of Unpitched Instruments' + description = dedent(''' The fraction of (unpitched) notes played by each General MIDI Percussion Key Map Instrument. There is one entry for each instrument, which is set to the number of Note Ons played using the corresponding MIDI note value divided by the total number of Note Ons in the recording. It should be noted that only instruments 35 to 81 are included here, as they are the ones that meet the official standard. They are numbered in this array from 0 to 46.''') - self.isSequential = True - self.dimensions = 47 + dimensions = 47 # TODO: need to find events in channel 10. @@ -3520,7 +3108,7 @@ def __init__(self, dataOrStream=None, **keywords): class TimePrevalenceOfPitchedInstrumentsFeature( featuresModule.FeatureExtractor): ''' - Not implemented + Not implemented. The fraction of the total time of the recording in which a note was sounding for each (pitched) General @@ -3533,18 +3121,13 @@ class TimePrevalenceOfPitchedInstrumentsFeature( ''' id = 'I5' - - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Time Prevalence of Pitched Instruments' - self.description = ('The fraction of the total time of the recording in which a note ' - 'was sounding for each (pitched) General MIDI Instrument. ' - 'There is one entry for each instrument, which is set to the total ' - 'time in seconds during which a given instrument was sounding one ' - 'or more notes divided by the total length in seconds of the piece.') - self.isSequential = True - self.dimensions = 128 + name = 'Time Prevalence of Pitched Instruments' + description = ('The fraction of the total time of the recording in which a note ' + 'was sounding for each (pitched) General MIDI Instrument. ' + 'There is one entry for each instrument, which is set to the total ' + 'time in seconds during which a given instrument was sounding one ' + 'or more notes divided by the total length in seconds of the piece.') + dimensions = 128 # TODO: this can be done by symbolic duration in native.py @@ -3566,18 +3149,14 @@ class VariabilityOfNotePrevalenceOfPitchedInstrumentsFeature( ''' id = 'I6' - - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Variability of Note Prevalence of Pitched Instruments' - self.description = ('Standard deviation of the fraction of Note Ons played ' - 'by each (pitched) General MIDI instrument that is ' - 'used to play at least one note.') - self.isSequential = True - self.dimensions = 1 - - def process(self): + name = 'Variability of Note Prevalence of Pitched Instruments' + description = ('Standard deviation of the fraction of Note Ons played ' + 'by each (pitched) General MIDI instrument that is ' + 'used to play at least one note.') + + def process(self) -> None: + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') s = self.data['partitionByInstrument'] total = sum(self.data['pitches.pitchClassHistogram']) if not s: @@ -3603,7 +3182,7 @@ def process(self): class VariabilityOfNotePrevalenceOfUnpitchedInstrumentsFeature( featuresModule.FeatureExtractor): ''' - Not implemented + Not implemented. Standard deviation of the fraction of Note Ons played by each (unpitched) MIDI Percussion Key Map instrument that is used to play at least one note. It should be noted that only @@ -3614,18 +3193,12 @@ class VariabilityOfNotePrevalenceOfUnpitchedInstrumentsFeature( ''' id = 'I7' - - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Variability of Note Prevalence of Unpitched Instruments' - self.description = ( - 'Standard deviation of the fraction of Note Ons played by each (unpitched) ' - 'MIDI Percussion Key Map instrument that is used to play at least one note. ' - 'It should be noted that only instruments 35 to 81 are included here, ' - 'as they are the ones that are included in the official standard.') - self.isSequential = True - self.dimensions = 1 + name = 'Variability of Note Prevalence of Unpitched Instruments' + description = ( + 'Standard deviation of the fraction of Note Ons played by each (unpitched) ' + 'MIDI Percussion Key Map instrument that is used to play at least one note. ' + 'It should be noted that only instruments 35 to 81 are included here, ' + 'as they are the ones that are included in the official standard.') class NumberOfPitchedInstrumentsFeature(featuresModule.FeatureExtractor): @@ -3643,20 +3216,16 @@ class NumberOfPitchedInstrumentsFeature(featuresModule.FeatureExtractor): ''' id = 'I8' + name = 'Number of Pitched Instruments' + description = ('Total number of General MIDI patches that are used to ' + 'play at least one note.') - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Number of Pitched Instruments' - self.description = ('Total number of General MIDI patches that are used to ' - 'play at least one note.') - self.isSequential = True - self.dimensions = 1 - - def process(self): + def process(self) -> None: ''' Do processing necessary, storing result in feature. ''' + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') s = self.data['partitionByInstrument'] # each part has content for each instrument count = 0 @@ -3670,7 +3239,7 @@ def process(self): class NumberOfUnpitchedInstrumentsFeature(featuresModule.FeatureExtractor): ''' - Not implemented + Not implemented. Number of distinct MIDI Percussion Key Map patches that were used to play at least one note. It should be noted that only instruments 35 to 81 are @@ -3680,37 +3249,25 @@ class NumberOfUnpitchedInstrumentsFeature(featuresModule.FeatureExtractor): ''' id = 'I9' - - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Number of Unpitched Instruments' - self.description = ('Number of distinct MIDI Percussion Key Map patches that were ' - 'used to play at least one note. It should be noted that only ' - 'instruments 35 to 81 are included here, as they are the ones ' - 'that are included in the official standard.') - self.isSequential = True - self.dimensions = 1 + name = 'Number of Unpitched Instruments' + description = ('Number of distinct MIDI Percussion Key Map patches that were ' + 'used to play at least one note. It should be noted that only ' + 'instruments 35 to 81 are included here, as they are the ones ' + 'that are included in the official standard.') class PercussionPrevalenceFeature(featuresModule.FeatureExtractor): ''' - Not implemented + Not implemented. Total number of Note Ons corresponding to unpitched percussion instruments divided by the total number of Note Ons in the recording. ''' id = 'I10' - - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Percussion Prevalence' - self.description = ('Total number of Note Ons corresponding to unpitched percussion ' - 'instruments divided by total number of Note Ons in the recording.') - self.isSequential = True - self.dimensions = 1 + name = 'Percussion Prevalence' + description = ('Total number of Note Ons corresponding to unpitched percussion ' + 'instruments divided by total number of Note Ons in the recording.') class InstrumentFractionFeature(featuresModule.FeatureExtractor): @@ -3720,17 +3277,14 @@ class InstrumentFractionFeature(featuresModule.FeatureExtractor): This subclass is in-turn subclassed by all FeatureExtractors that look at the proportional usage of an Instrument ''' + _targetPrograms: Sequence[int] = [] - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - # subclasses must define - self._targetPrograms = [] - - def process(self): + def process(self) -> None: ''' Do processing necessary, storing result in feature. ''' + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') s = self.data['partitionByInstrument'] total = sum(self.data['pitches.pitchClassHistogram']) count = 0 @@ -3761,17 +3315,10 @@ class StringKeyboardFractionFeature(InstrumentFractionFeature): ''' id = 'I11' - - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'String Keyboard Fraction' - self.description = ('Fraction of all Note Ons belonging to string keyboard patches ' - '(General MIDI patches 1 to 8).') - self.isSequential = True - self.dimensions = 1 - - self._targetPrograms = range(8) + name = 'String Keyboard Fraction' + description = ('Fraction of all Note Ons belonging to string keyboard patches ' + '(General MIDI patches 1 to 8).') + _targetPrograms = range(8) class AcousticGuitarFractionFeature(InstrumentFractionFeature): @@ -3790,17 +3337,10 @@ class AcousticGuitarFractionFeature(InstrumentFractionFeature): ''' id = 'I12' - - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Acoustic Guitar Fraction' - self.description = ('Fraction of all Note Ons belonging to acoustic guitar patches ' - '(General MIDI patches 25 and 26).') - self.isSequential = True - self.dimensions = 1 - - self._targetPrograms = [24, 25] + name = 'Acoustic Guitar Fraction' + description = ('Fraction of all Note Ons belonging to acoustic guitar patches ' + '(General MIDI patches 25 and 26).') + _targetPrograms = [24, 25] class ElectricGuitarFractionFeature(InstrumentFractionFeature): @@ -3816,17 +3356,10 @@ class ElectricGuitarFractionFeature(InstrumentFractionFeature): ''' id = 'I13' - - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Electric Guitar Fraction' - self.description = ('Fraction of all Note Ons belonging to ' - 'electric guitar patches (General MIDI patches 27 to 32).') - self.isSequential = True - self.dimensions = 1 - - self._targetPrograms = list(range(26, 32)) + name = 'Electric Guitar Fraction' + description = ('Fraction of all Note Ons belonging to ' + 'electric guitar patches (General MIDI patches 27 to 32).') + _targetPrograms = list(range(26, 32)) class ViolinFractionFeature(InstrumentFractionFeature): @@ -3845,17 +3378,10 @@ class ViolinFractionFeature(InstrumentFractionFeature): ''' id = 'I14' - - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Violin Fraction' - self.description = ('Fraction of all Note Ons belonging to violin patches ' - '(General MIDI patches 41 or 111).') - self.isSequential = True - self.dimensions = 1 - - self._targetPrograms = [40, 110] + name = 'Violin Fraction' + description = ('Fraction of all Note Ons belonging to violin patches ' + '(General MIDI patches 41 or 111).') + _targetPrograms = [40, 110] class SaxophoneFractionFeature(InstrumentFractionFeature): @@ -3874,17 +3400,10 @@ class SaxophoneFractionFeature(InstrumentFractionFeature): 0.6 ''' id = 'I15' - - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Saxophone Fraction' - self.description = ('Fraction of all Note Ons belonging to saxophone patches ' - '(General MIDI patches 65 through 68).') - self.isSequential = True - self.dimensions = 1 - - self._targetPrograms = [64, 65, 66, 67] + name = 'Saxophone Fraction' + description = ('Fraction of all Note Ons belonging to saxophone patches ' + '(General MIDI patches 65 through 68).') + _targetPrograms = [64, 65, 66, 67] class BrassFractionFeature(InstrumentFractionFeature): @@ -3905,17 +3424,10 @@ class BrassFractionFeature(InstrumentFractionFeature): ''' id = 'I16' - - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Brass Fraction' - self.description = ('Fraction of all Note Ons belonging to brass patches ' - '(General MIDI patches 57 through 68).') # note: incorrect - self.isSequential = True - self.dimensions = 1 - - self._targetPrograms = list(range(56, 62)) + name = 'Brass Fraction' + description = ('Fraction of all Note Ons belonging to brass patches ' + '(General MIDI patches 57 through 68).') # note: incorrect + _targetPrograms = list(range(56, 62)) class WoodwindsFractionFeature(InstrumentFractionFeature): @@ -3936,17 +3448,10 @@ class WoodwindsFractionFeature(InstrumentFractionFeature): ''' id = 'I17' - - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Woodwinds Fraction' - self.description = ('Fraction of all Note Ons belonging to woodwind patches ' - '(General MIDI patches 69 through 76).') - self.isSequential = True - self.dimensions = 1 - - self._targetPrograms = list(range(68, 80)) # include ocarina! + name = 'Woodwinds Fraction' + description = ('Fraction of all Note Ons belonging to woodwind patches ' + '(General MIDI patches 69 through 76).') + _targetPrograms = list(range(68, 80)) # include ocarina! class OrchestralStringsFractionFeature(InstrumentFractionFeature): @@ -3965,22 +3470,15 @@ class OrchestralStringsFractionFeature(InstrumentFractionFeature): ''' id = 'I18' - - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Orchestral Strings Fraction' - self.description = ('Fraction of all Note Ons belonging to orchestral strings patches ' - '(General MIDI patches 41 or 47).') - self.isSequential = True - self.dimensions = 1 - - self._targetPrograms = list(range(41, 46)) + name = 'Orchestral Strings Fraction' + description = ('Fraction of all Note Ons belonging to orchestral strings patches ' + '(General MIDI patches 41 or 47).') + _targetPrograms = list(range(41, 46)) class StringEnsembleFractionFeature(InstrumentFractionFeature): ''' - Not implemented + Not implemented. Fraction of all Note Ons belonging to string ensemble patches (General MIDI patches 49 to 52). @@ -3988,17 +3486,10 @@ class StringEnsembleFractionFeature(InstrumentFractionFeature): # TODO: add tests, do not yet have instrument to model id = 'I19' - - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'String Ensemble Fraction' - self.description = ('Fraction of all Note Ons belonging to string ensemble patches ' - '(General MIDI patches 49 to 52).') - self.isSequential = True - self.dimensions = 1 - - self._targetPrograms = [48, 49, 50, 51] + name = 'String Ensemble Fraction' + description = ('Fraction of all Note Ons belonging to string ensemble patches ' + '(General MIDI patches 49 to 52).') + _targetPrograms = [48, 49, 50, 51] class ElectricInstrumentFractionFeature(InstrumentFractionFeature): @@ -4016,18 +3507,11 @@ class ElectricInstrumentFractionFeature(InstrumentFractionFeature): 0.8 ''' id = 'I20' - - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Electric Instrument Fraction' - self.description = ('Fraction of all Note Ons belonging to electric instrument patches ' - '(General MIDI patches 5, 6, 17, 19, 27 to 32 or 34 to 40).') - self.isSequential = True - self.dimensions = 1 - - self._targetPrograms = [4, 5, 16, 18, 26, 27, 28, 29, - 30, 31, 33, 34, 35, 36, 37, 38, 39] # accept synth bass + name = 'Electric Instrument Fraction' + description = ('Fraction of all Note Ons belonging to electric instrument patches ' + '(General MIDI patches 5, 6, 17, 19, 27 to 32 or 34 to 40).') + _targetPrograms = [4, 5, 16, 18, 26, 27, 28, 29, + 30, 31, 33, 34, 35, 36, 37, 38, 39] # accept synth bass # ----------------------------------------------------------------------------- diff --git a/music21/features/native.py b/music21/features/native.py index 69ce97ffb..6a7cce173 100644 --- a/music21/features/native.py +++ b/music21/features/native.py @@ -9,6 +9,8 @@ # ------------------------------------------------------------------------------ ''' Original music21 feature extractors. + +Type annotations in this module were added with AI assistance (Claude). ''' from __future__ import annotations @@ -88,12 +90,8 @@ class QualityFeature(featuresModule.FeatureExtractor): ''' id = 'P22' - - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Quality' - self.description = ''' + name = 'Quality' + description = ''' Set to 0 if the Key or KeySignature indicates that a recording is major, set to 1 if it indicates that it is minor. @@ -101,8 +99,6 @@ def __init__(self, dataOrStream=None, **keywords): modes in the keys, analyze the piece to discover what mode it is most likely in. ''' - self.isSequential = True - self.dimensions = 1 def process(self) -> None: ''' @@ -112,15 +108,14 @@ def process(self) -> None: raise ValueError('Cannot process without a data instance or feature.') allKeys = self.data['flat.getElementsByClass(Key)'] - keyFeature: int|None = None if len(allKeys) == 1: k0 = allKeys[0] if k0.mode == 'major': - keyFeature = 0 + self.feature.vector[0] = 0 + return elif k0.mode == 'minor': - keyFeature = 1 - self.feature.vector[0] = keyFeature - return + self.feature.vector[0] = 1 + return useKey = None if len(allKeys) == 1: @@ -172,20 +167,17 @@ class TonalCertainty(featuresModule.FeatureExtractor): [0.0] ''' id = 'K1' # TODO: need id + name = 'Tonal Certainty' + description = ('A floating point magnitude value that suggest tonal ' + 'certainty based on automatic key analysis.') + discrete = False - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Tonal Certainty' - self.description = ('A floating point magnitude value that suggest tonal ' - 'certainty based on automatic key analysis.') - self.dimensions = 1 - self.discrete = False - - def process(self): + def process(self) -> None: ''' Do processing necessary, storing result in feature. ''' + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') self.feature.vector[0] = self.data['flat.analyzedKey.tonalCertainty'] @@ -205,15 +197,10 @@ class FirstBeatAttackPrevalence(featuresModule.FeatureExtractor): TODO: Implement! ''' id = 'MP1' - - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'First Beat Attack Prevalence' - self.description = ('Fraction of first beats of a measure that have notes ' - 'that start on this beat.') - self.dimensions = 1 - self.discrete = False + name = 'First Beat Attack Prevalence' + description = ('Fraction of first beats of a measure that have notes ' + 'that start on this beat.') + discrete = False # ------------------------------------------------------------------------------ @@ -228,19 +215,15 @@ class UniqueNoteQuarterLengths(featuresModule.FeatureExtractor): [3] ''' id = 'QL1' + name = 'Unique Note Quarter Lengths' + description = 'The number of unique note quarter lengths.' - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Unique Note Quarter Lengths' - self.description = 'The number of unique note quarter lengths.' - self.dimensions = 1 - self.discrete = True - - def process(self): + def process(self) -> None: ''' Do processing necessary, storing result in feature. ''' + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') count = 0 histo = self.data['flat.notes.quarterLengthHistogram'] for key in histo: @@ -258,19 +241,16 @@ class MostCommonNoteQuarterLength(featuresModule.FeatureExtractor): [1.0] ''' id = 'QL2' + name = 'Most Common Note Quarter Length' + description = 'The value of the most common quarter length.' + discrete = False - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Most Common Note Quarter Length' - self.description = 'The value of the most common quarter length.' - self.dimensions = 1 - self.discrete = False - - def process(self): + def process(self) -> None: ''' Do processing necessary, storing result in feature. ''' + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') histo = self.data['flat.notes.quarterLengthHistogram'] maximum = 0 ql = 0 @@ -292,19 +272,16 @@ class MostCommonNoteQuarterLengthPrevalence(featuresModule.FeatureExtractor): [0.60...] ''' id = 'QL3' + name = 'Most Common Note Quarter Length Prevalence' + description = 'Fraction of notes that have the most common quarter length.' + discrete = False - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Most Common Note Quarter Length Prevalence' - self.description = 'Fraction of notes that have the most common quarter length.' - self.dimensions = 1 - self.discrete = False - - def process(self): + def process(self) -> None: ''' Do processing necessary, storing result in feature. ''' + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') summation = 0 # count of all histo = self.data['flat.notes.quarterLengthHistogram'] if not histo: @@ -329,19 +306,16 @@ class RangeOfNoteQuarterLengths(featuresModule.FeatureExtractor): [1.5] ''' id = 'QL4' + name = 'Range of Note Quarter Lengths' + description = 'Difference between the longest and shortest quarter lengths.' + discrete = False - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Range of Note Quarter Lengths' - self.description = 'Difference between the longest and shortest quarter lengths.' - self.dimensions = 1 - self.discrete = False - - def process(self): + def process(self) -> None: ''' Do processing necessary, storing result in feature. ''' + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') histo = self.data['flat.notes.quarterLengthHistogram'] if not histo: raise NativeFeatureException('input lacks notes') @@ -370,19 +344,16 @@ class UniquePitchClassSetSimultaneities(featuresModule.FeatureExtractor): [27] ''' id = 'CS1' + name = 'Unique Pitch Class Set Simultaneities' + description = 'Number of unique pitch class simultaneities.' + discrete = False - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Unique Pitch Class Set Simultaneities' - self.description = 'Number of unique pitch class simultaneities.' - self.dimensions = 1 - self.discrete = False - - def process(self): + def process(self) -> None: ''' Do processing necessary, storing result in feature. ''' + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') count = 0 histo = self.data['chordify.flat.getElementsByClass(Chord).pitchClassSetHistogram'] for key in histo: @@ -402,19 +373,16 @@ class UniqueSetClassSimultaneities(featuresModule.FeatureExtractor): [14] ''' id = 'CS2' + name = 'Unique Set Class Simultaneities' + description = 'Number of unique set class simultaneities.' + discrete = False - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Unique Set Class Simultaneities' - self.description = 'Number of unique set class simultaneities.' - self.dimensions = 1 - self.discrete = False - - def process(self): + def process(self) -> None: ''' Do processing necessary, storing result in feature. ''' + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') count = 0 histo = self.data['chordify.flat.getElementsByClass(Chord).setClassHistogram'] for key in histo: @@ -435,20 +403,17 @@ class MostCommonPitchClassSetSimultaneityPrevalence( [0.134...] ''' id = 'CS3' + name = 'Most Common Pitch Class Set Simultaneity Prevalence' + description = ('Fraction of all pitch class simultaneities that are ' + 'the most common simultaneity.') + discrete = False - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Most Common Pitch Class Set Simultaneity Prevalence' - self.description = ('Fraction of all pitch class simultaneities that are ' - 'the most common simultaneity.') - self.dimensions = 1 - self.discrete = False - - def process(self): + def process(self) -> None: ''' Do processing necessary, storing result in feature. ''' + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') summation = 0 # count of all histo = self.data['chordify.flat.getElementsByClass(Chord).pitchClassSetHistogram'] maxKey = 0 # max found for any one key @@ -481,20 +446,17 @@ class MostCommonSetClassSimultaneityPrevalence(featuresModule.FeatureExtractor): [0.235...] ''' id = 'CS4' + name = 'Most Common Set Class Simultaneity Prevalence' + description = ('Fraction of all set class simultaneities that ' + 'are the most common simultaneity.') + discrete = False - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Most Common Set Class Simultaneity Prevalence' - self.description = ('Fraction of all set class simultaneities that ' - 'are the most common simultaneity.') - self.dimensions = 1 - self.discrete = False - - def process(self): + def process(self) -> None: ''' Do processing necessary, storing result in feature. ''' + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') summation = 0 # count of all histo = self.data['chordify.flat.getElementsByClass(Chord).setClassHistogram'] if not histo: @@ -522,19 +484,16 @@ class MajorTriadSimultaneityPrevalence(featuresModule.FeatureExtractor): [0.46...] ''' id = 'CS5' + name = 'Major Triad Simultaneity Prevalence' + description = 'Percentage of all simultaneities that are major triads.' + discrete = False - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Major Triad Simultaneity Prevalence' - self.description = 'Percentage of all simultaneities that are major triads.' - self.dimensions = 1 - self.discrete = False - - def process(self): + def process(self) -> None: ''' Do processing necessary, storing result in feature. ''' + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') # use for total number of chords total = len(self.data['chordify.flat.getElementsByClass(Chord)']) histo = self.data['chordify.flat.getElementsByClass(Chord).typesHistogram'] @@ -556,19 +515,16 @@ class MinorTriadSimultaneityPrevalence(featuresModule.FeatureExtractor): [0.211...] ''' id = 'CS6' + name = 'Minor Triad Simultaneity Prevalence' + description = 'Percentage of all simultaneities that are minor triads.' + discrete = False - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Minor Triad Simultaneity Prevalence' - self.description = 'Percentage of all simultaneities that are minor triads.' - self.dimensions = 1 - self.discrete = False - - def process(self): + def process(self) -> None: ''' Do processing necessary, storing result in feature. ''' + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') # use for total number of chords total = len(self.data['chordify.flat.getElementsByClass(Chord)']) histo = self.data['chordify.flat.getElementsByClass(Chord).typesHistogram'] @@ -590,19 +546,16 @@ class DominantSeventhSimultaneityPrevalence(featuresModule.FeatureExtractor): [0.076...] ''' id = 'CS7' + name = 'Dominant Seventh Simultaneity Prevalence' + description = 'Percentage of all simultaneities that are dominant seventh.' + discrete = False - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Dominant Seventh Simultaneity Prevalence' - self.description = 'Percentage of all simultaneities that are dominant seventh.' - self.dimensions = 1 - self.discrete = False - - def process(self): + def process(self) -> None: ''' Do processing necessary, storing result in feature. ''' + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') # use for total number of chords total = len(self.data['chordify.flat.getElementsByClass(Chord)']) histo = self.data['chordify.flat.getElementsByClass(Chord).typesHistogram'] @@ -624,19 +577,16 @@ class DiminishedTriadSimultaneityPrevalence(featuresModule.FeatureExtractor): [0.019...] ''' id = 'CS8' + name = 'Diminished Triad Simultaneity Prevalence' + description = 'Percentage of all simultaneities that are diminished triads.' + discrete = False - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Diminished Triad Simultaneity Prevalence' - self.description = 'Percentage of all simultaneities that are diminished triads.' - self.dimensions = 1 - self.discrete = False - - def process(self): + def process(self) -> None: ''' Do processing necessary, storing result in feature. ''' + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') # use for total number of chords total = len(self.data['chordify.flat.getElementsByClass(Chord)']) histo = self.data['chordify.flat.getElementsByClass(Chord).typesHistogram'] @@ -651,7 +601,7 @@ def process(self): class TriadSimultaneityPrevalence(featuresModule.FeatureExtractor): ''' Gives the proportion of all simultaneities which form triads (major, - minor, diminished, or augmented) + minor, diminished, or augmented). >>> s = corpus.parse('bwv66.6') >>> fe = features.native.TriadSimultaneityPrevalence(s) @@ -663,19 +613,16 @@ class TriadSimultaneityPrevalence(featuresModule.FeatureExtractor): [0.02272727...] ''' id = 'CS9' + name = 'Triad Simultaneity Prevalence' + description = 'Proportion of all simultaneities that form triads.' + discrete = False - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Triad Simultaneity Prevalence' - self.description = 'Proportion of all simultaneities that form triads.' - self.dimensions = 1 - self.discrete = False - - def process(self): + def process(self) -> None: ''' Do processing necessary, storing result in feature. ''' + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') # use for total number of chords total = len(self.data['chordify.flat.getElementsByClass(Chord)']) histo = self.data['chordify.flat.getElementsByClass(Chord).typesHistogram'] @@ -697,19 +644,16 @@ class DiminishedSeventhSimultaneityPrevalence(featuresModule.FeatureExtractor): [0.0] ''' id = 'CS10' + name = 'Diminished Seventh Simultaneity Prevalence' + description = 'Percentage of all simultaneities that are diminished seventh chords.' + discrete = False - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Diminished Seventh Simultaneity Prevalence' - self.description = 'Percentage of all simultaneities that are diminished seventh chords.' - self.dimensions = 1 - self.discrete = False - - def process(self): + def process(self) -> None: ''' Do processing necessary, storing result in feature. ''' + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') # use for total number of chords total = len(self.data['chordify.flat.getElementsByClass(Chord)']) histo = self.data['chordify.flat.getElementsByClass(Chord).typesHistogram'] @@ -741,19 +685,16 @@ class IncorrectlySpelledTriadPrevalence(featuresModule.FeatureExtractor): [0.02...] ''' id = 'CS11' + name = 'Incorrectly Spelled Triad Prevalence' + description = 'Percentage of all triads that are spelled incorrectly.' + discrete = False - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Incorrectly Spelled Triad Prevalence' - self.description = 'Percentage of all triads that are spelled incorrectly.' - self.dimensions = 1 - self.discrete = False - - def process(self): + def process(self) -> None: ''' Do processing necessary, storing result in feature. ''' + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') # use for total number of chords histo = self.data['chordify.flat.getElementsByClass(Chord).typesHistogram'] if not histo: @@ -783,7 +724,7 @@ class ChordBassMotionFeature(featuresModule.FeatureExtractor): of all chord motion of music21.harmony.Harmony objects that move up by i-half-steps. (a half-step motion down would be stored in i = 11). i = 0 is always 0.0 since consecutive - chords on the same pitch are ignored (unless there are 0 or 1 harmonies, in which case it is 1) + chords on the same pitch are ignored (unless there are 0 or 1 harmonies, in which case it is 1). Sample test on Dylan's Blowing In The Wind (not included), showing all motion is 3rds, 6ths, or especially 4ths and 5ths. @@ -803,21 +744,19 @@ class ChordBassMotionFeature(featuresModule.FeatureExtractor): ''' id = 'CS12' + name = 'Chord Bass Motion' + description = ('12-element vector showing the fraction of chords that move ' + 'by x semitones (where x=0 is always 0 unless there are 0 ' + 'or 1 harmonies, in which case it is 1).') + dimensions = 12 + discrete = False - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Chord Bass Motion' - self.description = ('12-element vector showing the fraction of chords that move ' - 'by x semitones (where x=0 is always 0 unless there are 0 ' - 'or 1 harmonies, in which case it is 1).') - self.dimensions = 12 - self.discrete = False - - def process(self): + def process(self) -> None: ''' Do processing necessary, storing result in feature. ''' + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') # use for total number of chords harms = self.data['flat.getElementsByClass(Harmony)'] @@ -879,20 +818,17 @@ class LandiniCadence(featuresModule.FeatureExtractor): Return a boolean if one or more Parts end with a Landini-like cadential figure. ''' id = 'MC1' + name = 'Ends With Landini Melodic Contour' + description = ('Boolean that indicates the presence of a Landini-like ' + 'cadential figure in one or more parts.') + discrete = False - def __init__(self, dataOrStream=None, **keywords): - super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Ends With Landini Melodic Contour' - self.description = ('Boolean that indicates the presence of a Landini-like ' - 'cadential figure in one or more parts.') - self.dimensions = 1 - self.discrete = False - - def process(self): + def process(self) -> None: ''' Do processing necessary, storing result in feature. ''' + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') # store plausible ending half step movements # these need to be lists for comparison match = [[-2, 3], [-1, -2, 3]] @@ -935,9 +871,8 @@ def process(self): class LanguageFeature(featuresModule.FeatureExtractor): ''' - language of text as a number - the number is the index of text.LanguageDetector.languageCodes + 1 - or 0 if there is no language. + Language of the text as a number. The number is the index of + text.LanguageDetector.languageCodes + 1, or 0 if there is no language. Detect that the language of a Handel aria is Italian. @@ -948,21 +883,20 @@ class LanguageFeature(featuresModule.FeatureExtractor): ''' id = 'TX1' + name = 'Language Feature' + description = ('Language of the lyrics of the piece given as a numeric ' + 'value from text.LanguageDetector.mostLikelyLanguageNumeric().') - def __init__(self, dataOrStream=None, **keywords): + def __init__(self, dataOrStream=None, **keywords) -> None: super().__init__(dataOrStream=dataOrStream, **keywords) - - self.name = 'Language Feature' - self.description = ('Language of the lyrics of the piece given as a numeric ' - 'value from text.LanguageDetector.mostLikelyLanguageNumeric().') - self.dimensions = 1 - self.discrete = True self.languageDetector = text.LanguageDetector() - def process(self): + def process(self) -> None: ''' Do processing necessary, storing result in feature. ''' + if self.data is None or self.feature is None: # pragma: no cover + raise ValueError('Cannot process without a data instance or feature.') storedLyrics = self.data['assembledLyrics'] self.feature.vector[0] = self.languageDetector.mostLikelyLanguageNumeric(storedLyrics) diff --git a/music21/features/outputFormats.py b/music21/features/outputFormats.py index dadf844d1..5e6a06d7e 100644 --- a/music21/features/outputFormats.py +++ b/music21/features/outputFormats.py @@ -1,8 +1,15 @@ from __future__ import annotations +import pathlib +import typing as t +import unittest + from music21 import environment from music21 import exceptions21 +if t.TYPE_CHECKING: + from music21.features.base import DataSet + environLocal = environment.Environment('features.outputFormats') @@ -15,24 +22,26 @@ class OutputFormat: Provide output for a DataSet, which is passed in as an initial argument. ''' - def __init__(self, dataSet=None): - # assume a two dimensional array - self.ext = None # store a file extension if necessary + def __init__(self, dataSet: DataSet|None = None) -> None: + # assume a two-dimensional array + self.ext: str = '' # store a file extension if necessary # pass a data set object self._dataSet = dataSet - def getHeaderLines(self): + def getHeaderLines(self) -> list: ''' Get the header as a list of lines. ''' - pass # define in subclass + return [] # define in subclass - def getString(self, includeClassLabel=True, includeId=True, lineBreak=None): - pass # define in subclass + def getString(self, includeClassLabel: bool = True, includeId: bool = True, + lineBreak: str = '\n') -> str: + return '' # define in subclass - def write(self, fp=None, includeClassLabel=True, includeId=True): + def write(self, fp: str|pathlib.Path|None = None, + includeClassLabel: bool = True, includeId: bool = True) -> str|pathlib.Path: ''' - Write the file. If not file path is given, a temporary file will be written. + Write the file. If no file path is given, a temporary file will be written. ''' if fp is None: fp = environLocal.getTempFile(suffix=self.ext) @@ -53,11 +62,12 @@ class OutputTabOrange(OutputFormat): https://orange3.readthedocs.io/projects/orange-data-mining-library/en/latest/tutorial/data.html#saving-the-data ''' - def __init__(self, dataSet=None): + def __init__(self, dataSet: DataSet|None = None) -> None: super().__init__(dataSet=dataSet) self.ext = '.tab' - def getHeaderLines(self, includeClassLabel=True, includeId=True): + def getHeaderLines(self, includeClassLabel: bool = True, + includeId: bool = True) -> list[list[str]]: # noinspection PyShadowingNames ''' Get the header as a list of lines. @@ -80,6 +90,8 @@ def getHeaderLines(self, includeClassLabel=True, includeId=True): ['meta', '', 'class'] ''' + if self._dataSet is None: # pragma: no cover + raise OutputFormatException('cannot get header lines without a DataSet') post = [] post.append(self._dataSet.getAttributeLabels( includeClassLabel=includeClassLabel, includeId=includeId)) @@ -108,12 +120,13 @@ def getHeaderLines(self, includeClassLabel=True, includeId=True): post.append(row) return post - def getString(self, includeClassLabel=True, includeId=True, lineBreak=None): + def getString(self, includeClassLabel: bool = True, includeId: bool = True, + lineBreak: str = '\n') -> str: ''' Get the complete DataSet as a string with the appropriate headers. ''' - if lineBreak is None: - lineBreak = '\n' + if self._dataSet is None: # pragma: no cover + raise OutputFormatException('cannot get a string without a DataSet') msg = [] header = self.getHeaderLines(includeClassLabel=includeClassLabel, includeId=includeId) @@ -132,11 +145,12 @@ class OutputCSV(OutputFormat): Comma-separated value list. ''' - def __init__(self, dataSet=None): + def __init__(self, dataSet: DataSet|None = None) -> None: super().__init__(dataSet=dataSet) self.ext = '.csv' - def getHeaderLines(self, includeClassLabel=True, includeId=True): + def getHeaderLines(self, includeClassLabel: bool = True, + includeId: bool = True) -> list[list[str]]: ''' Get the header as a list of lines. @@ -147,14 +161,17 @@ def getHeaderLines(self, includeClassLabel=True, includeId=True): >>> of.getHeaderLines()[0] ['Identifier', 'Changes_of_Meter', 'Composer'] ''' + if self._dataSet is None: # pragma: no cover + raise OutputFormatException('cannot get header lines without a DataSet') post = [] post.append(self._dataSet.getAttributeLabels( includeClassLabel=includeClassLabel, includeId=includeId)) return post - def getString(self, includeClassLabel=True, includeId=True, lineBreak=None): - if lineBreak is None: - lineBreak = '\n' + def getString(self, includeClassLabel: bool = True, includeId: bool = True, + lineBreak: str = '\n') -> str: + if self._dataSet is None: # pragma: no cover + raise OutputFormatException('cannot get a string without a DataSet') msg = [] header = self.getHeaderLines(includeClassLabel=includeClassLabel, includeId=includeId) @@ -181,11 +198,12 @@ class OutputARFF(OutputFormat): '.arff' ''' - def __init__(self, dataSet=None): + def __init__(self, dataSet: DataSet|None = None) -> None: super().__init__(dataSet=dataSet) self.ext = '.arff' - def getHeaderLines(self, includeClassLabel=True, includeId=True): + def getHeaderLines(self, includeClassLabel: bool = True, + includeId: bool = True) -> list[str]: ''' Get the header as a list of lines. @@ -201,6 +219,8 @@ def getHeaderLines(self, includeClassLabel=True, includeId=True): @DATA ''' + if self._dataSet is None: # pragma: no cover + raise OutputFormatException('cannot get header lines without a DataSet') post = [] # get three parallel lists @@ -224,15 +244,16 @@ def getHeaderLines(self, includeClassLabel=True, includeId=True): post.append(f'@ATTRIBUTE {attrLabel} NUMERIC') else: values = self._dataSet.getUniqueClassValues() - joined = ','.join(values) + joined = ','.join(str(v) for v in values) post.append('@ATTRIBUTE class {' + joined + '}') # include start of data declaration post.append('@DATA') return post - def getString(self, includeClassLabel=True, includeId=True, lineBreak=None): - if lineBreak is None: - lineBreak = '\n' + def getString(self, includeClassLabel: bool = True, includeId: bool = True, + lineBreak: str = '\n') -> str: + if self._dataSet is None: # pragma: no cover + raise OutputFormatException('cannot get a string without a DataSet') msg = [] @@ -252,6 +273,38 @@ def getString(self, includeClassLabel=True, includeId=True, lineBreak=None): return lineBreak.join(msg) +class Test(unittest.TestCase): + + def testARFFNumericClassValues(self): + ''' + Regression test: numeric (non-string) class values must not crash + ARFF header generation. Previously the class declaration was built + with ','.join(values), which raised a TypeError when the class + values were ints or floats rather than strings. + + AI-assisted (Claude). + ''' + from music21 import converter + from music21 import features + + ds = features.DataSet(classLabel='Meter') + ds.addFeatureExtractors(features.extractorsById(['r31'])) + s1 = converter.parse('tinynotation: 4/4 c4 d e f') + s2 = converter.parse('tinynotation: 3/4 c4 d e') + # integer class values, not strings + ds.addMultipleData([s1, s2], classValues=[3, 4]) + ds.process() + + self.assertEqual(ds.getUniqueClassValues(), [3, 4]) + + of = features.outputFormats.OutputARFF(dataSet=ds) + classLines = [line for line in of.getHeaderLines() + if line.startswith('@ATTRIBUTE class')] + self.assertEqual(classLines, ['@ATTRIBUTE class {3,4}']) + # building the full string must not raise either + self.assertIn('@ATTRIBUTE class {3,4}', of.getString()) + + if __name__ == '__main__': import music21 music21.mainTest() diff --git a/music21/lily/lilyObjects.py b/music21/lily/lilyObjects.py index c04d5151e..2f5a61627 100644 --- a/music21/lily/lilyObjects.py +++ b/music21/lily/lilyObjects.py @@ -204,7 +204,7 @@ def setAttributesFromClassObject(self, classLookup, m21Object): def _reprInternal(self) -> str: msg = str(self) - msg.replace('\n', ' ') + msg = msg.replace('\n', ' ') if len(msg) >= 13: msg = msg[:10] + '...' return msg diff --git a/music21/stream/base.py b/music21/stream/base.py index b6a046c3d..a44db74e6 100644 --- a/music21/stream/base.py +++ b/music21/stream/base.py @@ -99,6 +99,18 @@ class StreamDeprecationWarning(UserWarning): OffsetMap = namedtuple('OffsetMap', ['element', 'offset', 'endTime', 'voiceIndex']) +class SecondsMapEntry(t.TypedDict): + ''' + A typed dictionary describing the real-time characteristics of one element, + as returned in the list produced by :attr:`~music21.stream.base.Stream.secondsMap`. + ''' + offsetSeconds: float + durationSeconds: float + endTimeSeconds: float + element: base.Music21Object + voiceIndex: int|None + + # ----------------------------------------------------------------------------- class Stream[M21ObjType: base.Music21Object](core.StreamCore): ''' @@ -8773,7 +8785,7 @@ def _accumulatedSeconds(self, mmBoundaries, oStart, oEnd): activeStart = activeEnd return totalSeconds - def _getSecondsMap(self, srcObj=None): + def _getSecondsMap(self, srcObj=None) -> list[SecondsMapEntry]: ''' Return a list of dictionaries for all elements in this Stream, where each dictionary defines the real-time characteristics of @@ -8789,7 +8801,8 @@ def _getSecondsMap(self, srcObj=None): # not sure if this should be taken from the flat representation lowestOffset = srcObj.lowestOffset - secondsMap = [] # list of start, start+dur, element + secondsMap: list[SecondsMapEntry] = [] # list of start, start+dur, element + groups: list[tuple[Stream, int|None]] if srcObj.hasVoices(): groups = [] for i, v in enumerate(srcObj.voices): @@ -8805,20 +8818,16 @@ def _getSecondsMap(self, srcObj=None): continue dur = e.duration.quarterLength offset = round(e.getOffsetBySite(group), 8) - # calculate all time regions given this offset - - # all stored values are seconds - # noinspection PyDictCreation - secondsDict = {} - secondsDict['offsetSeconds'] = srcObj._accumulatedSeconds( - mmBoundaries, lowestOffset, offset) - secondsDict['durationSeconds'] = srcObj._accumulatedSeconds( - mmBoundaries, offset, offset + dur) - secondsDict['endTimeSeconds'] = (secondsDict['offsetSeconds'] - + secondsDict['durationSeconds']) - secondsDict['element'] = e - secondsDict['voiceIndex'] = voiceIndex - secondsMap.append(secondsDict) + # calculate all time regions given this offset; all values are seconds + offsetSeconds = srcObj._accumulatedSeconds(mmBoundaries, lowestOffset, offset) + durationSeconds = srcObj._accumulatedSeconds(mmBoundaries, offset, offset + dur) + secondsMap.append(SecondsMapEntry( + offsetSeconds=offsetSeconds, + durationSeconds=durationSeconds, + endTimeSeconds=offsetSeconds + durationSeconds, + element=e, + voiceIndex=voiceIndex, + )) return secondsMap # do not make a property decorator since _getSecondsMap takes arguments