[tracker] data-generators: Create more useful information for music generation
- From: Martyn James Russell <mr src gnome org>
- To: svn-commits-list gnome org
- Cc:
- Subject: [tracker] data-generators: Create more useful information for music generation
- Date: Wed, 2 Dec 2009 16:58:14 +0000 (UTC)
commit 3400c6a781486d69d54939101070e4877cc3c500
Author: Martyn Russell <martyn lanedo com>
Date: Wed Dec 2 18:09:59 2009 +0200
data-generators: Create more useful information for music generation
utils/data-generators/generate-data-for-music.py | 203 +++++++++++++---------
utils/data-generators/generate-name.py | 200 +++++++++++++++++++++
2 files changed, 318 insertions(+), 85 deletions(-)
---
diff --git a/utils/data-generators/generate-data-for-music.py b/utils/data-generators/generate-data-for-music.py
index f935e68..25acb30 100755
--- a/utils/data-generators/generate-data-for-music.py
+++ b/utils/data-generators/generate-data-for-music.py
@@ -1,10 +1,10 @@
#! /usr/bin/env python
+import os
import sys
import random
import urllib
-
artist_UID = {}
album_UID = {}
fileid = 0
@@ -20,96 +20,116 @@ def printHeader():
f.write("@prefix nie: <http://www.semanticdesktop.org/ontologies/2007/01/19/nie#>.\n");
f.write("@prefix xsd: <http://www.w3.org/2001/XMLSchema#>.\n\n")
-def updatetag(artistid, albumid, trackid, genreid):
+def generate_name():
+ name = os.popen('./generate-name.py').read()
+
+ first_name = ""
+ last_name = ""
+
+ for line in name.splitlines():
+ if not first_name:
+ first_name = line
+ continue
+
+ if not last_name:
+ last_name = line
+ continue
+
+ full_name = '%s %s' % (first_name, last_name)
+ return full_name
+
+def update_tag(artistid, artistname, albumid, trackid, genreid):
global fileid
- length = 0
- length=random.randint(5000,5000000 )
- song = 'SongTitle [%03u]' % fileid
- artist = 'TrkArtist [%03u]' % artistid
- album = 'TrkAlbum [%03u]' % albumid
- genre = 'Genre-[%03u]' %genreid
+ length = random.randint(5000,5000000)
+ song = 'SongTitle%03u' % fileid
+ album = 'Album%03u' % albumid
+ genre = 'Genre%03u' % genreid
trackstr = str(artistid) + '/' + str(trackid)
- fullpath = '/home/abc/d/e/%03u.mp3' %fileid
- fileid +=1
- year = '2009'
-
- size ='%03u' %fileid
- modified = "2009-07-17T15:18:16"
- created = "2009-07-17T15:18:16"
+ fullpath = '/home/foo/music/%s/%s/%03u.mp3' % (artistname, album, trackid)
+ fileid += 1
+ year = '%04u' % random.randint(1950, 2010)
+ size = '%03u' % random.randint(3 * 1024, 10 * 1024)
+ modified = "%04u-%02u-%02uT15:18:16" % (random.randint(1950, 2010),
+ random.randint(1, 12),
+ random.randint(1, 28))
+ created = modified
- if not artist_UID.has_key(artist):
+ if not artist_UID.has_key(artistname):
#print " The new artist is "+artist
UID = str(random.randint(0, sys.maxint))
- artist_UID[artist] = UID
- f.write('<urn:uuid:'+UID+'> a nco:Contact; \n')
- f.write('\tnco:fullname "'+artist+'".\n\n')
-
-
- else :
- UID = artist_UID[artist]
+ artist_UID[artistname] = UID
+ f.write('<urn:uuid:' + UID + '> a nco:Contact; \n')
+ f.write('\tnco:fullname "' + artistname + '".\n\n')
+ else:
+ UID = artist_UID[artistname]
if not album_UID.has_key(album):
album_UID[album] = album
- f.write('<urn:album:'+album+'> a nmm:MusicAlbum; \n')
+ f.write('<urn:album:' + album + '> a nmm:MusicAlbum; \n')
- if len(UID)>0: f.write('\tnmm:albumArtist <urn:uuid:'+UID+'>;\n')
- f.write('\tnie:title "'+album+'".\n\n')
+ if len(UID)>0:
+ f.write('\tnmm:albumArtist <urn:uuid:' + UID + '>;\n')
- else :
- UID = artist_UID[artist]
+ f.write('\tnie:title "' + album + '".\n\n')
+ else:
+ UID = artist_UID[artistname]
+ f.write('<file://' + urllib.pathname2url(fullpath) + '> a nmm:MusicPiece,nfo:FileDataObject;\n')
+ if len(song) > 0:
+ f.write('\tnie:title "' + song + '";\n')
- f.write('<file://'+urllib.pathname2url(fullpath)+'> a nmm:MusicPiece,nfo:FileDataObject;\n')
- if len(song)>0:f.write('\tnie:title "'+song+'";\n')
- f.write('\tnfo:fileName \"%03u.mp3\";\n' %fileid)
- f.write('\tnfo:fileLastModified "'+modified+'" ;\n')
- f.write('\tnfo:fileCreated "'+created+'";\n')
- f.write('\tnfo:fileSize '+str(size)+';\n')
- f.write('\tnmm:musicAlbum <urn:album:'+album+'>;\n')
- f.write('\tnmm:genre "'+genre+'";\n')
- if len(trackstr)>0:
- trackArray=trackstr.split("/")
- if len(trackArray)>0: f.write('\tnmm:trackNumber '+trackArray[0]+';\n')
+ f.write('\tnfo:fileName \"' + artistname + '.mp3\";\n')
+ f.write('\tnfo:fileLastModified "' + modified + '" ;\n')
+ f.write('\tnfo:fileCreated "' + created + '";\n')
+ f.write('\tnfo:fileSize ' + str(size) + ';\n')
+ f.write('\tnmm:musicAlbum <urn:album:' + album + '>;\n')
+ f.write('\tnmm:genre "' + genre + '";\n')
+ if len(trackstr) > 0:
+ trackArray = trackstr.split("/")
+ if len(trackArray) > 0:
+ f.write('\tnmm:trackNumber ' + trackArray[0] + ';\n')
- f.write('\tnmm:length '+str(length)+';\n')
- f.write('\tnmm:performer <urn:uuid:'+UID+'>.\n\n')
+ f.write('\tnmm:length ' + str(length) + ';\n')
+ f.write('\tnmm:performer <urn:uuid:' + UID + '>.\n\n')
+def create_track(artistid, albumid, genreid, settings):
+ artistname = generate_name()
+ for trackid in range(1, settings['TitlesPerAlbum'] + 1):
+ update_tag(artistid, artistname, albumid, trackid, genreid)
-def create_track( artistid, albumid, genreid, settings):
- for trackid in range(1, settings['TitlesPerAlbum'] + 1):
- updatetag(artistid, albumid, trackid, genreid)
genreid += 1
if genreid > settings['GenreCount']:
- genreid = 1
- return genreid
+ genreid = 1
+ return genreid
def generate(settings):
- ''' A total of TotalTracks files will be generated.
+ ''' A total of TotalTracks files will be generated.
These contain the specified number of albums.'''
- '''
- filepath = settings['OutputDir']
- try:
+ '''
+ filepath = settings['OutputDir']
+ try:
os.makedirs(filepath)
- except:
+ except:
print 'Directory exists'
- '''
-
- global album_UID
- genreid = 1
- artistid = 1
- albumid = 0
- for artistid in range(1, settings['ArtistCount'] + 1):
- album_UID = {}
+ '''
+
+ global album_UID
+ genreid = 1
+ artistid = 1
+ albumid = 0
+
+ for artistid in range(1, settings['ArtistCount'] + 1):
+ album_UID = {}
+
for albums in range(1, settings['AlbumCount'] + 1):
- albumid+=1
+ albumid += 1
genreid = create_track(artistid, albumid, genreid, settings)
-
if __name__ == '__main__':
settings = {}
@@ -117,32 +137,48 @@ if __name__ == '__main__':
parser = OptionParser()
- parser.add_option("-T", "--TotalTracks", dest='TotalTracks',
- help="Specify (mandatory) the total number of files to be generated" , metavar="TotalTracks")
- parser.add_option("-r", "--ArtistCount", dest='ArtistCount', default=2,
- help="Specify (mandatory) the total number of Artists." , metavar="ArtistCount")
- parser.add_option("-a", "--album-count", dest='AlbumCount', default=5,
- help="Specify (mandatory) the number of albums per artist." , metavar="AlbumCount")
- parser.add_option("-g", "--genre-count", dest='GenreCount', default=10,
- help="Specify the genre count" , metavar="GenreCount")
- parser.add_option("-o", "--output", dest='OutputFileName', default='songlistDirect.ttl',
- help="Specify the output ttl filename. \
- E.g., -T 2000 -r 25 -a 20 -g 10 -o generated_songs.ttl" , metavar="OutputFileName")
+ parser.add_option("-T", "--TotalTracks",
+ dest='TotalTracks',
+ help="Specify (mandatory) the total number of files to be generated",
+ metavar="TotalTracks")
+ parser.add_option("-r", "--ArtistCount",
+ dest='ArtistCount',
+ default=2,
+ help="Specify (mandatory) the total number of Artists." ,
+ metavar="ArtistCount")
+ parser.add_option("-a", "--album-count",
+ dest='AlbumCount',
+ default=5,
+ help="Specify (mandatory) the number of albums per artist.",
+ metavar="AlbumCount")
+ parser.add_option("-g", "--genre-count",
+ dest='GenreCount',
+ default=10,
+ help="Specify the genre count" ,
+ metavar="GenreCount")
+ parser.add_option("-o", "--output",
+ dest='OutputFileName',
+ default='songlistDirect.ttl',
+ help="Specify the output ttl filename. e.g. -T 2000 -r 25 -a 20 -g 10 -o generated_songs.ttl",
+ metavar="OutputFileName")
(options, args) = parser.parse_args()
mandatories = ['TotalTracks', 'ArtistCount', 'AlbumCount']
for m in mandatories:
- if not options.__dict__[m]:
- print "\nMandatory options missing\n"
- parser.print_help()
- sys.exit(-1)
-
- settings['TotalTracks'] = int(options.TotalTracks)
- if settings['TotalTracks'] < (int(options.ArtistCount) * int(options.AlbumCount) ):
+ if not options.__dict__[m]:
+ # Set defaults
+ if m == "TotalTracks":
+ options.TotalTracks = 5000
+ elif m == "ArtistCount":
+ options.ArtistCount = 60
+ elif m == "AlbumCount":
+ options.AlbumCount = 60
+
+ settings['TotalTracks'] = options.TotalTracks
+ if settings['TotalTracks'] < (options.ArtistCount * options.AlbumCount):
sys.exit('InputError: TotalTracks should be greater than or equal to ArtistCount * AlbumCount')
-
settings['TitlesPerAlbum'] = settings['TotalTracks'] / (int(options.ArtistCount) * int(options.AlbumCount))
#print 'settings[\'TitlesPerAlbum\'] %d' %settings['TitlesPerAlbum']
settings['ArtistCount'] = int(options.ArtistCount)
@@ -150,12 +186,9 @@ if __name__ == '__main__':
settings['GenreCount'] = int(options.GenreCount)
settings['OutputFileName'] = options.OutputFileName
- print '\n'+str(settings)+'\n'
+
+ print '\n' + str(settings) + '\n'
f = open(settings['OutputFileName'], 'w' )
printHeader()
generate(settings)
-
-
-
-
diff --git a/utils/data-generators/generate-name.py b/utils/data-generators/generate-name.py
new file mode 100755
index 0000000..61450fc
--- /dev/null
+++ b/utils/data-generators/generate-name.py
@@ -0,0 +1,200 @@
+#! /usr/bin/env python
+
+# Context-free grammar random name generator
+# Jeremy Thurgood <firxen gmail com>
+# Highly experimental at present, but sort of working
+
+import random
+import re
+import sys
+
+
+class GrammarError(RuntimeError):
+ pass
+
+class CFNameGen(object):
+ # This should be done using gettext for i18n, but I can't be bothered to figure
+ # out how to do it properly, so I'm using replacement strings for now.
+ stringUndefinedNonTerminal = "Undefined non-terminal \"%(undefinedNonTerminal)s\" in rule \"%(rule)s\"."
+
+ # Regular expression to catch non-terminals, used frequently, so global
+ reNonTerminal = re.compile(r"<(\w+)>")
+
+ def __init__(self, nameGrammar):
+ """Create a namegen object.
+
+ We take a grammar dict, as before the Great Refactoring.
+ """
+
+ self.checkTypes(nameGrammar)
+ self.grammar = nameGrammar
+
+ # checkTypes() is only useful while testing with internally specified grammars.
+ # Once we're parsing an external file it becomes unnecessary since we generate
+ # the data types ourselves instead of asking a human to do it. As such, error
+ # strings are hardcoded. Anyone who sees them would be messing around in here
+ # anyway.
+ def checkTypes(self, nameGrammar):
+ """Check given grammar object for correct datatypes.
+
+ This function is only really necessary while the grammar's still being
+ specified in here. It will likely disappear when we parse the grammar from a
+ data file.
+ """
+ if not isinstance(nameGrammar, dict):
+ raise GrammarError("Grammar data is not a dictionary!")
+ for rule, rhs in nameGrammar.items():
+ if not isinstance(rhs, list):
+ raise GrammarError("Rule \"%s\" is not a list!" % rule)
+ for option in rhs:
+ if not isinstance(option, str):
+ raise GrammarError("Rule \"%s\" does not contain only strings!" % rule)
+
+ # Grammar verification stuff follows. We can probably make this throw warnings
+ # and correct problems, but that's a job for another day. Incorrect grammars
+ # probably won't provide useful output anyway. If this stuff gets big enough
+ # it may be pushed into its own module.
+
+ def checkUndefinedNonTerminals(self, nameGrammar):
+ """Check given grammar for undefined non-terminals.
+
+ An undefined non-terminal is a non-terminal symbol used in a symbol
+ definition that has no definition of its own and cannot therefore be
+ expanded. Undefined non-terminals can lead to ugly error messages
+ instead of beautifully generated names.
+ """
+ for rule, rhs in nameGrammar.items():
+ for option in rhs:
+ tempStr = option
+ matchNonTerminal = self.reNonTerminal.search(tempStr)
+ while matchNonTerminal:
+ if matchNonTerminal.group(1) not in nameGrammar:
+ return {"undefinedNonTerminal": matchNonTerminal.group(1), "rule": rule}
+ tempStr = self.reNonTerminal.sub("", tempStr, 1)
+ matchNonTerminal = self.reNonTerminal.search(tempStr)
+
+ def checkUnproductiveNonTerminals(self, nameGrammar):
+ """Check grammar for possibly unproductive non-terminals.
+
+ An unproductive non-terminal is a non-terminal symbol that cannot be
+ converted to a terminal symbol in the given grammar. A good example of this
+ is a non-terminal symbol that includes itself in its definition.
+
+ This function is currently very basic and should be extended (rewritten?) to
+ allow warnings for _possible_ unproductive non-terminals and errors for
+ _definite_ unproductive non-terminals. Volunteers?
+
+ XXX: INCOMPLETE
+ """
+ def recurse(a):
+ if a == 5:
+ return a
+ return recurse(a+1)
+
+ grammarUnchecked = dict([(rule, "".join(rhs)) for (rule, rhs) in nameGrammar.items()])
+ grammarProductive = []
+ finished = False
+ while not finished:
+ print "grammarProductive:"
+ print grammarProductive
+ print "grammarUnchecked:"
+ print grammarUnchecked
+ print
+ finished = True
+ for rule, rhs in grammarUnchecked.items():
+ matchNonTerminal = reNonTerminal.search(rhs)
+ while matchNonTerminal:
+ matchString = matchNonTerminal.group(1)
+ if matchString not in grammarProductive:
+ break
+ rhs = rhs.replace("<"+matchString+">", "")
+ finished = False
+ matchNonTerminal = reNonTerminal.search(rhs)
+ if not matchNonTerminal:
+ grammarProductive.append(rule)
+ del grammarUnchecked[rule]
+ finished = False
+ continue
+ grammarUnchecked[rule] = rhs
+
+ # More grammar checking functions to come:
+ # Unused non-terminals
+ # Loop detection would be nice, but currently a little impractical.
+
+ def checkUnusedNonTerminals(self, nameGrammar):
+ """Check grammar for non-terminals that can never be reached.
+
+ While unused non-terminals are irrelevant in the generation of sentences,
+ their presence usually implies an error in the grammar.
+
+ XXX: INCOMPLETE
+ """
+
+ pass
+
+ # verifyGrammar() uses the above functions to verify the correctness of a
+ # grammar. This isn't perfect, but it should catch the most common problems.
+ def verifyGrammar(self):
+ error = self.checkUndefinedNonTerminals(self.grammar)
+ if error:
+ return stringUndefinedNonTerminal % error
+ if "name" not in self.grammar:
+ return "Rule \"name\" not present!"
+
+ # Now to the meat of the problem, which is actually almost trivial thanks to
+ # the dictionary data type. I love python ;-)
+
+ def getName(self):
+ nameStr = random.choice(self.grammar["name"])
+ matchNonTerminal = self.reNonTerminal.search(nameStr)
+ while matchNonTerminal:
+ subStr = random.choice(self.grammar[matchNonTerminal.group(1)])
+ nameStr = self.reNonTerminal.sub(subStr, nameStr, 1)
+ matchNonTerminal = self.reNonTerminal.search(nameStr)
+ return nameStr
+
+
+if __name__ == "__main__":
+ # Main body
+ # Test grammar -- will be read from a file when I decide how to do it properly
+ # with minimum effort (for the user and the code)
+ orkGrammar = {
+ "name": ["<nameStart><nameMiddle0to3><nameEnd>"],
+ "nameMiddle0to3": ["","<nameMiddle>", "<nameMiddle><nameMiddle>", "<nameMiddle><nameMiddle><nameMiddle>"],
+ "nameStart": ["<nsCons><nmVowel>", "<nsCons><nmVowel>", "<nsCons><nmVowel>", "<nsVowel>"],
+ "nameMiddle": ["<nmCons><nmVowel>"],
+ "nameEnd": ["<neCons><neVowel>", "<neCons>", "<neCons>"],
+ "nsCons": ["D", "G", "K", "T", "Gr"],
+ "nmCons": ["d", "g", "k", "t", "r", "s", "z", "kt", "rs", "gr"],
+ "neCons": ["r", "s", "z"],
+ "nsVowel": ["E", "U"],
+ "nmVowel": ["a", "e", "i", "o", "u"],
+ "neVowel": ["a", "u"]
+ }
+
+ fooGrammar = {
+ "name": ["<nameStart><nameMiddle0to2><nameEnd>"],
+ "nameMiddle0to2": ["","<nameMiddle>", "<nameMiddle><nameMiddle>"],
+ "nameStart": ["<nsCons><nmVowel>", "<nsCons><nmVowel>", "<nsCons><nmVowel>", "<nsVowel>"],
+ "nameMiddle": ["<nmCons><nmVowel>"],
+ "nameEnd": ["<neCons><neVowel>", "<neCons>", "<neCons>"],
+ "nsCons": ["J", "M", "P", "N", "Y", "D", "F"],
+ "nmCons": ["l", "m", "lm", "th", "r", "s", "ss", "p", "f", "mb", "b", "lb", "d", "lf"],
+ "neCons": ["r", "n", "m", "s", "y", "l", "th", "b", "lb", "f", "lf"],
+ "nsVowel": ["A", "Au", "Ei"],
+ "nmVowel": ["a", "e", "i", "o", "u", "au", "oa", "ei"],
+ "neVowel": ["e", "i", "a", "au"]
+ }
+
+ fooGen = CFNameGen(fooGrammar)
+ errorStr = fooGen.verifyGrammar()
+ if errorStr:
+ sys.exit(errorStr)
+ print fooGen.getName()
+
+ orkGen = CFNameGen(orkGrammar)
+ errorStr = orkGen.verifyGrammar()
+ if errorStr:
+ sys.exit(errorStr)
+ print orkGen.getName()
+
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]