Commit a4a6f4a0 authored by Elizabeth Myers 💬

Rework generation algorithm, let's see if this is better

parent 16b52169
......@@ -22,35 +22,21 @@
import random
import string
import warnings
# Monkey patch rng to use system rng.
# Rebinding only ``random.random`` is not enough: ``random.choice`` and
# ``random.randint`` are bound methods of the module's hidden ``Random``
# instance and never look up the module attribute ``random.random``, so the
# helpers this file actually calls have to be rebound explicitly as well.
_system_rng = random.SystemRandom()
random.random = _system_rng.random
random.choice = _system_rng.choice
random.randint = _system_rng.randint
def read_wordlist(filename, col, max_len):
    """Read words from a file into an existing collection.

    Each line is stripped of surrounding whitespace and upper-cased. Blank
    lines and words longer than ``max_len`` are skipped.

    Args:
        filename: Path of the word file, one word per line.
        col: The ``list`` or ``set`` that receives the words; it is
            modified in place.
        max_len: Maximum accepted word length, inclusive.

    Returns:
        None. The words are added to ``col``.

    Raises:
        ValueError: If ``col`` is neither a list nor a set.
    """
    # Resolve the insertion method once instead of re-checking it per word;
    # this also rejects an unsupported collection even when no word qualifies.
    if isinstance(col, list):
        add_word = col.append
    elif isinstance(col, set):
        add_word = col.add
    else:
        raise ValueError("col is not a list or a set")

    with open(filename) as wordfile:
        for line in wordfile:
            word = line.strip().upper()
            # A blank or whitespace-only line strips down to "", which is
            # not a word and must not end up in the collection.
            if word and len(word) <= max_len:
                add_word(word)
class Generator:
""" Generates random 8.3 filenames """
validlast = string.digits
singlechars = string.digits
def __init__(self, wordfile="words.txt", extfile="exts.txt", badfile="bad.txt"):
# Load the word, extension and bad-word files named by the arguments.
# NOTE(review): this span comes from a diff whose +/- markers and indentation
# were lost. The first six statements (``self.wordlist`` and the calls to the
# module-level ``read_wordlist``) look like the removed version and the last
# three like its replacement -- confirm against the repository before
# assuming both halves are meant to run.
self.wordlist = list()
# The module-level read_wordlist() fills the passed list in place; the last
# argument is the maximum word length that is kept.
read_wordlist(wordfile, self.wordlist, 8)
self.extlist = list()
read_wordlist(extfile, self.extlist, 3)
self.badwords = list()
read_wordlist(badfile, self.badwords, 12)
# Words of the word file grouped by length (see read_word_slots).
self.word_slots = self.read_word_slots(wordfile)
# The static read_wordlist() returns a new list; these two assignments rebind
# the attributes that were filled a few statements earlier.
self.extlist = self.read_wordlist(extfile, 3)
self.badwords = self.read_wordlist(badfile, 12)
def __iter__(self):
return self
......@@ -58,70 +44,68 @@ class Generator:
def __next__(self):
return self.generate_clean()
@staticmethod
def read_word_slots(filename):
""" Read words into slots """
slots = [[], [], [], [], [], [], [], []]
with open(filename, "r") as wordfile:
for word in wordfile:
word = word.rstrip().upper()
slot = len(word)
assert slot <= 8, "Word {} was too long".format(word)
slots[slot - 1].append(word)
# Add ancillary characters to slot 1
slots[0].extend(Generator.singlechars)
return slots
@staticmethod
def read_wordlist(filename, max_len):
""" Read a file and return a list of words. Skips words longer than max_len """
wordlist = []
with open(filename, "r") as wordfile:
for word in wordfile:
word = word.strip().upper()
if len(word) > max_len:
warnings.warn("word {} exceeds max length {}".format(word, max_len))
continue
wordlist.append(word)
return wordlist
def choose_word(self, slot):
return random.choice(self.word_slots[slot])
def generate(self):
""" Generate an 8.3 filename """
# NOTE(review): this span comes from a diff whose +/- markers and indentation
# were lost. The statements from ``while len(filename) < 11`` down to the
# first ``return`` look like the removed algorithm; the statements starting at
# ``length = random.randint(6, 11)`` look like its replacement. The original
# nesting cannot be recovered from this text -- confirm against the repository.
filename = ""
# Generate an 11 char word, we will split it up later into an 8.3
# filename
while len(filename) < 11:
# Draw a random word from the flat word list.
word = random.choice(self.wordlist).upper()
# Length the name would have if the whole word were appended.
total_len = len(word) + len(filename)
if total_len > 13:
# Even with continuations we can't exceed this, but let's
# give a random chance to shorten, or try again.
if random.randint(0, 14) == 0:
filename += word
filename = filename[:6]
filename += "~1"
elif random.randint(0, 5) == 0:
filename = ""
continue
elif filename and random.randint(0, 4) == 0:
# 1 in 5 chance of opportunistic combination
# Try a 2-character overlap first, then a 1-character overlap, between the
# end of the name so far and the start of the new word.
for i in range(2, 0, -1):
if filename[-i:] == word[:i] and total_len - i <= 11:
word = word[i:]
total_len -= i
break
elif total_len > 11:
# Continuation not happening, don't exceed 11
continue
if (total_len <= 8 or
(total_len == 11 and random.randint(0, 19) == 0)):
# Tack on the word
filename += word
# Special cases
if total_len == 6 and random.randint(0, 14) != 0:
# 1 in 15 chance of a ~1 substitution
# NOTE(review): ``randint(0, 14) != 0`` holds 14 times out of 15, which does
# not match the "1 in 15" wording above -- confirm which was intended.
filename += "~1"
elif total_len == 7 and random.randint(0, 19) != 0:
# 1 in 20 chance of continuing, otherwise consider adding ~1 or
# a character, then the extension
if random.randint(0, 14) == 0:
# 1 in 15 chance of adding ~1
filename = filename[:6] + "~1"
else:
filename += random.choice(self.validlast)
filename += random.choice(self.extlist).upper()
elif total_len == 8:
# 1 in 15 chance we change it to ~1
if random.randint(0, 14) == 0:
filename = filename[:6] + "~1"
# Add extension and call it a day
filename += random.choice(self.extlist).upper()
elif total_len == 10 and random.randint(0, 19) == 0:
# 1 in 20 chance of continuing into the extension, add padding
filename += word
filename += random.choice(self.validlast)
# First 8 characters become the base name, the last 3 the extension.
return filename[:8] + "." + filename[-3:]
# NOTE(review): the statements below look like the replacement algorithm.
# Target length of the concatenated words, 6 to 11 inclusive.
length = random.randint(6, 11)
# Lowest slot index that may be drawn; slot i holds the words of length i + 1,
# so 1 means "at least two characters".
minslot = 1
while len(filename) < length:
# Index of the slot whose words exactly fill the remaining space.
maxslot = length - len(filename) - 1
if maxslot > 7:
# We only have slots 0-7
maxslot = 7
elif length - len(filename) == 1:
# Set minslot to 0 to allow single chars in this case
minslot = 0
slot = random.randint(minslot, maxslot)
filename += self.choose_word(slot)
# Turn the concatenated words into NAME.EXT depending on their total length.
if len(filename) == 6:
# 6 characters: append "~1" to reach an 8-character base name.
return "{}~1.{}".format(filename, random.choice(self.extlist))
elif len(filename) == 7:
# 7 characters: keep the first 6 and append "~1".
return "{}~1.{}".format(filename[:6], random.choice(self.extlist))
elif len(filename) == 8:
# 8 characters: use as the base name unchanged.
return "{}.{}".format(filename, random.choice(self.extlist))
elif len(filename) in (9, 10):
# Corner case: this results in extensions with only numbers usually...
# so the name is cut to 8 characters and a random extension is used instead.
return "{}.{}".format(filename[:8], random.choice(self.extlist))
else:
# Remaining case: split into the first 8 characters and everything after them.
return "{}.{}".format(filename[:8], filename[8:])
def generate_clean(self):
""" Generate an 8.3 filename that does not contain bad words """
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment