Added code comments.

This commit is contained in:
2022-07-07 12:50:29 -04:00
parent 3aa3b64e1c
commit e4c4be429c
11 changed files with 334 additions and 31 deletions

View File

@@ -16,116 +16,215 @@ import java.security.SecureRandom;
import java.util.List;
import java.util.Random;
/**
* Ingests sentences and generates new ones using a Markov chain backed by Redis storage.
*/
public final class MarkovChain {
// Default character to end sentences with if they do not already end with a period, exclamation mark, or question mark.
private static final char DEFAULT_SENTENCE_END = '§';
// A unique number for this Markov chain to segment it away from other Markov chains; it prefixes every Redis key
// this chain uses. This is usually the channel ID given by Discord.
private final long id;
// The Crabstero instance using this Markov chain; its Jedis pool supplies the Redis connections.
private final Crabstero crabstero;
// A source of randomness used when picking words.
private final Random rng;
/**
* Creates a new Markov chain identified by a given number and owned by a given instance of Crabstero.
*
* @param id The unique number.
* @param crabstero The instance of Crabstero.
*/
public MarkovChain(final long id, final Crabstero crabstero) {
    this.id = id;
    this.crabstero = crabstero;
    // rng is final, so it is assigned exactly once.
    // Probably placebo effect, but SecureRandom seems to produce better results.
    this.rng = new SecureRandom();
}
/**
* Creates a new Markov chain identified by a given number, owned by a given instance of Crabstero, and using a custom source of randomness.
*
* @param id The unique number.
* @param crabstero The instance of Crabstero.
* @param random The custom randomness source.
*/
public MarkovChain(final long id, final Crabstero crabstero, final Random random) {
    // Use the caller's generator as given; no default generator is created here.
    this.rng = random;
    // Remember which Crabstero instance owns this chain and the number that identifies it.
    this.crabstero = crabstero;
    this.id = id;
}
/**
* Checks whether a given sentence ends with a default sentence end character, a period, an exclamation mark, or a question mark.
*
* @param sentence The sentence to test.
* @return True if the sentence ends with a default sentence end character, a period, an exclamation mark, or a question mark, false otherwise.
*/
private static boolean isCompleteSentence(final String sentence) {
    // Position of the final character; negative when the string is empty.
    final int lastIndex = sentence.length() - 1;
    if (lastIndex < 0) {
        // An empty string can never be a complete sentence.
        return false;
    }

    // Only the final character decides whether the sentence is complete.
    switch (sentence.charAt(lastIndex)) {
        case DEFAULT_SENTENCE_END:
        case '.':
        case '!':
        case '?':
            return true;
        default:
            return false;
    }
}
/**
* Ingests a string potentially containing multiple smaller sentences.
*
* @param paragraph The paragraph of sentences to ingest.
*/
public void ingest(String paragraph) {
    // Normalize whitespace BEFORE testing for a sentence end. Otherwise a trailing space or new line hides the
    // real last character and the end marker is stored as a stray word of its own.
    // New lines become spaces first so that the runs of spaces they create are collapsed as well.
    String normalized = paragraph.replace('\n', ' ').trim().replaceAll(" +", " ");

    // Nothing but whitespace: there are no words to learn from.
    if (normalized.isEmpty()) {
        return;
    }

    // Is the paragraph as a whole complete? If not, mark its end with DEFAULT_SENTENCE_END.
    if (!isCompleteSentence(normalized)) {
        normalized += DEFAULT_SENTENCE_END;
    }

    // Split after every period, exclamation mark, or question mark that is followed by a space,
    // then ingest each resulting sentence individually.
    for (final String sentence : normalized.split("(?<=[.!?]) ")) {
        this.ingestSentence(sentence);
    }
}
/**
* Ingests a string containing a single sentence.
*
* @param sentence The sentence to ingest.
*/
private void ingestSentence(String sentence) {
    // Trim and collapse repeated spaces first so the completeness check sees the real last character.
    sentence = sentence.trim().replaceAll(" +", " ");

    // Is the given sentence complete? If not, mark its end with DEFAULT_SENTENCE_END.
    if (!isCompleteSentence(sentence)) {
        sentence += DEFAULT_SENTENCE_END;
    }

    // Split the sentence into words.
    final String[] words = sentence.split(" ");

    // A sentence with fewer than two words has no word-to-word transition to record,
    // so there is no need to borrow a Redis connection for it.
    if (words.length < 2) {
        return;
    }

    // Get a reference to Jedis from the pool.
    try (final Jedis jedis = this.crabstero.getJedisPool().getResource()) {
        // Use a pipeline to avoid blocking caused by waiting for responses from the Redis server.
        // NOTE(review): the pipeline is never synced explicitly; presumably the queued commands are flushed when
        // the connection is returned to the pool - confirm for the Jedis version in use.
        final Pipeline pipeline = jedis.pipelined();

        // Iterate over the array of words, starting from the first element until the second to last element.
        for (int i = 0; i < words.length - 1; i++) {
            // Is this the first word?
            if (i == 0) {
                // Yes. Push the word into a special list containing starting words.
                pipeline.lpush(this.id + ":start", words[i]);
            }

            // Push the word after this one into a list identified by this word (exactly once per transition).
            pipeline.lpush(this.id + "::" + words[i], words[i + 1]);
        }
    }
}
/**
* Generates a new sentence using words learned from previously ingested sentences.
*
* @param softCharacterLimit The amount of characters to try and limit sentence length around.
* @param hardCharacterLimit The amount of characters to cut off the sentence at if it gets too long.
* @return A new sentence.
*/
public String generate(final int softCharacterLimit, final int hardCharacterLimit) {
    // Builder for the new sentence.
    final StringBuilder newSentence = new StringBuilder();

    // Get a reference to Jedis from the pool.
    try (final Jedis jedis = this.crabstero.getJedisPool().getResource()) {
        // Does this Markov chain have any starting words?
        if (!jedis.exists(this.id + ":start")) {
            // No. Quickly ingest something to avoid throwing an error.
            // TODO: Could this soft lock since a reference to the jedis pool is already held?
            this.ingestSentence("Hello world!");
        }

        // Get a list of all starting words and pick one at random.
        final List<String> startingWords = jedis.lrange(this.id + ":start", 0, -1);
        String word = startingWords.get(rng.nextInt(startingWords.size()));
        newSentence.append(word);

        // Keep adding words until the most recent one ends the sentence.
        // The loop is skipped entirely if the starting word is already complete on its own.
        while (!isCompleteSentence(word)) {
            // Get the list of words which can be added after the previous word.
            final List<String> wordChoices = jedis.lrange(this.id + "::" + word, 0, -1);

            // No known follower for this word: stop here instead of asking the
            // random source for an index into an empty list, which would throw.
            if (wordChoices.isEmpty()) {
                break;
            }

            int index = -1;

            // Is the new sentence length at or above the soft character limit?
            if (newSentence.length() >= softCharacterLimit) {
                // Yes. Aggressively look for the first word which can conclude this sentence.
                for (int i = 0; i < wordChoices.size(); i++) {
                    if (isCompleteSentence(wordChoices.get(i))) {
                        index = i;
                        break;
                    }
                }
            }

            // Either there is still room, or no concluding word was found: pick a random word.
            if (index == -1) {
                index = rng.nextInt(wordChoices.size());
            }

            // Append a space and the selected word to the new sentence.
            word = wordChoices.get(index);
            newSentence.append(' ').append(word);

            // Has the new sentence reached the hard character limit?
            if (newSentence.length() >= hardCharacterLimit) {
                // Yes. Chop off the characters beyond the hard limit (never below zero characters)
                // and forcefully declare the new sentence complete.
                newSentence.setLength(Math.max(hardCharacterLimit, 0));
                break;
            }
        }
    }

    // The default sentence end character only marks, internally, the end of sentences which did not include a
    // punctuation mark, as is common on Discord. Remove it if it is the last character. The length check guards
    // against an empty builder, e.g. after truncating to a hard limit of zero.
    final int lastIndex = newSentence.length() - 1;
    if (lastIndex >= 0 && newSentence.charAt(lastIndex) == DEFAULT_SENTENCE_END) {
        newSentence.deleteCharAt(lastIndex);
    }

    return newSentence.toString();
}

View File

@@ -24,15 +24,18 @@ import java.security.SecureRandom;
import java.util.List;
import java.util.Random;
/**
* Assists with generating Discord messages in response to other users and ingesting raw messages.
*/
public class MarkovChainMessages {
// The Crabstero instance these helpers work for; its Jedis pool is used to reach Redis.
private final Crabstero crabstero;
// Mention filter attached to responses so that generated text does not notify users.
private final AllowedMentions allowedMentions;
// Source of randomness; decides, for example, whether a reply also gets an embed.
private final Random rng = new SecureRandom();
public MarkovChainMessages(final Crabstero crabstero) {
this.crabstero = crabstero;
// Set up an allowed mentions filter which blocks any mentions from generating notifications to users.
final AllowedMentionsBuilder builder = new AllowedMentionsBuilder();
builder.setMentionEveryoneAndHere(false);
builder.setMentionRoles(false);
@@ -40,16 +43,27 @@ public class MarkovChainMessages {
this.allowedMentions = builder.build();
}
/**
* Sends a new message in Discord in response to a given message.
*
* @param message The message prompting the response.
*/
public void replyToMessage(final Message message) {
final TextChannel channel = message.getChannel();
// Does Crabstero have permissions to write to the channel the message was sent in?
if (!channel.canYouWrite()) {
// No. Give up.
return;
}
final long channelID;
// Is this channel a thread?
if (channel.asServerThreadChannel().isPresent()) {
// Yes. Store the ID of the parent channel for this thread.
channelID = channel.asServerThreadChannel().get().getParent().getId();
} else {
// No. Store the ID of the channel the message was sent in.
channelID = channel.getId();
}
@@ -57,13 +71,20 @@ public class MarkovChainMessages {
final MessageBuilder response = new MessageBuilder();
final MarkovChain markovChain = new MarkovChain(channelID, this.crabstero);
// Tell Discord which message Crabstero is replying to
response.replyTo(message);
// Generate a new body.
response.setContent(markovChain.generate(750, 1000));
// Was a random number greater than 0.95 (5% of the time) and does Crabstero have permission to use embeds in this channel?
if (this.rng.nextDouble() >= 0.95 && channel.canYouEmbedLinks()) {
// Yes. Generate an embed with a random title and description.
final EmbedBuilder embed = new EmbedBuilder();
embed.setTitle(markovChain.generate(200, 300));
embed.setDescription(markovChain.generate(300, 500));
// If image URLs are known for this channel, choose a random one and attach it to the embed.
try (final Jedis jedis = this.crabstero.getJedisPool().getResource()) {
final List<String> embedImageURLs = jedis.lrange(channelID + ":images", 0, -1);
@@ -72,36 +93,61 @@ public class MarkovChainMessages {
}
}
// Add a watermark to the footer of the embed.
embed.setFooter("Crabstero is a logal.dev project", "https://logal.dev/images/logo.png");
// Attach the embed to the response.
response.setEmbed(embed);
}
// Set the allowed mentions filter which blocks all mentions from generating notifications.
response.setAllowedMentions(allowedMentions);
// Send the response.
response.send(channel).exceptionally(ExceptionLogger.get());
}
/**
* Ingests a given message into its channel's Markov chain.
*
* @param message The message to ingest.
*/
public void ingestMessage(final Message message) {
    final MessageAuthor sender = message.getAuthor();

    // Nothing is learned from bots...
    if (sender.isBotUser()) {
        return;
    }

    // ...nor from webhooks...
    if (sender.isWebhook()) {
        return;
    }

    // ...nor from messages which mention Crabstero itself.
    if (message.getMentionedUsers().contains(message.getApi().getYourself())) {
        return;
    }

    // The Markov chain is keyed by the ID of the channel the message was sent in.
    // NOTE(review): replyToMessage reads from the parent channel's chain when the channel is a thread, whereas this
    // method always uses the message's own channel ID - confirm that is intended.
    final long channelID = message.getChannel().getId();

    // Feed the text of the message into that channel's chain.
    new MarkovChain(channelID, this.crabstero).ingest(message.getContent());

    // Every embed attached to the message is ingested on its own.
    message.getEmbeds().forEach(embed -> this.ingestEmbed(channelID, embed));
}
/**
* Ingests a given embed into a given channel's Markov chain.
*
* @param channelID The ID of the channel to use for the Markov Chain.
* @param embed The embed to ingest.
*/
public void ingestEmbed(final long channelID, final Embed embed) {
// Get the Markov chain for the given channel ID.
final MarkovChain markovChain = new MarkovChain(channelID, this.crabstero);
// If the embed has a title or description, ingest each one separately.
embed.getTitle().ifPresent(markovChain::ingest);
embed.getDescription().ifPresent(markovChain::ingest);
// If the embed has an image, store the URL to the image.
embed.getImage().ifPresent((image) -> {
try (final Jedis jedis = this.crabstero.getJedisPool().getResource()) {
jedis.lpush(channelID + ":images", image.getUrl().toString());