Expose overlap ratio and length limit to config
This commit is contained in:
parent
54563726b2
commit
dd78364f2d
3 changed files with 33 additions and 12 deletions
|
@ -49,7 +49,7 @@ I recommend that you create your bot's account on a Mastodon instance. Creating
|
||||||
Configuring mstdn-ebooks is accomplished by editing `config.json`. If you want to use a different file for configuration, specify it with the `--cfg` argument. For example, if you want to use `/home/lynne/c.json` instead, you would run `python3 main.py --cfg /home/lynne/c.json` instead of just `python3 main.py`
|
Configuring mstdn-ebooks is accomplished by editing `config.json`. If you want to use a different file for configuration, specify it with the `--cfg` argument. For example, if you want to use `/home/lynne/c.json` instead, you would run `python3 main.py --cfg /home/lynne/c.json` instead of just `python3 main.py`
|
||||||
|
|
||||||
| Setting | Default | Meaning |
|
| Setting | Default | Meaning |
|
||||||
|--------------------|------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
|--------------------------|-----------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||||
| site | https://botsin.space | The instance your bot will log in to and post from. This must start with `https://` or `http://` (preferably the former) |
|
| site | https://botsin.space | The instance your bot will log in to and post from. This must start with `https://` or `http://` (preferably the former) |
|
||||||
| cw | null | The content warning (aka subject) mstdn-ebooks will apply to non-error posts. |
|
| cw | null | The content warning (aka subject) mstdn-ebooks will apply to non-error posts. |
|
||||||
| instance_blacklist | ["bofa.lol", "witches.town", "knzk.me"] | If your bot is following someone from a blacklisted instance, it will skip over them and not download their posts. This is useful for ensuring that mstdn-ebooks doesn't waste time trying to download posts from dead instances, without you having to unfollow the user(s) from them. |
|
| instance_blacklist | ["bofa.lol", "witches.town", "knzk.me"] | If your bot is following someone from a blacklisted instance, it will skip over them and not download their posts. This is useful for ensuring that mstdn-ebooks doesn't waste time trying to download posts from dead instances, without you having to unfollow the user(s) from them. |
|
||||||
|
@ -57,6 +57,11 @@ Configuring mstdn-ebooks is accomplished by editing `config.json`. If you want t
|
||||||
| mention_handling | 1 | 0: Never use mentions. 1: Only generate fake mentions in the middle of posts, never at the start. 2: Use mentions as normal (old behaviour). |
|
| mention_handling | 1 | 0: Never use mentions. 1: Only generate fake mentions in the middle of posts, never at the start. 2: Use mentions as normal (old behaviour). |
|
||||||
| max_thread_length | 15 | The maximum number of bot posts in a thread before it stops replying. A thread can be 10 or 10000 posts long, but the bot will stop after it has posted `max_thread_length` times. |
|
| max_thread_length | 15 | The maximum number of bot posts in a thread before it stops replying. A thread can be 10 or 10000 posts long, but the bot will stop after it has posted `max_thread_length` times. |
|
||||||
| strip_paired_punctuation | false | If true, mstdn-ebooks will remove punctuation that commonly appears in pairs, like " and (). This avoids the issue of posts that open a bracket (or quote) without closing it. |
|
| strip_paired_punctuation | false | If true, mstdn-ebooks will remove punctuation that commonly appears in pairs, like " and (). This avoids the issue of posts that open a bracket (or quote) without closing it. |
|
||||||
|
| limit_length | false | If true, the maximum sentence length (in words) will be chosen at random between `length_lower_limit` and `length_upper_limit`. |
|
||||||
|
| length_lower_limit | 5 | The lower bound in the random number range above. Only matters if `limit_length` is true. |
|
||||||
|
| length_upper_limit | 50 | The upper bound in the random number range above. Can be the same as `length_lower_limit` to disable randomness. Only matters if `limit_length` is true. |
|
||||||
|
| overlap_ratio_enabled | false | If true, checks the output's similarity to the original posts. |
|
||||||
|
| overlap_ratio | 0.7 | The ratio that determines whether the output is too similar to the original posts. As the ratio decreases, both the interestingness of the output and the likelihood of failing to create output increase. Only matters if `overlap_ratio_enabled` is true. |
|
||||||
|
|
||||||
## Donating
|
## Donating
|
||||||
Please don't feel obligated to donate at all.
|
Please don't feel obligated to donate at all.
|
||||||
|
|
15
functions.py
15
functions.py
|
@ -5,6 +5,7 @@
|
||||||
|
|
||||||
import markovify
|
import markovify
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
from random import randint
|
||||||
import re, multiprocessing, sqlite3, shutil, os, html
|
import re, multiprocessing, sqlite3, shutil, os, html
|
||||||
|
|
||||||
def make_sentence(output, cfg):
|
def make_sentence(output, cfg):
|
||||||
|
@ -25,7 +26,9 @@ def make_sentence(output, cfg):
|
||||||
output.send("Database is empty! Try running main.py.")
|
output.send("Database is empty! Try running main.py.")
|
||||||
return
|
return
|
||||||
|
|
||||||
model = nlt_fixed(
|
nlt = markovify.NewlineText if cfg['overlap_ratio_enabled'] else nlt_fixed
|
||||||
|
|
||||||
|
model = nlt(
|
||||||
"\n".join([toot[0] for toot in toots])
|
"\n".join([toot[0] for toot in toots])
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -34,10 +37,18 @@ def make_sentence(output, cfg):
|
||||||
|
|
||||||
toots_str = None
|
toots_str = None
|
||||||
|
|
||||||
|
if cfg['limit_length']:
|
||||||
|
sentence_len = randint(cfg['length_lower_limit'], cfg['length_upper_limit'])
|
||||||
|
|
||||||
sentence = None
|
sentence = None
|
||||||
tries = 0
|
tries = 0
|
||||||
while sentence is None and tries < 10:
|
while sentence is None and tries < 10:
|
||||||
sentence = model.make_short_sentence(500, tries=10000)
|
sentence = model.make_short_sentence(
|
||||||
|
max_chars=500,
|
||||||
|
tries=10000,
|
||||||
|
max_overlap_ratio=cfg['overlap_ratio'] if cfg['overlap_ratio_enabled'] else 0.7,
|
||||||
|
max_words=sentence_len if cfg['limit_length'] else None
|
||||||
|
)
|
||||||
tries = tries + 1
|
tries = tries + 1
|
||||||
|
|
||||||
# optionally remove mentions
|
# optionally remove mentions
|
||||||
|
|
7
main.py
7
main.py
|
@ -27,7 +27,12 @@ cfg = {
|
||||||
"learn_from_cw": False,
|
"learn_from_cw": False,
|
||||||
"mention_handling": 1,
|
"mention_handling": 1,
|
||||||
"max_thread_length": 15,
|
"max_thread_length": 15,
|
||||||
"strip_paired_punctuation": False
|
"strip_paired_punctuation": False,
|
||||||
|
"limit_length": False,
|
||||||
|
"length_lower_limit": 5,
|
||||||
|
"length_upper_limit": 50,
|
||||||
|
"overlap_ratio_enabled": False,
|
||||||
|
"overlap_ratio": 0.7
|
||||||
}
|
}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
Loading…
Reference in a new issue