+ allowed --keywords-file to be given multiple times

~ keywords.json replaced with other *.keywords.json files
This commit is contained in:
nicobo 2020-05-12 23:33:49 +02:00
parent a68935e402
commit 9466e8378c
No known key found for this signature in database
GPG key ID: 2581E71C5FA5285F
4 changed files with 60 additions and 38 deletions

View file

@ -69,9 +69,13 @@ Finally, see the following chapter about the **config.yml** file.
### Config.yml configuration file
Options can also be taken from a configuration file : by default it reads the `config.yml` file in the current directory, but this can be changed with the `--config-file` and `--config-dir` options.
This file is in YAML format with all options at the root level. Keys have the same name as command line options, with middle dashes `-` replaced with underscores `_`.
E.g. `--ibmcloud-url https://api...` will become `ibmcloud_url: https://api...`.
This file is in YAML format with all options at the root level. Keys have the same name as the command line options, with middle dashes `-` replaced with underscores `_` and an 's' appended for lists (options that can appear several times).
E.g. `--ibmcloud-url https://api...` will become `ibmcloud_url: https://api...` and `--keywords-file 1.json --keywords-file 2.json` will become :
```yaml
keywords_files:
- 1.json
- 2.json
```
A sample configuration is available in the `test/sample-conf/` directory.

View file

@ -47,7 +47,7 @@ class Config:
'ibmcloud_apikey': None,
'input_file': sys.stdin,
'keywords': [],
'keywords_file': None,
'keywords_files': [],
'languages': [],
'languages_file': None,
'locale': None,
@ -76,10 +76,10 @@ class TransBot(Bot):
"""
def __init__( self, chatter, ibmcloud_url, ibmcloud_apikey, keywords=None, keywords_file=None, languages=None, languages_file=None, shutdown_pattern=r'bye nicobot' ):
def __init__( self, chatter, ibmcloud_url, ibmcloud_apikey, keywords=[], keywords_files=[], languages=None, languages_file=None, shutdown_pattern=r'bye nicobot' ):
"""
keywords: list of keywords that will trigger this bot (in any supported language)
keywords_file: JSON file where to find the list of keywords (or write into)
keywords_files: list of JSON files with each a list of keywords (or write into)
languages: List of supported languages in this format : https://cloud.ibm.com/apidocs/language-translator#list-identifiable-languages
languages_file: JSON file where to find the list of target languages (or write into)
shutdown_pattern: a regular expression pattern that terminates this bot
@ -101,8 +101,8 @@ class TransBot(Bot):
# How many different languages to try to translate to
self.tries = 5
# After self.languages has been set, we can iterate over to translate keywords
kws = self.loadKeywords( keywords=keywords, file=keywords_file, limit=LIMIT_KEYWORDS )
# After self.languages has been set, we can iterate over it to translate keywords
kws = self.loadKeywords( keywords=keywords, files=keywords_files, limit=LIMIT_KEYWORDS )
# And build a regular expression pattern with all keywords and their translations
pattern = kws[0]
for keyword in kws[1:]:
@ -162,32 +162,31 @@ class TransBot(Bot):
r.raise_for_status()
def loadKeywords( self, keywords=[], file=None, limit=None ):
def loadKeywords( self, keywords=[], files=[], limit=None ):
"""
Generates a list of translations from a list of keywords.
Requires self.languages to be filled before !
If 'keywords' is not empty, will download the translations from IBM Cloud into 'file'.
Otherwise, will try to read from 'file', falling back to IBM Cloud and saving it into 'file' if it fails.
If 'keywords' is not empty, will download the translations from IBM Cloud
and if a single 'file' was given, will save them into it.
Otherwise, will read from all the given 'files'
"""
# TODO It starts with the same code as in loadLanguages : make it a function
# Gets the list from a local file
if not keywords or len(keywords) == 0:
logging.debug("Reading from %s..." % file)
try:
with open(file,'r') as f:
j = json.load(f)
logging.debug("Read keyword list : %s",repr(j))
return j
except:
raise ValueError("Could not read keywords list from %s and no keyword given" % file)
pass
kws = []
# Gets the list from a local file
if len(keywords) == 0:
for file in files:
logging.debug("Reading from %s..." % file)
# May throw an error
with open(file,'r') as f:
kws = kws + json.load(f)
logging.debug("Read keyword list : %s",repr(kws))
return kws
# TODO remove duplicates
for keyword in keywords:
logging.debug("Init %s...",keyword)
@ -209,16 +208,17 @@ class TransBot(Bot):
pass
logging.debug("Keywords : %s", repr(kws))
if file:
# TODO ? Save the translations for each keyword into a separate file ?
if files and len(files) == 1:
try:
logging.debug("Saving keywords translations into %s...", file)
with open(file,'w') as f:
logging.debug("Saving keywords translations into %s...", files[0])
with open(files[0],'w') as f:
json.dump(kws,f)
except:
logging.exception("Could not save keywords translations into %s", file)
logging.exception("Could not save keywords translations into %s", files[0])
pass
else:
logging.debug("Not saving keywords as no file was given")
logging.debug("Not saving keywords as a (single) file was not given")
return kws
@ -345,7 +345,7 @@ if __name__ == '__main__':
parser.add_argument('--verbosity', '-V', dest='verbosity', default=config.verbosity, help="Log level")
# Core arguments
parser.add_argument("--keyword", "-k", dest="keywords", action="append", help="Keyword bot should react to (will write them into the file specified with --keywords-file)")
parser.add_argument("--keywords-file", dest="keywords_file", help="File to load from and write keywords to")
parser.add_argument("--keywords-files", dest="keywords_files", action="append", help="File to load from and write keywords to")
parser.add_argument("--language", "-l", dest="languages", action="append", help="Target language")
parser.add_argument("--languages-file", dest="languages_file", help="File to load from and write languages to")
parser.add_argument("--shutdown", dest="shutdown", help="Shutdown keyword regular expression pattern")
@ -425,16 +425,30 @@ if __name__ == '__main__':
# config.keywords is used if given
# else, check for an existing keywords_file
if not config.keywords_file:
if len(config.keywords_files) == 0:
# As a last resort, use 'keywords.json' in the config directory
config.keywords_file = os.path.join(config.config_dir,'keywords.json')
config.keywords_files = [ os.path.join(config.config_dir,'keywords.json') ]
# Convenience check to better warn the user
if not config.keywords:
try:
with open(config.keywords_file,'r') as f:
pass
except:
raise ValueError("Could not open %s : please generate with --keywords first or create the file indicated with --keywords-file"%config.keywords_file)
found_keywords_file = []
for keywords_file in config.keywords_files:
try:
with open(keywords_file,'r') as f:
found_keywords_file = found_keywords_file + [keywords_file]
continue
except:
# Also allows filenames relative to config_dir
try:
relative_filename = os.path.join(config.config_dir,keywords_file)
with open(relative_filename,'r') as f:
found_keywords_file = found_keywords_file + [relative_filename]
continue
except:
pass
if len(found_keywords_file) > 0:
config.keywords_files = found_keywords_file
else:
raise ValueError("Could not open any keywords file in %s : please generate with --keywords first or create the file indicated with --keywords-file"%repr(config.keywords_files))
# config.languages is used if given
# else, check for an existing languages_file
@ -475,7 +489,7 @@ if __name__ == '__main__':
#
TransBot(
keywords=config.keywords, keywords_file=config.keywords_file,
keywords=config.keywords, keywords_files=config.keywords_files,
languages=config.languages, languages_file=config.languages_file,
ibmcloud_url=config.ibmcloud_url, ibmcloud_apikey=config.ibmcloud_apikey,
shutdown_pattern=config.shutdown,

View file

@ -1,3 +1,8 @@
# A list of keywords files to load (if different from the default 'keywords.json')
keywords_files:
- hello.keywords.json
- goodbye.keywords.json
# IBM Cloud credentials for your 'Language Translator' service instance : get them from your
# See detailed instructions : https://cloud.ibm.com/apidocs/language-translator
ibmcloud_url: https://api.us-south.language-translator.watson.cloud.ibm.com/instances/6bbda3b3-d572-45e1-8c54-22d6ed9e52c2

View file

@ -1 +0,0 @@
["bonjour", "\u0645\u0631\u062d\u0628\u0627", "\u0410\u043b\u043e?", "\u09b9\u09cd\u09af\u09be\u09b2\u09cb", "Ahoj.", "Hallo?", "Guten Tag", "\u0395\u03bc\u03c0\u03c1\u03cc\u03c2!", "Hello", "hola", "Tere.", "Hei", "Hello", "\u0ab9\u0ac7\u0ab2\u0acb\u0ab5", "\u05d4\u05dc\u05d5", "\u0928\u092e\u0938\u094d\u0915\u093e\u0930", "Zdravo.", "Hello", "Ciao", "\u30cf\u30ed\u30fc", "\uc548\ub155\ud558\uc138\uc694", "Labas.", "Sveika.", "\u0d39\u0d32\u0d4b", "Hello.", "Hello", "Hallo", "\u0939\u0947\u0932\u094b", "Hallo", "Witaj", "Ol\u00e1", "Salut", "\u0417\u0434\u0440\u0430\u0432\u0441\u0442\u0432\u0443\u0439\u0442\u0435.", "\u0dc4\u0dd9\u0dbd\u0ddd", "Ahoj.", "Zdravo.", "Hej.", "\u0bb9\u0bb2\u0bcb", "\u0c39\u0c32\u0c4b", "\u0e2a\u0e27\u0e31\u0e2a\u0e14\u0e35", "Merhaba.", "\u0633\u0644\u0627\u0645", "Xin ch\u00e0o", "\u4f60\u597d", "\u4f60\u597d", "coucou", "\u0645\u0633\u062d\u0648\u0631", "\"", "\u0995\u09c1\u0981\u099a\u09be\u09a8\u09cb", "scelov\u00e1no", "couched", "Couched", "- ...", "couched", "conectado", "kubitud", "Coutettava", "Couch\u00e9", "@ info", "\u0a9c\u0acb\u0aa1\u0ac7\u0ab2", "\u05de\u05e6\u05e8\u05e3", "\u0926\u093f\u092f\u093e \u0939\u0941\u0906", "spojeno", "Kusz\u00e1lt", "accoppiata", "\u30af\u30c1\u30c9", "\ucfe8\ud558\uac8c", "kuita", "1.", "\u0d15\u0d1a\u0d4d\u0d1a\u0d35\u0d1f\u0d02", "Kuy", "kuffar", "hovet", "\u091c\u094b\u0930\u0926\u093e\u0930", "Couches", "przewr\u00f3cony", "cucut\u0103", "\u041a\u0430\u0448\u0435\u043b\u044c", "\u0d9a\u0daf\u0dc0\u0dd4\u0dbb", "pre\u0165ahovan\u00e9", "couched", "couched", "\u0b95\u0bc2\u0bae\u0bcd\u0baa\u0bc1", "\u0c15\u0c42\u0c30\u0c4d\u0c2a\u0c41", "\u0e16\u0e39\u0e01\u0e1e\u0e31\u0e01\u0e44\u0e27\u0e49", "kanepeden", "\u06a9\u0648\u062a", "C\u00f3", "\u5e93\u7126", "\u9999", "salut", "hello"]