+ --keywords-file can now be given multiple times

~ keywords.json replaced with other *.keywords.json files
2025-09-06 17:01:50 +02:00 · 2020-05-12 23:33:49 +02:00 · 2020-05-12 23:33:49 +02:00 · 9466e8378c
commit 9466e8378c
parent a68935e402
4 changed files with 60 additions and 38 deletions
--- a/README.md
+++ b/README.md
@ -69,9 +69,13 @@ Finally, see the following chapter about the **config.yml** file.
 ### Config.yml configuration file

 Options can also be taken from a configuration file : by default it reads the `config.yml` file in the current directory but can be changed with the `--config-file` and `--config-dir` options.
-This file is in YAML format with all options at the root level. Keys have the same name as command line options, with middle dashes `-` replaced with underscores `_`.
-
-E.g. `--ibmcloud-url https://api...` will become `ibmcloud_url: https://api...`.
+This file is in YAML format with all options at the root level. Keys have the same name as command line options, with middle dashes `-` replaced with underscores `_` and an 's' appended for lists (options that can appear several times).
+E.g. `--ibmcloud-url https://api...` will become `ibmcloud_url: https://api...` and `--keywords-file 1.json --keywords-file 2.json` will become :
+```yaml
+keywords_files:
+    - 1.json
+    - 2.json
+```

 A sample configuration is available in the `test/sample-conf/` directory.

--- a/nicobot/transbot.py
+++ b/nicobot/transbot.py
@ -47,7 +47,7 @@ class Config:
            'ibmcloud_apikey': None,
            'input_file': sys.stdin,
            'keywords': [],
-            'keywords_file': None,
+            'keywords_files': [],
            'languages': [],
            'languages_file': None,
            'locale': None,
@ -76,10 +76,10 @@ class TransBot(Bot):
    """


-    def __init__( self, chatter, ibmcloud_url, ibmcloud_apikey, keywords=None, keywords_file=None, languages=None, languages_file=None, shutdown_pattern=r'bye nicobot' ):
+    def __init__( self, chatter, ibmcloud_url, ibmcloud_apikey, keywords=[], keywords_files=[], languages=None, languages_file=None, shutdown_pattern=r'bye nicobot' ):
        """
            keywords: list of keywords that will trigger this bot (in any supported language)
-            keywords_file: JSON file where to find the list of keywords (or write into)
+            keywords_files: list of JSON files, each containing a list of keywords (when a single file is given, it may also be written into)
            languages: List of supported languages in this format : https://cloud.ibm.com/apidocs/language-translator#list-identifiable-languages
            languages_file: JSON file where to find the list of target languages (or write into)
            shutdown_pattern: a regular expression pattern that terminates this bot
@ -101,8 +101,8 @@ class TransBot(Bot):
        # How many different languages to try to translate to
        self.tries = 5

-        # After self.languages has been set, we can iterate over to translate keywords
-        kws = self.loadKeywords( keywords=keywords, file=keywords_file, limit=LIMIT_KEYWORDS )
+        # After self.languages has been set, we can iterate over it to translate keywords
+        kws = self.loadKeywords( keywords=keywords, files=keywords_files, limit=LIMIT_KEYWORDS )
        # And build a regular expression pattern with all keywords and their translations
        pattern = kws[0]
        for keyword in kws[1:]:
@ -162,32 +162,31 @@ class TransBot(Bot):
            r.raise_for_status()


-    def loadKeywords( self, keywords=[], file=None, limit=None ):
+    def loadKeywords( self, keywords=[], files=[], limit=None ):
        """
            Generates a list of translations from a list of keywords.

            Requires self.languages to be filled before !

-            If 'keywords' is not empty, will download the translations from IBM Cloud into 'file'.
-            Otherwise, will try to read from 'file', falling back to IBM Cloud and saving it into 'file' if it fails.
+            If 'keywords' is not empty, will download the translations from IBM Cloud
+            and, if exactly one file was given in 'files', will save them into it.
+            Otherwise, will read and concatenate the lists from all the given 'files'.
        """

        # TODO It starts with the same code as in loadLanguages : make it a function

-        # Gets the list from a local file
-        if not keywords or len(keywords) == 0:
-            logging.debug("Reading from %s..." % file)
-            try:
-                with open(file,'r') as f:
-                    j = json.load(f)
-                    logging.debug("Read keyword list : %s",repr(j))
-                    return j
-            except:
-                raise ValueError("Could not read keywords list from %s and no keyword given" % file)
-                pass
-
        kws = []

+        # Gets the list from a local file
+        if len(keywords) == 0:
+            for file in files:
+                logging.debug("Reading from %s..." % file)
+                # May throw an error
+                with open(file,'r') as f:
+                    kws = kws + json.load(f)
+            logging.debug("Read keyword list : %s",repr(kws))
+            return kws
+
        # TODO remove duplicates
        for keyword in keywords:
            logging.debug("Init %s...",keyword)
@ -209,16 +208,17 @@ class TransBot(Bot):
                    pass
        logging.debug("Keywords : %s", repr(kws))

-        if file:
+        # TODO ? Save the translations for each keyword into a separate file ?
+        if files and len(files) == 1:
            try:
-                logging.debug("Saving keywords translations into %s...", file)
-                with open(file,'w') as f:
+                logging.debug("Saving keywords translations into %s...", files[0])
+                with open(files[0],'w') as f:
                    json.dump(kws,f)
            except:
-                logging.exception("Could not save keywords translations into %s", file)
+                logging.exception("Could not save keywords translations into %s", files[0])
                pass
        else:
-            logging.debug("Not saving keywords as no file was given")
+            logging.debug("Not saving keywords as a (single) file was not given")

        return kws

@ -345,7 +345,7 @@ if __name__ == '__main__':
    parser.add_argument('--verbosity', '-V', dest='verbosity', default=config.verbosity, help="Log level")
    # Core arguments
    parser.add_argument("--keyword", "-k", dest="keywords", action="append", help="Keyword bot should react to (will write them into the file specified with --keywords-file)")
-    parser.add_argument("--keywords-file", dest="keywords_file", help="File to load from and write keywords to")
+    parser.add_argument("--keywords-files", dest="keywords_files", action="append", help="File to load from and write keywords to")
    parser.add_argument("--language", "-l", dest="languages", action="append", help="Target language")
    parser.add_argument("--languages-file", dest="languages_file", help="File to load from and write languages to")
    parser.add_argument("--shutdown", dest="shutdown", help="Shutdown keyword regular expression pattern")
@ -425,16 +425,30 @@ if __name__ == '__main__':

    # config.keywords is used if given
    # else, check for an existing keywords_file
-    if not config.keywords_file:
+    if len(config.keywords_files) == 0:
        # As a last resort, use 'keywords.json' in the config directory
-        config.keywords_file = os.path.join(config.config_dir,'keywords.json')
+        config.keywords_files = [ os.path.join(config.config_dir,'keywords.json') ]
    # Convenience check to better warn the user
    if not config.keywords:
-        try:
-            with open(config.keywords_file,'r') as f:
-                pass
-        except:
-            raise ValueError("Could not open %s : please generate with --keywords first or create the file indicated with --keywords-file"%config.keywords_file)
+        found_keywords_file = []
+        for keywords_file in config.keywords_files:
+            try:
+                with open(keywords_file,'r') as f:
+                    found_keywords_file = found_keywords_file + [keywords_file]
+                    continue
+            except:
+                # Also allows filenames relative to config_dir
+                try:
+                    relative_filename = os.path.join(config.config_dir,keywords_file)
+                    with open(relative_filename,'r') as f:
+                        found_keywords_file = found_keywords_file + [relative_filename]
+                        continue
+                except:
+                    pass
+        if len(found_keywords_file) > 0:
+            config.keywords_files = found_keywords_file
+        else:
+            raise ValueError("Could not open any keywords file in %s : please generate with --keywords first or create the file indicated with --keywords-file"%repr(config.keywords_files))

    # config.languages is used if given
    # else, check for an existing languages_file
@ -475,7 +489,7 @@ if __name__ == '__main__':
    #

    TransBot(
-        keywords=config.keywords, keywords_file=config.keywords_file,
+        keywords=config.keywords, keywords_files=config.keywords_files,
        languages=config.languages, languages_file=config.languages_file,
        ibmcloud_url=config.ibmcloud_url, ibmcloud_apikey=config.ibmcloud_apikey,
        shutdown_pattern=config.shutdown,
--- a/test/sample-conf/config.yml
+++ b/test/sample-conf/config.yml
@ -1,3 +1,8 @@
+# A list of keywords files to load (if different from the default 'keywords.json')
+keywords_files:
+    - hello.keywords.json
+    - goodbye.keywords.json
+
 # IBM Cloud credentials for your 'Language Translator' service instance : get them from your
 # See detailed instructions : https://cloud.ibm.com/apidocs/language-translator
 ibmcloud_url: https://api.us-south.language-translator.watson.cloud.ibm.com/instances/6bbda3b3-d572-45e1-8c54-22d6ed9e52c2
--- a/test/sample-conf/keywords.json
+++ b/test/sample-conf/keywords.json
@ -1 +0,0 @@
-["bonjour", "\u0645\u0631\u062d\u0628\u0627", "\u0410\u043b\u043e?", "\u09b9\u09cd\u09af\u09be\u09b2\u09cb", "Ahoj.", "Hallo?", "Guten Tag", "\u0395\u03bc\u03c0\u03c1\u03cc\u03c2!", "Hello", "hola", "Tere.", "Hei", "Hello", "\u0ab9\u0ac7\u0ab2\u0acb\u0ab5", "\u05d4\u05dc\u05d5", "\u0928\u092e\u0938\u094d\u0915\u093e\u0930", "Zdravo.", "Hello", "Ciao", "\u30cf\u30ed\u30fc", "\uc548\ub155\ud558\uc138\uc694", "Labas.", "Sveika.", "\u0d39\u0d32\u0d4b", "Hello.", "Hello", "Hallo", "\u0939\u0947\u0932\u094b", "Hallo", "Witaj", "Ol\u00e1", "Salut", "\u0417\u0434\u0440\u0430\u0432\u0441\u0442\u0432\u0443\u0439\u0442\u0435.", "\u0dc4\u0dd9\u0dbd\u0ddd", "Ahoj.", "Zdravo.", "Hej.", "\u0bb9\u0bb2\u0bcb", "\u0c39\u0c32\u0c4b", "\u0e2a\u0e27\u0e31\u0e2a\u0e14\u0e35", "Merhaba.", "\u0633\u0644\u0627\u0645", "Xin ch\u00e0o", "\u4f60\u597d", "\u4f60\u597d", "coucou", "\u0645\u0633\u062d\u0648\u0631", "\"", "\u0995\u09c1\u0981\u099a\u09be\u09a8\u09cb", "scelov\u00e1no", "couched", "Couched", "- ...", "couched", "conectado", "kubitud", "Coutettava", "Couch\u00e9", "@ info", "\u0a9c\u0acb\u0aa1\u0ac7\u0ab2", "\u05de\u05e6\u05e8\u05e3", "\u0926\u093f\u092f\u093e \u0939\u0941\u0906", "spojeno", "Kusz\u00e1lt", "accoppiata", "\u30af\u30c1\u30c9", "\ucfe8\ud558\uac8c", "kuita", "1.", "\u0d15\u0d1a\u0d4d\u0d1a\u0d35\u0d1f\u0d02", "Kuy", "kuffar", "hovet", "\u091c\u094b\u0930\u0926\u093e\u0930", "Couches", "przewr\u00f3cony", "cucut\u0103", "\u041a\u0430\u0448\u0435\u043b\u044c", "\u0d9a\u0daf\u0dc0\u0dd4\u0dbb", "pre\u0165ahovan\u00e9", "couched", "couched", "\u0b95\u0bc2\u0bae\u0bcd\u0baa\u0bc1", "\u0c15\u0c42\u0c30\u0c4d\u0c2a\u0c41", "\u0e16\u0e39\u0e01\u0e1e\u0e31\u0e01\u0e44\u0e27\u0e49", "kanepeden", "\u06a9\u0648\u062a", "C\u00f3", "\u5e93\u7126", "\u9999", "salut", "hello"]
				`@ -1 +0,0 @@`
				["bonjour", "\u0645\u0631\u062d\u0628\u0627", "\u0410\u043b\u043e?", "\u09b9\u09cd\u09af\u09be\u09b2\u09cb", "Ahoj.", "Hallo?", "Guten Tag", "\u0395\u03bc\u03c0\u03c1\u03cc\u03c2!", "Hello", "hola", "Tere.", "Hei", "Hello", "\u0ab9\u0ac7\u0ab2\u0acb\u0ab5", "\u05d4\u05dc\u05d5", "\u0928\u092e\u0938\u094d\u0915\u093e\u0930", "Zdravo.", "Hello", "Ciao", "\u30cf\u30ed\u30fc", "\uc548\ub155\ud558\uc138\uc694", "Labas.", "Sveika.", "\u0d39\u0d32\u0d4b", "Hello.", "Hello", "Hallo", "\u0939\u0947\u0932\u094b", "Hallo", "Witaj", "Ol\u00e1", "Salut", "\u0417\u0434\u0440\u0430\u0432\u0441\u0442\u0432\u0443\u0439\u0442\u0435.", "\u0dc4\u0dd9\u0dbd\u0ddd", "Ahoj.", "Zdravo.", "Hej.", "\u0bb9\u0bb2\u0bcb", "\u0c39\u0c32\u0c4b", "\u0e2a\u0e27\u0e31\u0e2a\u0e14\u0e35", "Merhaba.", "\u0633\u0644\u0627\u0645", "Xin ch\u00e0o", "\u4f60\u597d", "\u4f60\u597d", "coucou", "\u0645\u0633\u062d\u0648\u0631", "\"", "\u0995\u09c1\u0981\u099a\u09be\u09a8\u09cb", "scelov\u00e1no", "couched", "Couched", "- ...", "couched", "conectado", "kubitud", "Coutettava", "Couch\u00e9", "@ info", "\u0a9c\u0acb\u0aa1\u0ac7\u0ab2", "\u05de\u05e6\u05e8\u05e3", "\u0926\u093f\u092f\u093e \u0939\u0941\u0906", "spojeno", "Kusz\u00e1lt", "accoppiata", "\u30af\u30c1\u30c9", "\ucfe8\ud558\uac8c", "kuita", "1.", "\u0d15\u0d1a\u0d4d\u0d1a\u0d35\u0d1f\u0d02", "Kuy", "kuffar", "hovet", "\u091c\u094b\u0930\u0926\u093e\u0930", "Couches", "przewr\u00f3cony", "cucut\u0103", "\u041a\u0430\u0448\u0435\u043b\u044c", "\u0d9a\u0daf\u0dc0\u0dd4\u0dbb", "pre\u0165ahovan\u00e9", "couched", "couched", "\u0b95\u0bc2\u0bae\u0bcd\u0baa\u0bc1", "\u0c15\u0c42\u0c30\u0c4d\u0c2a\u0c41", "\u0e16\u0e39\u0e01\u0e1e\u0e31\u0e01\u0e44\u0e27\u0e49", "kanepeden", "\u06a9\u0648\u062a", "C\u00f3", "\u5e93\u7126", "\u9999", "salut", "hello"]