From 5a990fd550b37b631281509c3e554ed8f86534b7 Mon Sep 17 00:00:00 2001
From: HappyZ
Date: Mon, 29 May 2023 23:33:21 -0700
Subject: [PATCH] [BE] estimate count of token using rule of thumb: one token
 generally corresponds to ~4 characters of text.

---
 backend.py              | 8 +++++++-
 templates/index.html    | 6 ++----
 utilities/img2img.py    | 8 ++++----
 utilities/inpainting.py | 8 ++++----
 utilities/text2img.py   | 8 ++++----
 5 files changed, 21 insertions(+), 17 deletions(-)

diff --git a/backend.py b/backend.py
index 89db89b..95b94a6 100644
--- a/backend.py
+++ b/backend.py
@@ -138,7 +138,13 @@ def backend(model, gfpgan_folderpath, is_debugging: bool):
             )
         elif next_job[KEY_JOB_TYPE] == VALUE_JOB_RESTORATION:
             ref_img_filepath = next_job[REFERENCE_IMG]
-            result_dict = gfpgan(gfpgan_folderpath, next_job[UUID], ref_img_filepath, config=config, logger=logger)
+            result_dict = gfpgan(
+                gfpgan_folderpath,
+                next_job[UUID],
+                ref_img_filepath,
+                config=config,
+                logger=logger,
+            )
             if not result_dict:
                 raise ValueError("failed to run gfpgan")
         else:
diff --git a/templates/index.html b/templates/index.html
index 567747b..f478459 100644
--- a/templates/index.html
+++ b/templates/index.html
@@ -281,7 +281,7 @@
-
@@ -689,9 +689,7 @@
             },
             error: function (xhr, status, error) {
-                // Handle error response
-                console.log(xhr.responseText);
-                $('#joblist').html("found nothing");
+                $('#joblist').html("try again later");
             }
         });
     });
diff --git a/utilities/img2img.py b/utilities/img2img.py
index 710122b..99989dc 100644
--- a/utilities/img2img.py
+++ b/utilities/img2img.py
@@ -44,17 +44,17 @@ class Img2Img:
         self.__logger.info(f"model has max length of {self.__max_length}")

     def __token_limit_workaround(self, prompt: str, negative_prompt: str = ""):
-        count_prompt = len(re.split("[ ,]+", prompt))
-        count_negative_prompt = len(re.split("[ ,]+", negative_prompt))
+        token_est_count_prompt = len(prompt) / 4
+        token_est_count_neg_prompt = len(negative_prompt) / 4

-        if count_prompt < 77 and count_negative_prompt < 77:
+        if token_est_count_prompt < 77 and token_est_count_neg_prompt < 77:
             return prompt, None, negative_prompt, None

         self.__logger.info(
             "using workaround to generate embeds instead of direct string"
         )

-        if count_prompt >= count_negative_prompt:
+        if token_est_count_prompt >= token_est_count_neg_prompt:
             input_ids = self.model.img2img_pipeline.tokenizer(
                 prompt, return_tensors="pt", truncation=False
             ).input_ids.to(self.__device)
diff --git a/utilities/inpainting.py b/utilities/inpainting.py
index 54b6e82..b3d16bc 100644
--- a/utilities/inpainting.py
+++ b/utilities/inpainting.py
@@ -43,17 +43,17 @@ class Inpainting:
         self.__logger.info(f"model has max length of {self.__max_length}")

     def __token_limit_workaround(self, prompt: str, negative_prompt: str = ""):
-        count_prompt = len(re.split("[ ,]+", prompt))
-        count_negative_prompt = len(re.split("[ ,]+", negative_prompt))
+        token_est_count_prompt = len(prompt) / 4
+        token_est_count_neg_prompt = len(negative_prompt) / 4

-        if count_prompt < 77 and count_negative_prompt < 77:
+        if token_est_count_prompt < 77 and token_est_count_neg_prompt < 77:
             return prompt, None, negative_prompt, None

         self.__logger.info(
             "using workaround to generate embeds instead of direct string"
         )

-        if count_prompt >= count_negative_prompt:
+        if token_est_count_prompt >= token_est_count_neg_prompt:
             input_ids = self.model.inpaint_pipeline.tokenizer(
                 prompt, return_tensors="pt", truncation=False
             ).input_ids.to(self.__device)
diff --git a/utilities/text2img.py b/utilities/text2img.py
index 5008cc0..298d785 100644
--- a/utilities/text2img.py
+++ b/utilities/text2img.py
@@ -41,17 +41,17 @@ class Text2Img:
         self.__logger.info(f"model has max length of {self.__max_length}")

     def __token_limit_workaround(self, prompt: str, negative_prompt: str = ""):
-        count_prompt = len(re.split("[ ,]+", prompt))
-        count_negative_prompt = len(re.split("[ ,]+", negative_prompt))
+        token_est_count_prompt = len(prompt) / 4
+        token_est_count_neg_prompt = len(negative_prompt) / 4

-        if count_prompt < 77 and count_negative_prompt < 77:
+        if token_est_count_prompt < 77 and token_est_count_neg_prompt < 77:
             return prompt, None, negative_prompt, None

         self.__logger.info(
             "using workaround to generate embeds instead of direct string"
         )

-        if count_prompt >= count_negative_prompt:
+        if token_est_count_prompt >= token_est_count_neg_prompt:
             input_ids = self.model.txt2img_pipeline.tokenizer(
                 prompt, return_tensors="pt", truncation=False
             ).input_ids.to(self.__device)
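
For reference, the rule of thumb this patch standardizes on can be exercised on its own. The sketch below is illustrative rather than code from this repository: the helper names are made up for the example, and the 77-token threshold is taken from the `< 77` comparisons in the diff (the CLIP text encoder's maximum sequence length).

MAX_LENGTH = 77  # mirrors the `< 77` checks in __token_limit_workaround


def estimate_token_count(text: str) -> float:
    # Rule of thumb from the commit message:
    # one token generally corresponds to ~4 characters of text.
    return len(text) / 4


def needs_workaround(prompt: str, negative_prompt: str = "") -> bool:
    # True when either prompt is likely to exceed the model's max length,
    # in which case the pipelines switch to generating embeds directly.
    return (
        estimate_token_count(prompt) >= MAX_LENGTH
        or estimate_token_count(negative_prompt) >= MAX_LENGTH
    )


if __name__ == "__main__":
    print(needs_workaround("a cat"))                          # False (~1.25 tokens)
    print(needs_workaround("a very detailed prompt, " * 20))  # True (~120 tokens)

Unlike the old `len(re.split("[ ,]+", prompt))` word count, the character-based estimate needs no `re` import and tracks subword tokenization more closely on long comma-separated prompts. It is still only a gate: the exact count comes from the tokenizer once the workaround path is taken.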
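
When the gate trips, the diff falls back to the pipeline tokenizer with `truncation=False` to obtain real token ids. The snippet below compares the estimate against an exact count; the model id `openai/clip-vit-large-patch14` is an assumption for illustration only, since the repository reaches its tokenizer through the pipeline object (e.g. `self.model.txt2img_pipeline.tokenizer`).

from transformers import CLIPTokenizer

# Stand-in for the pipeline's own tokenizer (an assumption; the patch
# uses e.g. self.model.txt2img_pipeline.tokenizer instead).
tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")

prompt = "a highly detailed portrait of an astronaut, oil painting, dramatic lighting"

# Exact count: tokenize without truncation, as in the diff's
# tokenizer(prompt, return_tensors="pt", truncation=False) calls.
input_ids = tokenizer(prompt, truncation=False).input_ids

print(f"exact tokens: {len(input_ids)}")           # includes BOS/EOS specials
print(f"estimated tokens: {len(prompt) / 4:.1f}")  # rule-of-thumb estimate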