@@ -604,7 +604,7 @@ def generate_sql(
             # Reset temperature to 0.5
             current_temperature = 0.5
             if model_name == "h2ogpt-sql-sqlcoder2" or model_name == "h2ogpt-sql-sqlcoder-34b-alpha" or model_name == "h2ogpt-sql-nsql-llama-2-7B":
-                m_name = MODEL_CHOICE_MAP_EVAL_MODE.get(model_name, "h2ogpt-sql-sqlcoder2")
+                m_name = MODEL_CHOICE_MAP_EVAL_MODE.get(model_name, "h2ogpt-sql-sqlcoder-34b-alpha")
                 query_txt = [{"role": "user", "content": query},]
                 logger.debug(f"Generation with default temperature : {current_temperature}")
                 completion = self.h2ogpt_client.with_options(max_retries=3).chat.completions.create(
@@ -633,79 +633,104 @@ def generate_sql(
                 # throttle temperature for different result
                 logger.info("Regeneration requested on previous query ...")
                 logger.debug(f"Selected temperature for fast regeneration : {random_temperature}")
-                output = model.generate(
-                    **inputs.to(device_type),
-                    max_new_tokens=512,
-                    temperature=random_temperature,
-                    output_scores=True,
-                    do_sample=True,
-                    return_dict_in_generate=True,
-                )
-                generated_tokens = output.sequences[:, input_length:][0]
+                if model_name == "h2ogpt-sql-sqlcoder2" or model_name == "h2ogpt-sql-sqlcoder-34b-alpha" or model_name == "h2ogpt-sql-nsql-llama-2-7B":
+                    m_name = MODEL_CHOICE_MAP_EVAL_MODE.get(model_name, "h2ogpt-sql-sqlcoder-34b-alpha")
+                    query_txt = [{"role": "user", "content": query},]
+                    completion = self.h2ogpt_client.with_options(max_retries=3).chat.completions.create(
+                        model=m_name,
+                        messages=query_txt,
+                        max_tokens=512,
+                        temperature=random_temperature,
+                        stop="```",
+                        seed=random_seed)
+                    generated_tokens = completion.choices[0].message.content
+                else:
+                    output = model.generate(
+                        **inputs.to(device_type),
+                        max_new_tokens=512,
+                        temperature=random_temperature,
+                        output_scores=True,
+                        do_sample=True,
+                        return_dict_in_generate=True,
+                    )
+                    generated_tokens = output.sequences[:, input_length:][0]
                 self.current_temps[model_name] = random_temperature
                 logger.debug(f"Temperature saved: {self.current_temps[model_name]}")
             else:
                 logger.info("Regeneration with options requested on previous query ...")
-                # Diverse beam search decoding to explore more options
-                logger.debug(f"Selected temperature for diverse beam search: {random_temperature}")
-                output_re = model.generate(
-                    **inputs.to(device_type),
-                    max_new_tokens=512,
-                    temperature=random_temperature,
-                    top_k=5,
-                    top_p=0.9,
-                    num_beams=5,
-                    num_beam_groups=5,
-                    num_return_sequences=5,
-                    output_scores=True,
-                    do_sample=False,
-                    diversity_penalty=2.0,
-                    return_dict_in_generate=True,
-                )
+                if model_name == "h2ogpt-sql-sqlcoder2" or model_name == "h2ogpt-sql-sqlcoder-34b-alpha" or model_name == "h2ogpt-sql-nsql-llama-2-7B":
+                    logger.info("Generating diverse options is not enabled for remote models")
+                    m_name = MODEL_CHOICE_MAP_EVAL_MODE.get(model_name, "h2ogpt-sql-sqlcoder-34b-alpha")
+                    query_txt = [{"role": "user", "content": query},]
+                    completion = self.h2ogpt_client.with_options(max_retries=3).chat.completions.create(
+                        model=m_name,
+                        messages=query_txt,
+                        max_tokens=512,
+                        temperature=random_temperature,
+                        stop="```",
+                        seed=random_seed)
+                    generated_tokens = completion.choices[0].message.content
+                else:
+                    # Diverse beam search decoding to explore more options
+                    logger.debug(f"Selected temperature for diverse beam search: {random_temperature}")
+                    output_re = model.generate(
+                        **inputs.to(device_type),
+                        max_new_tokens=512,
+                        temperature=random_temperature,
+                        top_k=5,
+                        top_p=0.9,
+                        num_beams=5,
+                        num_beam_groups=5,
+                        num_return_sequences=5,
+                        output_scores=True,
+                        do_sample=False,  # diverse (group) beam search is incompatible with sampling; True raises a ValueError
+                        diversity_penalty=2.0,
+                        return_dict_in_generate=True,
+                    )

-                transition_scores = model.compute_transition_scores(
-                    output_re.sequences, output_re.scores, output_re.beam_indices, normalize_logits=False
-                )
+                    transition_scores = model.compute_transition_scores(
+                        output_re.sequences, output_re.scores, output_re.beam_indices, normalize_logits=False
+                    )

-                # Create a boolean tensor where elements are True if the corresponding element in transition_scores is less than 0
-                mask = transition_scores < 0
-                # Sum the True values along axis 1
-                counts = torch.sum(mask, dim=1)
-                output_length = inputs.input_ids.shape[1] + counts
-                length_penalty = model.generation_config.length_penalty
-                reconstructed_scores = transition_scores.sum(axis=1) / (output_length ** length_penalty)
-
-                # Converting logit scores to prob scores
-                probabilities_scores = F.softmax(reconstructed_scores, dim=-1)
-                out_idx = torch.argmax(probabilities_scores)
-                # Final output
-                output = output_re.sequences[out_idx]
-                generated_tokens = output[input_length:]
-
-                logger.info(f"Generated options:\n")
-                prob_sorted_idxs = sorted(
-                    range(len(probabilities_scores)), key=lambda k: probabilities_scores[k], reverse=True
-                )
-                for idx, sorted_idx in enumerate(prob_sorted_idxs):
-                    _out = output_re.sequences[sorted_idx]
-                    res = tokenizer.decode(_out[input_length:], skip_special_tokens=True)
-                    result = res.replace("table_name", _table_name)
-                    # Remove the last semi-colon if exists at the end
-                    # we will add it later
-                    if result.endswith(";"):
-                        result = result.replace(";", "")
-                    if "LIMIT".lower() not in result.lower():
-                        res = "SELECT " + result.strip() + " LIMIT 100;"
-                    else:
-                        res = "SELECT " + result.strip() + ";"
-
-                    pretty_sql = sqlparse.format(res, reindent=True, keyword_case="upper")
-                    syntax_highlight = f"""``` sql\n{pretty_sql}\n```\n\n"""
-                    alt_res = (
-                        f"Option {idx + 1}: (_probability_: {probabilities_scores[sorted_idx]})\n{syntax_highlight}\n"
+                    # Create a boolean tensor where elements are True if the corresponding element in transition_scores is less than 0
+                    mask = transition_scores < 0
+                    # Sum the True values along axis 1
+                    counts = torch.sum(mask, dim=1)
+                    output_length = inputs.input_ids.shape[1] + counts
+                    length_penalty = model.generation_config.length_penalty
+                    reconstructed_scores = transition_scores.sum(axis=1) / (output_length ** length_penalty)
+
+                    # Converting logit scores to prob scores
+                    probabilities_scores = F.softmax(reconstructed_scores, dim=-1)
+                    out_idx = torch.argmax(probabilities_scores)
+                    # Final output
+                    output = output_re.sequences[out_idx]
+                    generated_tokens = output[input_length:]
+
+                    logger.info(f"Generated options:\n")
+                    prob_sorted_idxs = sorted(
+                        range(len(probabilities_scores)), key=lambda k: probabilities_scores[k], reverse=True
                     )
-                    alternate_queries.append(alt_res)
-                    logger.info(alt_res)
+                    for idx, sorted_idx in enumerate(prob_sorted_idxs):
+                        _out = output_re.sequences[sorted_idx]
+                        res = tokenizer.decode(_out[input_length:], skip_special_tokens=True)
+                        result = res.replace("table_name", _table_name)
+                        # Remove the last semi-colon if exists at the end
+                        # we will add it later
+                        if result.endswith(";"):
+                            result = result.replace(";", "")
+                        if "LIMIT".lower() not in result.lower():
+                            res = "SELECT " + result.strip() + " LIMIT 100;"
+                        else:
+                            res = "SELECT " + result.strip() + ";"
+
+                        pretty_sql = sqlparse.format(res, reindent=True, keyword_case="upper")
+                        syntax_highlight = f"""``` sql\n{pretty_sql}\n```\n\n"""
+                        alt_res = (
+                            f"Option {idx + 1}: (_probability_: {probabilities_scores[sorted_idx]})\n{syntax_highlight}\n"
+                        )
+                        alternate_queries.append(alt_res)
+                        logger.info(f"Alternate options:\n{alt_res}")

                 _res = generated_tokens
                 if not self.remote_model and tokenizer:
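Both regeneration branches in this hunk now share the same remote path: instead of local `model.generate`, eval-mode models go through the OpenAI-compatible h2oGPT client with a throttled `temperature` and an explicit `seed`, so a regeneration differs from the previous attempt yet stays reproducible for a given seed. A hedged sketch of that call shape (endpoint URL, API key, and prompt are placeholders, not the project's configuration):

```python
from openai import OpenAI

# Placeholder endpoint and key; the class wires this up as self.h2ogpt_client.
client = OpenAI(base_url="http://localhost:5000/v1", api_key="EMPTY")

completion = client.with_options(max_retries=3).chat.completions.create(
    model="h2ogpt-sql-sqlcoder-34b-alpha",  # resolved via MODEL_CHOICE_MAP_EVAL_MODE
    messages=[{"role": "user", "content": "<schema + question prompt>"}],
    max_tokens=512,
    temperature=0.75,  # randomized per regeneration request
    stop="```",        # stop at the closing markdown fence around the SQL
    seed=42,           # best-effort determinism at a fixed temperature
)
print(completion.choices[0].message.content)
```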
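For local models, the diverse-beam branch still has to pick one of the five returned sequences. The hunk reconstructs each beam's length-normalized score from the per-token transition scores, softmaxes across candidates, and takes the argmax. A condensed, self-contained sketch of that selection step (`pick_best_beam` and its arguments are illustrative names; it assumes padded positions in `transition_scores` are zeros and real tokens carry negative log-scores, as in the code above):

```python
import torch
import torch.nn.functional as F

def pick_best_beam(transition_scores: torch.Tensor, prompt_length: int,
                   length_penalty: float = 1.0):
    # Real tokens have negative log-scores; padding is 0, so counting
    # negatives recovers each candidate's generated length.
    counts = torch.sum(transition_scores < 0, dim=1)
    output_length = prompt_length + counts
    # Length-normalized sequence score, mirroring beam-search scoring.
    reconstructed = transition_scores.sum(dim=1) / (output_length ** length_penalty)
    # Softmax turns the scores into relative probabilities over candidates.
    probs = F.softmax(reconstructed, dim=-1)
    return int(torch.argmax(probs)), probs

# Toy example: two candidates; the shorter, higher-scored one wins.
scores = torch.tensor([[-0.1, -0.2, 0.0], [-0.5, -0.6, -0.7]])
best, probs = pick_best_beam(scores, prompt_length=10)
assert best == 0
```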
@@ -721,7 +746,7 @@ def generate_sql(
             # TODO Below should not happen, will have to check why its getting generated as part of response.
             # Not sure, if its a vllm or prompt issue.
             _temp = _temp.replace("/[/INST]", "").replace("[INST]", "").replace("[/INST]", "").strip()
-            if "SELECT".lower() not in _temp.lower():
+            if not _temp.lower().startswith('SELECT'.lower()):
                 _temp = "SELECT " + _temp.strip()
             res = _temp
             if "LIMIT".lower() not in _temp.lower():
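The final hunk tightens the `SELECT` guard: the old substring test skipped the prefix whenever "select" appeared anywhere in the text, so a fragment that merely mentioned the word was left as invalid SQL. The new check only trusts a literal leading `SELECT`. A small sketch of the difference (`ensure_select_prefix` is a hypothetical helper, not the project's API):

```python
def ensure_select_prefix(sql_text: str) -> str:
    # Prepend SELECT unless the text literally starts with it.
    if not sql_text.lower().startswith("select"):
        sql_text = "SELECT " + sql_text.strip()
    return sql_text

# The old `"select" not in _temp.lower()` check would have left this untouched:
assert ensure_select_prefix("name FROM users WHERE bio LIKE '%select%'") \
    == "SELECT name FROM users WHERE bio LIKE '%select%'"
assert ensure_select_prefix("SELECT 1") == "SELECT 1"
```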