From 93dc5a287086299a124e9f1f6fac75458ae0acbd Mon Sep 17 00:00:00 2001
From: Massimiliano Pronesti <massimiliano.pronesti@gmail.com>
Date: Thu, 22 Feb 2024 02:56:01 +0000
Subject: [PATCH] chore(vllm): codespell for spell checking  (#2820)

---
 .github/workflows/ruff.yml                    |  5 +-
 benchmarks/benchmark_serving.py               |  2 +-
 format.sh                                     | 51 +++++++++++++++++--
 mypy.ini                                      |  8 ---
 pyproject.toml                                | 18 +++++++
 requirements-dev.txt                          |  2 +
 tests/lora/test_layers.py                     |  2 +-
 tests/lora/test_llama.py                      |  4 +-
 vllm/core/block_manager.py                    |  2 +-
 vllm/core/scheduler.py                        |  2 +-
 vllm/lora/punica.py                           |  2 +-
 .../layers/triton_kernel/prefix_prefill.py    |  2 +-
 vllm/model_executor/models/decilm.py          |  2 +-
 .../parallel_utils/custom_all_reduce.py       |  4 +-
 .../parallel_utils/parallel_state.py          |  2 +-
 vllm/utils.py                                 |  2 +-
 16 files changed, 85 insertions(+), 25 deletions(-)
 delete mode 100644 mypy.ini

diff --git a/.github/workflows/ruff.yml b/.github/workflows/ruff.yml
index bd38d11872..8f8f5ee3cc 100644
--- a/.github/workflows/ruff.yml
+++ b/.github/workflows/ruff.yml
@@ -25,7 +25,10 @@ jobs:
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
-        pip install ruff==0.1.5
+        pip install ruff==0.1.5 codespell==2.2.6 tomli==2.0.1
     - name: Analysing the code with ruff
       run: |
         ruff vllm tests
+    - name: Spelling check with codespell
+      run: |
+         codespell --toml pyproject.toml
\ No newline at end of file
diff --git a/benchmarks/benchmark_serving.py b/benchmarks/benchmark_serving.py
index ff5609c37f..7d389a9c7d 100644
--- a/benchmarks/benchmark_serving.py
+++ b/benchmarks/benchmark_serving.py
@@ -375,7 +375,7 @@ if __name__ == "__main__":
     parser.add_argument(
         "--disable-tqdm",
         action="store_true",
-        help="Specify to disbale tqdm progress bar.",
+        help="Specify to disable tqdm progress bar.",
     )
     parser.add_argument(
         "--save-result",
diff --git a/format.sh b/format.sh
index c781088696..eb2c5ab031 100755
--- a/format.sh
+++ b/format.sh
@@ -24,6 +24,7 @@ builtin cd "$ROOT" || exit 1
 YAPF_VERSION=$(yapf --version | awk '{print $2}')
 RUFF_VERSION=$(ruff --version | awk '{print $2}')
 MYPY_VERSION=$(mypy --version | awk '{print $2}')
+CODESPELL_VERSION=$(codespell --version)
 
 # # params: tool name, tool version, required version
 tool_version_check() {
@@ -36,6 +37,7 @@ tool_version_check() {
 tool_version_check "yapf" $YAPF_VERSION "$(grep yapf requirements-dev.txt | cut -d'=' -f3)"
 tool_version_check "ruff" $RUFF_VERSION "$(grep "ruff==" requirements-dev.txt | cut -d'=' -f3)"
 tool_version_check "mypy" "$MYPY_VERSION" "$(grep mypy requirements-dev.txt | cut -d'=' -f3)"
+tool_version_check "codespell" "$CODESPELL_VERSION" "$(grep codespell requirements-dev.txt | cut -d'=' -f3)"
 
 YAPF_FLAGS=(
     '--recursive'
@@ -93,6 +95,47 @@ echo 'vLLM yapf: Done'
 # echo 'vLLM mypy:'
 # mypy
 
+# check spelling of specified files
+spell_check() {
+    codespell "$@"
+}
+
+spell_check_all(){
+  codespell --toml pyproject.toml
+}
+
+# Spell check of files that differ from main branch.
+spell_check_changed() {
+    # The `if` guard ensures that the list of filenames is not empty, which
+    # would otherwise run codespell with 0 positional arguments, so the check
+    # would no longer be limited to the changed files.
+    #
+    # `diff-filter=ACM` and $MERGEBASE ensure we only spell check files that
+    # were added, copied or modified relative to the merge base with main.
+    MERGEBASE="$(git merge-base origin/main HEAD)"
+
+    if ! git diff --diff-filter=ACM --quiet --exit-code "$MERGEBASE" -- '*.py' '*.pyi' &>/dev/null; then
+        git diff --name-only --diff-filter=ACM "$MERGEBASE" -- '*.py' '*.pyi' | xargs \
+             codespell
+    fi
+}
+
+# Run Codespell
+## This flag runs spell check of individual files. --files *must* be the first command line
+## arg to use this option.
+if [[ "$1" == '--files' ]]; then
+   spell_check "${@:2}"
+   # If `--all` is passed, then any further arguments are ignored and the
+   # whole repository is spell checked using the pyproject.toml settings.
+elif [[ "$1" == '--all' ]]; then
+   spell_check_all
+else
+   # Check spelling only of the files that differ from the merge base.
+   spell_check_changed
+fi
+echo 'vLLM codespell: Done'
+
+
 # Lint specified files
 lint() {
     ruff "$@"
@@ -117,9 +160,9 @@ lint_changed() {
 }
 
 # Run Ruff
-echo 'vLLM Ruff:'
-## This flag lints individual files. --files *must* be the first command line
-## arg to use this option.
+echo 'vLLM ruff:'
+### This flag lints individual files. --files *must* be the first command line
+### arg to use this option.
 if [[ "$1" == '--files' ]]; then
    lint "${@:2}"
    # If `--all` is passed, then any further arguments are ignored and the
@@ -139,3 +182,5 @@ if ! git diff --quiet &>/dev/null; then
 
     exit 1
 fi
+
+
diff --git a/mypy.ini b/mypy.ini
deleted file mode 100644
index 55c4248ea9..0000000000
--- a/mypy.ini
+++ /dev/null
@@ -1,8 +0,0 @@
-[mypy]
-python_version = 3.8
-
-ignore_missing_imports = True
-
-files = vllm
-# TODO(woosuk): Include the code from Megatron and HuggingFace.
-exclude = vllm/model_executor/parallel_utils/|vllm/model_executor/models/
diff --git a/pyproject.toml b/pyproject.toml
index b197256f6f..c5db016ceb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -31,4 +31,22 @@ ignore = [
     "E731",
     # line too long, handled by black formatting
     "E501",
+    # .strip() with multi-character strings
+    "B005",
+    # Loop control variable not used within loop body
+    "B007",
 ]
+
+[tool.mypy]
+python_version = "3.8"
+
+ignore_missing_imports = true
+
+files = "vllm"
+# TODO(woosuk): Include the code from Megatron and HuggingFace.
+exclude = "vllm/model_executor/parallel_utils/|vllm/model_executor/models/"
+
+
+[tool.codespell]
+ignore-words-list = "dout, te, indicies"
+skip = "./tests/prompts"
diff --git a/requirements-dev.txt b/requirements-dev.txt
index f8126008d0..b54a277324 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -1,7 +1,9 @@
 # formatting
 yapf==0.32.0
 toml==0.10.2
+tomli==2.0.1
 ruff==0.1.5
+codespell==2.2.6
 
 # type checking
 mypy==0.991
diff --git a/tests/lora/test_layers.py b/tests/lora/test_layers.py
index f739bbeaab..18ce300449 100644
--- a/tests/lora/test_layers.py
+++ b/tests/lora/test_layers.py
@@ -279,7 +279,7 @@ def test_embeddings_with_new_embeddings(dist_init, num_loras, device) -> None:
             256,
             org_num_embeddings=512)
         expanded_embedding.weight.data[:512, :] = embedding_data
-        # We need to deepcopy the embedding as it will be modifed
+        # We need to deepcopy the embedding as it will be modified
         # in place
         lora_embedding = VocabParallelEmbeddingWithLoRA(
             deepcopy(expanded_embedding))
diff --git a/tests/lora/test_llama.py b/tests/lora/test_llama.py
index 06fbf19eea..dfaf8c7006 100644
--- a/tests/lora/test_llama.py
+++ b/tests/lora/test_llama.py
@@ -15,7 +15,7 @@ def do_sample(llm, lora_path: str, lora_id: int):
         "[user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_95 (one_mora VARCHAR, gloss VARCHAR, accented_mora VARCHAR)\n\n question: What is the one mora for a low tone mora with a gloss of /˩okiru/ [òkìɽɯ́]? [/user] [assistant]",
         "[user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE candidate (people_id VARCHAR, unsure_rate INTEGER); CREATE TABLE people (sex VARCHAR, people_id VARCHAR)\n\n question: which gender got the highest average uncertain ratio. [/user] [assistant]",
         "[user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_60 (pick INTEGER, former_wnba_team VARCHAR)\n\n question: What pick was a player that previously played for the Minnesota Lynx? [/user] [assistant]",
-        "[user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_28138035_4 (womens_doubles VARCHAR, mens_singles VARCHAR)\n\n question: Name the womens doubles for werner schlager [/user] [assistant]"
+        "[user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_28138035_4 (womens_doubles VARCHAR, mens_singles VARCHAR)\n\n question: Name the women's doubles for werner schlager [/user] [assistant]"
     ]
     sampling_params = vllm.SamplingParams(temperature=0,
                                           max_tokens=256,
@@ -53,7 +53,7 @@ def test_llama_lora(sql_lora_files, tp_size):
         "\n\n answer: 1\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_96 (one_mora VARCHAR, gloss VARCHAR, accented_mora VARCHAR)\n\n question: What is the one mora for a high tone mora with a gloss of /˧kot/ [kòt]? [/user] [assistant]\n\n answer: 2\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_97 (one_mora VARCHAR, gloss VARCHAR, accented_mora VARCHAR)\n\n question: What is the one mora for a high tone mora with a gloss of /˧kot/ [kòt]? [/user] [assistant]\n\n answer: 2\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_98 (one_mora VARCHAR, gloss VARCHAR, accented_mora VARCHAR)\n\n question: What is the one m",
         " Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE candidate (people_id VARCHAR, unsure_rate INTEGER); CREATE TABLE people (sex VARCHAR, people_id VARCHAR)\n\n question: which gender got the highest average uncertain ratio. ",
         " Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_60 (pick INTEGER, former_wnba_team VARCHAR)\n\n question: What pick was a player that previously played for the Minnesota Lynx? ",
-        "\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_28138035_4 (womens_doubles VARCHAR, mens_singles VARCHAR)\n\n question: Name the womens doubles for werner schlager [/user] [assistant]\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_28138035_4 (womens_doubles VARCHAR, mens_singles VARCHAR)\n\n question: Name the womens doubles for werner schlager [/user] [assistant]\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_28138035_4 (womens_doubles VARCHAR, mens_singles VARCHAR)\n\n question: Name the womens doubles for werner schlager [/user] [assistant]\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE",
+        "\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_28138035_4 (womens_doubles VARCHAR, mens_singles VARCHAR)\n\n question: Name the women's doubles for werner schlager [/user] [assistant]\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_28138035_4 (womens_doubles VARCHAR, mens_singles VARCHAR)\n\n question: Name the women's doubles for werner schlager [/user] [assistant]\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_28138035_4 (womens_doubles VARCHAR, mens_singles VARCHAR)\n\n question: Name the women's doubles for werner schlager [/user] [assistant]\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE",
     ]
     expected_lora_output = [
         "  SELECT icao FROM table_name_74 WHERE airport = 'lilongwe international airport' ",
diff --git a/vllm/core/block_manager.py b/vllm/core/block_manager.py
index 7f91051f03..3946096d42 100644
--- a/vllm/core/block_manager.py
+++ b/vllm/core/block_manager.py
@@ -178,7 +178,7 @@ class BlockSpaceManager:
         if len(block_table) < len(logical_blocks):
             if (self.block_sliding_window
                     and len(block_table) >= self.block_sliding_window):
-                # re-use a block
+                # reuse a block
                 block_table.append(block_table[len(block_table) %
                                                self.block_sliding_window])
             else:
diff --git a/vllm/core/scheduler.py b/vllm/core/scheduler.py
index f4ac2d6dc5..5e7cc3091d 100644
--- a/vllm/core/scheduler.py
+++ b/vllm/core/scheduler.py
@@ -158,7 +158,7 @@ class Scheduler:
         return len(self.waiting) + len(self.running) + len(self.swapped)
 
     def _schedule(self) -> SchedulerOutputs:
-        # Blocks that need to be swaped or copied before model execution.
+        # Blocks that need to be swapped or copied before model execution.
         blocks_to_swap_in: Dict[int, int] = {}
         blocks_to_swap_out: Dict[int, int] = {}
         blocks_to_copy: Dict[int, List[int]] = {}
diff --git a/vllm/lora/punica.py b/vllm/lora/punica.py
index 307a33dcf2..fc74269e55 100644
--- a/vllm/lora/punica.py
+++ b/vllm/lora/punica.py
@@ -87,7 +87,7 @@ def add_lora(y: torch.Tensor,
     r = wb_t_all.size(-1)
     if buffer is None:
         # We set the buffer to be float32 by default to avoid
-        # numerical innacuracies that would otherwise happen
+        # numerical inaccuracies that would otherwise happen
         # due to downcasting.
         buffer = torch.zeros((x.size(0), r),
                              dtype=torch.float32,
diff --git a/vllm/model_executor/layers/triton_kernel/prefix_prefill.py b/vllm/model_executor/layers/triton_kernel/prefix_prefill.py
index ba40d42307..a1a2ab0c48 100644
--- a/vllm/model_executor/layers/triton_kernel/prefix_prefill.py
+++ b/vllm/model_executor/layers/triton_kernel/prefix_prefill.py
@@ -537,7 +537,7 @@ if triton.__version__ >= "2.1.0":
         alibi_start_q = tl.arange(
             0, BLOCK_M) + block_start_loc + cur_batch_ctx_len
         alibi_start_k = cur_batch_ctx_len
-        # # init debuger
+        # # init debugger
         # offset_db_q = tl.arange(0, BLOCK_M) + block_start_loc
         # offset_db_k = tl.arange(0, BLOCK_N)
         # calc q[BLOCK_M, BLOCK_MODEL] mul k[prefix_len: , BLOCK_DMODEL]
diff --git a/vllm/model_executor/models/decilm.py b/vllm/model_executor/models/decilm.py
index 07aa4b72bf..abf4a46287 100644
--- a/vllm/model_executor/models/decilm.py
+++ b/vllm/model_executor/models/decilm.py
@@ -41,7 +41,7 @@ class DeciLMForCausalLM(LlamaForCausalLM):
     Based on the llama executor.
 
     The main difference is that DeciLM uses Variable Grouped Query Attention.
-    The constant number of GQA heads in the decoder is overriden with a value
+    The constant number of GQA heads in the decoder is overridden with a value
     per layer.
 
     Usually, in the HuggingFace implementation, instead of
diff --git a/vllm/model_executor/parallel_utils/custom_all_reduce.py b/vllm/model_executor/parallel_utils/custom_all_reduce.py
index ce4c8d02f7..0c749c0484 100644
--- a/vllm/model_executor/parallel_utils/custom_all_reduce.py
+++ b/vllm/model_executor/parallel_utils/custom_all_reduce.py
@@ -36,14 +36,14 @@ def init_custom_ar() -> None:
     if world_size not in _SUPPORTED_WORLD_SIZES:
         logger.warn(
             "Custom allreduce is disabled due to an unsupported world size: "
-            "%d. Supported world sizes: %s. To slience this warning, specify"
+            "%d. Supported world sizes: %s. To silence this warning, specify "
             "disable_custom_all_reduce=True explicitly.", world_size,
             str(_SUPPORTED_WORLD_SIZES))
         return
     if not _can_p2p(rank, world_size):
         logger.warn(
             "Custom allreduce is disabled because your platform lacks GPU P2P"
-            " capability. To slience this warning, specify"
+            " capability. To silence this warning, specify "
             "disable_custom_all_reduce=True explicitly.")
         return
     _CA_HANDLE = CustomAllreduce(rank, world_size)
diff --git a/vllm/model_executor/parallel_utils/parallel_state.py b/vllm/model_executor/parallel_utils/parallel_state.py
index aeb07f64c3..c821936d06 100644
--- a/vllm/model_executor/parallel_utils/parallel_state.py
+++ b/vllm/model_executor/parallel_utils/parallel_state.py
@@ -189,7 +189,7 @@ def get_pipeline_model_parallel_next_rank():
 
 
 def get_pipeline_model_parallel_prev_rank():
-    """Return the global rank that preceeds the caller in the pipeline"""
+    """Return the global rank that precedes the caller in the pipeline"""
     assert _PIPELINE_GLOBAL_RANKS is not None, (
         "Pipeline parallel group is not initialized")
     rank_in_pipeline = get_pipeline_model_parallel_rank()
diff --git a/vllm/utils.py b/vllm/utils.py
index d7a3a3a2a9..6206879929 100644
--- a/vllm/utils.py
+++ b/vllm/utils.py
@@ -204,7 +204,7 @@ def _generate_random_fp8_e5m2(
     # NOTE(zhaoyang): Due to NaN and Inf representation for fp8 data type,
     # it may occur Inf or NaN if we directly use torch.randint
     # to generate random data for fp8 data.
-    # For example, s.11111.00 in fp8e5m2 format repesents Inf.
+    # For example, s.11111.00 in fp8e5m2 format represents Inf.
     #     | E4M3        | E5M2
     #-----|-------------|-------------------
     # Inf | N/A         | s.11111.00