Merge branch 'main' into stable

2022-10-04 14:20:10 +02:00 · 2022-10-04 14:20:10 +02:00 · a7d2c9572e
parent fa315352da 53c7c1b07f
commit a7d2c9572e
35 changed files with 824 additions and 139 deletions
--- a/.github/ISSUE_TEMPLATE/config.yml
+++ b/.github/ISSUE_TEMPLATE/config.yml
@ -1,13 +1,13 @@
 contact_links:
+  - name: Support questions & other
+    url: https://github.com/meilisearch/meilisearch/discussions/new
+    about: For any other question, open a discussion in this repository
  - name: Language support request & feedback
    url: https://github.com/meilisearch/product/discussions/categories/feedback-feature-proposal?discussions_q=label%3Aproduct%3Acore%3Atokenizer+category%3A%22Feedback+%26+Feature+Proposal%22
    about:  The requests and feedback regarding Language support are not managed in this repository. Please upvote the related discussion in our dedicated product repository or open a new one if it doesn't exist.
-  - name: Feature request & feedback
+  - name: Any other feature request & feedback
    url: https://github.com/meilisearch/product/discussions/categories/feedback-feature-proposal
    about: The feature requests and feedback regarding the already existing features are not managed in this repository. Please open a discussion in our dedicated product repository
  - name: Documentation issue
    url: https://github.com/meilisearch/documentation/issues/new
    about: For documentation issues, open an issue or a PR in the documentation repository
-  - name: Support questions & other
-    url: https://github.com/meilisearch/meilisearch/discussions/new
-    about: For any other question, open a discussion in this repository
--- a/.github/scripts/is-latest-release.sh
+++ b/.github/scripts/is-latest-release.sh
@ -85,7 +85,7 @@ get_latest() {
    latest=""
    current_tag=""
    for release_info in $releases; do
-        if [ $i -eq 0 ]; then # Cheking tag_name
+        if [ $i -eq 0 ]; then # Checking tag_name
            if echo "$release_info" | grep -q "$GREP_SEMVER_REGEXP"; then # If it's not an alpha or beta release
                current_tag=$release_info
            else
--- a/.github/workflows/create-issue-dependencies.yml
+++ b/.github/workflows/create-issue-dependencies.yml
@ -3,7 +3,7 @@ on:
  schedule:
    - cron: '0 0 1 */3 *'
  workflow_dispatch:
-  
+
 jobs:
  create-issue:
    runs-on: ubuntu-latest
@ -12,12 +12,12 @@ jobs:
    - name: Create an issue
      uses: actions-ecosystem/action-create-issue@v1
      with:
-        github_token: ${{ secrets.GITHUB_TOKEN }}
+        github_token: ${{ secrets.MEILI_BOT_GH_PAT }}
        title: Upgrade dependencies
        body: |
          We need to update the dependencies of the Meilisearch repository, and, if possible, the dependencies of all the core-team repositories that Meilisearch depends on (milli, charabia, heed...).

-          ⚠️ This issue should only be done at the beginning of the sprint!     
+          ⚠️ This issue should only be done at the beginning of the sprint!
        labels: |
          dependencies
          maintenance
--- a/.github/workflows/milestone-workflow.yml
+++ b/.github/workflows/milestone-workflow.yml
@ -0,0 +1,156 @@
+name: Milestone's workflow
+
+# /!\ No git flow is handled here
+
+# For each Milestone created (not opened!), and if the release is NOT a patch release (only the patch changed)
+# - the roadmap issue is created, see https://github.com/meilisearch/core-team/blob/main/issue-templates/roadmap-issue.md
+# - the changelog issue is created, see https://github.com/meilisearch/core-team/blob/main/issue-templates/changelog-issue.md
+
+# For each Milestone closed
+# - the `release_version` label is created
+# - this label is applied to all issues/PRs in the Milestone
+
+on:
+  milestone:
+    types: [created, closed]
+
+env:
+  MILESTONE_VERSION: ${{ github.event.milestone.title }}
+  MILESTONE_URL: ${{ github.event.milestone.html_url }}
+  MILESTONE_DUE_ON: ${{ github.event.milestone.due_on }}
+  GH_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }}
+
+jobs:
+
+# -----------------
+# MILESTONE CREATED
+# -----------------
+
+  get-release-version:
+    if: github.event.action == 'created'
+    runs-on: ubuntu-latest
+    outputs:
+      is-patch: ${{ steps.check-patch.outputs.is-patch }}
+    env:
+      MILESTONE_VERSION: ${{ github.event.milestone.title }}
+    steps:
+      - uses: actions/checkout@v3
+      - name: Check if this release is a patch release only
+        id: check-patch
+        run: |
+          echo version: $MILESTONE_VERSION
+          if [[ $MILESTONE_VERSION =~ ^v[0-9]+\.[0-9]+\.0$ ]]; then
+            echo 'This is NOT a patch release'
+            echo ::set-output name=is-patch::false
+          elif [[ $MILESTONE_VERSION =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
+            echo 'This is a patch release'
+            echo ::set-output name=is-patch::true
+          else
+            echo "Not a valid format of release, check the Milestone's title."
+            echo 'Should be vX.Y.Z'
+            exit 1
+          fi
+
+  create-roadmap-issue:
+    needs: get-release-version
+    # Create the roadmap issue if the release is not only a patch release
+    if: github.event.action == 'created' && needs.get-release-version.outputs.is-patch == 'false'
+    runs-on: ubuntu-latest
+    env:
+      ISSUE_TEMPLATE: issue-template.md
+    steps:
+      - uses: actions/checkout@v3
+      - name: Download the issue template
+        run: curl -s https://raw.githubusercontent.com/meilisearch/core-team/main/issue-templates/roadmap-issue.md > $ISSUE_TEMPLATE
+      - name: Replace all empty occurrences in the templates
+        run: |
+          # Replace all <<version>> occurrences
+          sed -i "s/<<version>>/$MILESTONE_VERSION/g" $ISSUE_TEMPLATE
+
+          # Replace all <<milestone_id>> occurrences
+          milestone_id=$(echo $MILESTONE_URL | cut -d '/' -f 7)
+          sed -i "s/<<milestone_id>>/$milestone_id/g" $ISSUE_TEMPLATE
+
+          # Replace release date if exists
+          if [[ ! -z $MILESTONE_DUE_ON ]]; then
+            date=$(echo $MILESTONE_DUE_ON | cut -d 'T' -f 1)
+            sed -i "s/Release date\: 20XX-XX-XX/Release date\: $date/g" $ISSUE_TEMPLATE
+          fi
+      - name: Create the issue
+        run: |
+          gh issue create \
+            --title "$MILESTONE_VERSION ROADMAP" \
+            --label 'epic,impacts docs,impacts integrations,impacts cloud' \
+            --body-file $ISSUE_TEMPLATE \
+            --milestone $MILESTONE_VERSION
+
+  create-changelog-issue:
+    needs: get-release-version
+    # Create the changelog issue if the release is not only a patch release
+    if: github.event.action == 'created' && needs.get-release-version.outputs.is-patch == 'false'
+    runs-on: ubuntu-latest
+    env:
+      ISSUE_TEMPLATE: issue-template.md
+    steps:
+      - uses: actions/checkout@v3
+      - name: Download the issue template
+        run: curl -s https://raw.githubusercontent.com/meilisearch/core-team/main/issue-templates/changelog-issue.md > $ISSUE_TEMPLATE
+      - name: Replace all empty occurrences in the templates
+        run: |
+          # Replace all <<version>> occurrences
+          sed -i "s/<<version>>/$MILESTONE_VERSION/g" $ISSUE_TEMPLATE
+
+          # Replace all <<milestone_id>> occurrences
+          milestone_id=$(echo $MILESTONE_URL | cut -d '/' -f 7)
+          sed -i "s/<<milestone_id>>/$milestone_id/g" $ISSUE_TEMPLATE
+      - name: Create the issue
+        run: |
+          gh issue create \
+            --title "Create release changelogs for $MILESTONE_VERSION" \
+            --label 'impacts docs,documentation' \
+            --body-file $ISSUE_TEMPLATE \
+            --milestone $MILESTONE_VERSION \
+            --assignee curquiza
+
+# ----------------
+# MILESTONE CLOSED
+# ----------------
+
+  create-release-label:
+    if: github.event.action == 'closed'
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - name: Create the ${{ env.MILESTONE_VERSION }} label
+        run: |
+          label_description="PRs/issues solved in $MILESTONE_VERSION"
+          if [[ ! -z $MILESTONE_DUE_ON ]]; then
+            date=$(echo $MILESTONE_DUE_ON | cut -d 'T' -f 1)
+            label_description="$label_description released on $date"
+          fi
+          # Target the repository running this workflow, not a hard-coded personal fork.
+          gh api "repos/$GITHUB_REPOSITORY/labels" \
+            --method POST \
+            -H "Accept: application/vnd.github+json" \
+            -f name="$MILESTONE_VERSION" \
+            -f description="$label_description" \
+            -f color='ff5ba3'
+
+  labelize-all-milestone-content:
+    if: github.event.action == 'closed'
+    needs: create-release-label
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - name: Add label ${{ env.MILESTONE_VERSION }} to all PRs in the Milestone
+        run: |
+          prs=$(gh pr list --search milestone:"$MILESTONE_VERSION" --limit 1000 --state all --json number --template '{{range .}}{{tablerow (printf "%v" .number)}}{{end}}')
+          for pr in $prs; do
+              gh pr edit $pr --add-label "$MILESTONE_VERSION" # the PR number goes after the `edit` subcommand
+          done
+      - name: Add label ${{ env.MILESTONE_VERSION }} to all issues in the Milestone
+        run: |
+          issues=$(gh issue list --search milestone:"$MILESTONE_VERSION" --limit 1000 --state all --json number --template '{{range .}}{{tablerow (printf "%v" .number)}}{{end}}')
+          for issue in $issues; do
+              gh issue edit $issue --add-label $MILESTONE_VERSION
+          done
--- a/.github/workflows/publish-binaries.yml
+++ b/.github/workflows/publish-binaries.yml
@ -1,4 +1,6 @@
 on:
+  schedule:
+    - cron: '0 2 * * *' # Every day at 2:00am
  release:
    types: [published]

@ -8,8 +10,9 @@ jobs:
  check-version:
    name: Check the version validity
    runs-on: ubuntu-latest
+    # No need to check the version for dry run (cron)
    steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v3
      # Check if the tag has the v<nmumber>.<number>.<number> format.
      # If yes, it means we are publishing an official release.
      # If no, we are releasing a RC, so no need to check the version.
@ -25,7 +28,7 @@ jobs:
            echo ::set-output name=stable::false
          fi
      - name: Check release validity
-        if: steps.check-tag-format.outputs.stable == 'true'
+        if: github.event_name != 'schedule' && steps.check-tag-format.outputs.stable == 'true'
        run: bash .github/scripts/check-release.sh

  publish:
@ -54,10 +57,12 @@ jobs:
    - uses: actions/checkout@v3
    - name: Build
      run: cargo build --release --locked
+    # No need to upload binaries for dry run (cron)
    - name: Upload binaries to release
+      if: github.event_name != 'schedule'
      uses: svenstaro/upload-release-action@v1-release
      with:
-        repo_token: ${{ secrets.PUBLISH_TOKEN }}
+        repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
        file: target/release/${{ matrix.artifact_name }}
        asset_name: ${{ matrix.asset_name }}
        tag: ${{ github.ref }}
@ -123,9 +128,11 @@ jobs:
        run: ls -lR ./target

      - name: Upload the binary to release
+        # No need to upload binaries for dry run (cron)
+        if: github.event_name != 'schedule'
        uses: svenstaro/upload-release-action@v1-release
        with:
-          repo_token: ${{ secrets.PUBLISH_TOKEN }}
+          repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
          file: target/${{ matrix.target }}/release/meilisearch
          asset_name: ${{ matrix.asset_name }}
          tag: ${{ github.ref }}
--- a/.github/workflows/publish-deb-brew-pkg.yml
+++ b/.github/workflows/publish-deb-brew-pkg.yml
@ -9,7 +9,7 @@ jobs:
    name: Check the version validity
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v3
      - name: Check release validity
        run: bash .github/scripts/check-release.sh

@ -29,7 +29,7 @@ jobs:
    - name: Upload debian pkg to release
      uses: svenstaro/upload-release-action@v1-release
      with:
-        repo_token: ${{ secrets.GITHUB_TOKEN }}
+        repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
        file: target/debian/meilisearch.deb
        asset_name: meilisearch.deb
        tag: ${{ github.ref }}
--- a/.github/workflows/publish-docker-images.yml
+++ b/.github/workflows/publish-docker-images.yml
@ -12,7 +12,7 @@ jobs:
  docker:
    runs-on: docker
    steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v3

      # Check if the tag has the v<nmumber>.<number>.<number> format. If yes, it means we are publishing an official release.
      # In this situation, we need to set `output.stable` to create/update the following tags (additionally to the `vX.Y.Z` Docker tag):
@ -53,7 +53,7 @@ jobs:
        uses: docker/metadata-action@v4
        with:
          images: getmeili/meilisearch
-          # The lastest and `vX.Y` tags are only pushed for the official Meilisearch releases
+          # The latest and `vX.Y` tags are only pushed for the official Meilisearch releases
          # See https://github.com/docker/metadata-action#latest-tag
          flavor: latest=false
          tags: |
@ -62,10 +62,19 @@ jobs:
            type=raw,value=latest,enable=${{ steps.check-tag-format.outputs.stable == 'true' }}

      - name: Build and push
-        id: docker_build
        uses: docker/build-push-action@v3
        with:
          # We do not push tags for the cron jobs, this is only for test purposes
          push: ${{ github.event_name != 'schedule' }}
          platforms: linux/amd64,linux/arm64
          tags: ${{ steps.meta.outputs.tags }}
+
+      # /!\ Don't touch this without checking with Cloud team
+      - name: Send CI information to Cloud team
+        if: github.event_name != 'schedule'
+        uses: peter-evans/repository-dispatch@v2
+        with:
+          token: ${{ secrets.MEILI_BOT_GH_PAT }}
+          repository: meilisearch/meilisearch-cloud
+          event-type: cloud-docker-build
+          client-payload: '{ "meilisearch_version": "${{ steps.meta.outputs.tags }}", "stable": "${{ steps.check-tag-format.outputs.stable }}" }'
--- a/.github/workflows/update-cargo-toml-version.yml
+++ b/.github/workflows/update-cargo-toml-version.yml
@ -0,0 +1,47 @@
+name: Update Meilisearch version in all Cargo.toml files
+
+on:
+  workflow_dispatch:
+    inputs:
+      new_version:
+        description: 'The new version (vX.Y.Z)'
+        required: true
+
+env:
+  NEW_VERSION: ${{ github.event.inputs.new_version }}
+  NEW_BRANCH: update-version-${{ github.event.inputs.new_version }}
+  GH_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }}
+
+jobs:
+
+  update-version-cargo-toml:
+    name: Update version in Cargo.toml files
+    runs-on: ubuntu-20.04
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions-rs/toolchain@v1
+        with:
+          profile: minimal
+          toolchain: stable
+          override: true
+      - name: Install sd
+        run: cargo install sd
+      - name: Update Cargo.toml files
+        run: |
+          raw_new_version=$(echo $NEW_VERSION | cut -d 'v' -f 2)
+          new_string="version = \"$raw_new_version\""
+          sd '^version = "\d+\.\d+\.\w+"$' "$new_string" */Cargo.toml # dots escaped so they only match a literal '.'
+      - name: Build Meilisearch to update Cargo.lock
+        run: cargo build
+      - name: Commit and push the changes to the ${{ env.NEW_BRANCH }} branch
+        uses: EndBug/add-and-commit@v9
+        with:
+          message: "Update version for the next release (${{ env.NEW_VERSION }}) in Cargo.toml files"
+          new_branch: ${{ env.NEW_BRANCH }}
+      - name: Create the PR pointing to ${{ github.ref_name }}
+        run: |
+          gh pr create \
+            --title "Update version for the next release ($NEW_VERSION) in Cargo.toml files" \
+            --body '⚠️ This PR is automatically generated. Check the new version is the expected one before merging.' \
+            --label 'skip changelog' \
+            --milestone $NEW_VERSION
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@ -10,12 +10,24 @@ If Meilisearch does not offer optimized support for your language, please consid

 ## Table of Contents

+- [Hacktoberfest 2022](#hacktoberfest-2022)
 - [Assumptions](#assumptions)
 - [How to Contribute](#how-to-contribute)
 - [Development Workflow](#development-workflow)
 - [Git Guidelines](#git-guidelines)
 - [Release Process (for internal team only)](#release-process-for-internal-team-only)

+## Hacktoberfest 2022
+
+It's [Hacktoberfest month](https://hacktoberfest.com)! 🥳
+
+Thanks so much for participating with Meilisearch this year!
+1. We will follow the quality standards set by the organizers of Hacktoberfest (see detail on their [website](https://hacktoberfest.com/participation/#spam)). Our reviewers will not consider any PR that doesn’t match that standard.
+2. PR reviews will take place from Monday to Thursday, during usual working hours (CEST). If you submit outside of these hours, there’s no need to panic; we will get around to your contribution.
+3. There will be no issue assignment as we don’t want people to ask to be assigned specific issues and never return, discouraging the volunteer contributors from opening a PR to fix this issue. We take the liberty to choose the PR that best fixes the issue, so we encourage you to get to it as soon as possible and do your best!
+
+You can check out the longer, more complete guideline documentation [here](https://github.com/meilisearch/.github/blob/main/Hacktoberfest_2022_contributors_guidelines.md).
+
 ## Assumptions

 1. **You're familiar with [GitHub](https://github.com) and the [Pull Requests](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests)(PR) workflow.**
@ -102,7 +114,7 @@ The full Meilisearch release process is described in [this guide](https://github
 ### Release assets

 For each release, the following assets are created:
- Binaries for differents platforms (Linux, MacOS, Windows and ARM architectures) are attached to the GitHub release
+- Binaries for different platforms (Linux, MacOS, Windows and ARM architectures) are attached to the GitHub release
 - Binaries are pushed to HomeBrew and APT (not published for RC)
 - Docker tags are created/updated:
  - `vX.Y.Z`
--- a/Cargo.lock
+++ b/Cargo.lock
@ -2091,6 +2091,7 @@ dependencies = [
 "time 0.3.14",
 "tokio",
 "tokio-stream",
+ "toml",
 "urlencoding",
 "uuid",
 "vergen",
--- a/Dockerfile
+++ b/Dockerfile
@ -1,5 +1,5 @@
 # Compile
-FROM    rust:alpine3.14 AS compiler
+FROM    rust:alpine3.16 AS compiler

 RUN     apk add -q --update-cache --no-cache build-base openssl-dev

@ -19,7 +19,7 @@ RUN     set -eux; \
        cargo build --release

 # Run
-FROM    alpine:3.14
+FROM    alpine:3.16

 ENV     MEILI_HTTP_ADDR 0.0.0.0:7700
 ENV     MEILI_SERVER_PROVIDER docker
--- a/README.md
+++ b/README.md
@ -34,6 +34,14 @@ Meilisearch helps you shape a delightful search experience in a snap, offering f

 🔥 [**Try it!**](https://where2watch.meilisearch.com/) 🔥

+## 🎃 Hacktoberfest
+
+It’s Hacktoberfest 2022 @Meilisearch
+
+[Hacktoberfest](https://hacktoberfest.com/) is a celebration of the open-source community. This year, and for the third time in a row, Meilisearch is participating in this fantastic event.
+
+You’d like to contribute? Don’t hesitate to check out our [contributing guidelines](./CONTRIBUTING.md).
+
 ## ✨ Features

 - **Search-as-you-type:** find search results in less than 50 milliseconds
--- a/config.toml
+++ b/config.toml
@ -0,0 +1,129 @@
+# This file shows the default configuration of Meilisearch.
+# All variables are defined here https://docs.meilisearch.com/learn/configuration/instance_options.html#environment-variables
+
+db_path = "./data.ms"
+# The destination where the database must be created.
+
+env = "development" # Possible values: [development, production]
+# This environment variable must be set to `production` if you are running in production.
+# More logs will be displayed if the server is running in development mode. Setting the master
+# key is optional; hence no security on the updates routes. This
+# is useful to debug when integrating the engine with another service.
+
+http_addr = "127.0.0.1:7700"
+# The address on which the HTTP server will listen.
+
+# master_key = "MASTER_KEY"
+# Sets the instance's master key, automatically protecting all routes except GET /health.
+
+# no_analytics = false
+# Do not send analytics to Meilisearch.
+
+disable_auto_batching = false
+# The engine will disable task auto-batching, and will sequentially compute each task one by one.
+
+
+### DUMP
+
+dumps_dir = "dumps/"
+# Folder where dumps are created when the dump route is called.
+
+# import_dump = "./path/to/my/file.dump"
+# Import a dump from the specified path, must be a `.dump` file.
+
+ignore_missing_dump = false
+# If the dump doesn't exist, load or create the database specified by `db_path` instead.
+
+ignore_dump_if_db_exists = false
+# Ignore the dump if a database already exists, and load that database instead.
+
+###
+
+
+log_level = "INFO" # Possible values: [ERROR, WARN, INFO, DEBUG, TRACE]
+# Set the log level.
+
+
+### INDEX
+
+max_index_size = "100 GiB"
+# The maximum size, in bytes, of the main LMDB database directory.
+
+# max_indexing_memory = "2 GiB"
+# The maximum amount of memory the indexer will use. 
+#
+# In case the engine is unable to retrieve the available memory the engine will try to use
+# the memory it needs but without real limit, this can lead to Out-Of-Memory issues and it
+# is recommended to specify the amount of memory to use.
+#
+# /!\ The default value is system-dependent /!\
+
+# max_indexing_threads = 4
+# The maximum number of threads the indexer will use. If the number set is higher than the
+# real number of cores available in the machine, it will use the maximum number of
+# available cores.
+#
+# It defaults to half of the available threads.
+
+###
+
+
+max_task_db_size = "100 GiB"
+# The maximum size, in bytes, of the update LMDB database directory.
+
+http_payload_size_limit = "100 MB"
+# The maximum size, in bytes, of accepted JSON payloads.
+
+
+### SNAPSHOT
+
+schedule_snapshot = false
+# Activate snapshot scheduling.
+
+snapshot_dir = "snapshots/"
+# Defines the directory path where Meilisearch will create a snapshot each snapshot_interval_sec.
+
+snapshot_interval_sec = 86400
+# Defines time interval, in seconds, between each snapshot creation.
+
+# import_snapshot = "./path/to/my/snapshot"
+# Defines the path of the snapshot file to import. This option will, by default, stop the
+# process if a database already exists, or if no snapshot exists at the given path. If this
+# option is not specified, no snapshot is imported.
+
+ignore_missing_snapshot = false
+# The engine will ignore a missing snapshot and not return an error in such a case.
+
+ignore_snapshot_if_db_exists = false
+# The engine will skip snapshot importation and not return an error in such a case.
+
+###
+
+
+### SSL
+
+# ssl_auth_path = "./path/to/root"
+# Enable client authentication, and accept certificates signed by those roots provided in CERTFILE.
+
+# ssl_cert_path = "./path/to/CERTFILE"
+# Read server certificates from CERTFILE. This should contain PEM-format certificates in
+# the right order (the first certificate should certify KEYFILE, the last should be a root
+# CA).
+
+# ssl_key_path = "./path/to/private-key"
+# Read the private key from KEYFILE.  This should be an RSA private key or PKCS8-encoded
+# private key, in PEM format.
+
+# ssl_ocsp_path = "./path/to/OCSPFILE"
+# Read DER-encoded OCSP response from OCSPFILE and staple to certificate. Optional.
+
+ssl_require_auth = false
+# Send a fatal alert if the client does not complete client authentication.
+
+ssl_resumption = false
+# SSL support session resumption.
+ 
+ssl_tickets = false
+# SSL support tickets.
+
+###
--- a/meilisearch-auth/Cargo.toml
+++ b/meilisearch-auth/Cargo.toml
@ -7,7 +7,7 @@ edition = "2021"
 enum-iterator = "0.7.0"
 hmac = "0.12.1"
 meilisearch-types = { path = "../meilisearch-types" }
-milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.33.4" }
+milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.33.4", default-features = false }
 rand = "0.8.4"
 serde = { version = "1.0.136", features = ["derive"] }
 serde_json = { version = "1.0.85", features = ["preserve_order"] }
--- a/meilisearch-http/Cargo.toml
+++ b/meilisearch-http/Cargo.toml
@ -46,7 +46,7 @@ jsonwebtoken = "8.0.1"
 log = "0.4.14"
 meilisearch-auth = { path = "../meilisearch-auth" }
 meilisearch-types = { path = "../meilisearch-types" }
-meilisearch-lib = { path = "../meilisearch-lib" }
+meilisearch-lib = { path = "../meilisearch-lib", default-features = false }
 mimalloc = { version = "0.1.29", default-features = false }
 mime = "0.3.16"
 num_cpus = "1.13.1"
@ -76,6 +76,7 @@ thiserror = "1.0.30"
 time = { version = "0.3.7", features = ["serde-well-known", "formatting", "parsing", "macros"] }
 tokio = { version = "1.17.0", features = ["full"] }
 tokio-stream = "0.1.8"
+toml = "0.5.9"
 uuid = { version = "1.1.2", features = ["serde", "v4"] }
 walkdir = "2.3.2"
 prometheus = { version = "0.13.0", features = ["process"], optional = true }
@ -90,7 +91,7 @@ urlencoding = "2.1.0"
 yaup = "0.2.0"

 [features]
-default = ["analytics", "mini-dashboard"]
+default = ["analytics", "meilisearch-lib/default", "mini-dashboard"]
 metrics = ["prometheus"]
 analytics = ["segment"]
 mini-dashboard = [
@ -104,6 +105,10 @@ mini-dashboard = [
    "tempfile",
    "zip",
 ]
+chinese = ["meilisearch-lib/chinese"]
+hebrew = ["meilisearch-lib/hebrew"]
+japanese = ["meilisearch-lib/japanese"]
+thai = ["meilisearch-lib/thai"]

 [package.metadata.mini-dashboard]
 assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.2/build.zip"
--- a/meilisearch-http/src/analytics/segment_analytics.rs
+++ b/meilisearch-http/src/analytics/segment_analytics.rs
@ -349,16 +349,16 @@ pub struct SearchAggregator {

    // sort
    sort_with_geo_point: bool,
-    // everytime a request has a filter, this field must be incremented by the number of terms it contains
+    // every time a request has a sort, this field must be incremented by the number of terms it contains
    sort_sum_of_criteria_terms: usize,
-    // everytime a request has a filter, this field must be incremented by one
+    // every time a request has a sort, this field must be incremented by one
    sort_total_number_of_criteria: usize,

    // filter
    filter_with_geo_radius: bool,
-    // everytime a request has a filter, this field must be incremented by the number of terms it contains
+    // every time a request has a filter, this field must be incremented by the number of terms it contains
    filter_sum_of_criteria_terms: usize,
-    // everytime a request has a filter, this field must be incremented by one
+    // every time a request has a filter, this field must be incremented by one
    filter_total_number_of_criteria: usize,
    used_syntax: HashMap<String, usize>,

@ -366,7 +366,7 @@ pub struct SearchAggregator {
    // The maximum number of terms in a q request
    max_terms_number: usize,

-    // everytime a search is done, we increment the counter linked to the used settings
+    // every time a search is done, we increment the counter linked to the used settings
    matching_strategy: HashMap<String, usize>,

    // pagination
--- a/meilisearch-http/src/main.rs
+++ b/meilisearch-http/src/main.rs
@ -1,9 +1,9 @@
 use std::env;
+use std::path::PathBuf;
 use std::sync::Arc;

 use actix_web::http::KeepAlive;
 use actix_web::HttpServer;
-use clap::Parser;
 use meilisearch_auth::AuthController;
 use meilisearch_http::analytics;
 use meilisearch_http::analytics::Analytics;
@ -29,7 +29,7 @@ fn setup(opt: &Opt) -> anyhow::Result<()> {

 #[actix_web::main]
 async fn main() -> anyhow::Result<()> {
-    let opt = Opt::parse();
+    let (opt, config_read_from) = Opt::try_build()?;

    setup(&opt)?;

@ -58,7 +58,7 @@ async fn main() -> anyhow::Result<()> {
    #[cfg(any(debug_assertions, not(feature = "analytics")))]
    let (analytics, user) = analytics::MockAnalytics::new(&opt);

-    print_launch_resume(&opt, &user);
+    print_launch_resume(&opt, &user, config_read_from);

    run_http(meilisearch, auth_controller, opt, analytics).await?;

@ -97,7 +97,7 @@ async fn run_http(
    Ok(())
 }

-pub fn print_launch_resume(opt: &Opt, user: &str) {
+pub fn print_launch_resume(opt: &Opt, user: &str, config_read_from: Option<PathBuf>) {
    let commit_sha = option_env!("VERGEN_GIT_SHA").unwrap_or("unknown");
    let commit_date = option_env!("VERGEN_GIT_COMMIT_TIMESTAMP").unwrap_or("unknown");
    let protocol = if opt.ssl_cert_path.is_some() && opt.ssl_key_path.is_some() {
@ -118,6 +118,12 @@ pub fn print_launch_resume(opt: &Opt, user: &str) {

    eprintln!("{}", ascii_name);

+    eprintln!(
+        "Config file path:\t{:?}",
+        config_read_from
+            .map(|config_file_path| config_file_path.display().to_string())
+            .unwrap_or_else(|| "none".to_string())
+    );
    eprintln!("Database path:\t\t{:?}", opt.db_path);
    eprintln!("Server listening on:\t\"{}://{}\"", protocol, opt.http_addr);
    eprintln!("Environment:\t\t{:?}", opt.env);
--- a/meilisearch-http/src/option.rs
+++ b/meilisearch-http/src/option.rs
@ -5,7 +5,10 @@ use std::sync::Arc;

 use byte_unit::Byte;
 use clap::Parser;
-use meilisearch_lib::options::{IndexerOpts, SchedulerConfig};
+use meilisearch_lib::{
+    export_to_env_if_not_present,
+    options::{IndexerOpts, SchedulerConfig},
+};
 use rustls::{
    server::{
        AllowAnyAnonymousOrAuthenticatedClient, AllowAnyAuthenticatedClient,
@ -14,141 +17,208 @@ use rustls::{
    RootCertStore,
 };
 use rustls_pemfile::{certs, pkcs8_private_keys, rsa_private_keys};
-use serde::Serialize;
+use serde::{Deserialize, Serialize};

 const POSSIBLE_ENV: [&str; 2] = ["development", "production"];

-#[derive(Debug, Clone, Parser, Serialize)]
+const MEILI_DB_PATH: &str = "MEILI_DB_PATH";
+const MEILI_HTTP_ADDR: &str = "MEILI_HTTP_ADDR";
+const MEILI_MASTER_KEY: &str = "MEILI_MASTER_KEY";
+const MEILI_ENV: &str = "MEILI_ENV";
+#[cfg(all(not(debug_assertions), feature = "analytics"))]
+const MEILI_NO_ANALYTICS: &str = "MEILI_NO_ANALYTICS";
+const MEILI_MAX_INDEX_SIZE: &str = "MEILI_MAX_INDEX_SIZE";
+const MEILI_MAX_TASK_DB_SIZE: &str = "MEILI_MAX_TASK_DB_SIZE";
+const MEILI_HTTP_PAYLOAD_SIZE_LIMIT: &str = "MEILI_HTTP_PAYLOAD_SIZE_LIMIT";
+const MEILI_SSL_CERT_PATH: &str = "MEILI_SSL_CERT_PATH";
+const MEILI_SSL_KEY_PATH: &str = "MEILI_SSL_KEY_PATH";
+const MEILI_SSL_AUTH_PATH: &str = "MEILI_SSL_AUTH_PATH";
+const MEILI_SSL_OCSP_PATH: &str = "MEILI_SSL_OCSP_PATH";
+const MEILI_SSL_REQUIRE_AUTH: &str = "MEILI_SSL_REQUIRE_AUTH";
+const MEILI_SSL_RESUMPTION: &str = "MEILI_SSL_RESUMPTION";
+const MEILI_SSL_TICKETS: &str = "MEILI_SSL_TICKETS";
+const MEILI_IMPORT_SNAPSHOT: &str = "MEILI_IMPORT_SNAPSHOT";
+const MEILI_IGNORE_MISSING_SNAPSHOT: &str = "MEILI_IGNORE_MISSING_SNAPSHOT";
+const MEILI_IGNORE_SNAPSHOT_IF_DB_EXISTS: &str = "MEILI_IGNORE_SNAPSHOT_IF_DB_EXISTS";
+const MEILI_SNAPSHOT_DIR: &str = "MEILI_SNAPSHOT_DIR";
+const MEILI_SCHEDULE_SNAPSHOT: &str = "MEILI_SCHEDULE_SNAPSHOT";
+const MEILI_SNAPSHOT_INTERVAL_SEC: &str = "MEILI_SNAPSHOT_INTERVAL_SEC";
+const MEILI_IMPORT_DUMP: &str = "MEILI_IMPORT_DUMP";
+const MEILI_IGNORE_MISSING_DUMP: &str = "MEILI_IGNORE_MISSING_DUMP";
+const MEILI_IGNORE_DUMP_IF_DB_EXISTS: &str = "MEILI_IGNORE_DUMP_IF_DB_EXISTS";
+const MEILI_DUMPS_DIR: &str = "MEILI_DUMPS_DIR";
+const MEILI_LOG_LEVEL: &str = "MEILI_LOG_LEVEL";
+#[cfg(feature = "metrics")]
+const MEILI_ENABLE_METRICS_ROUTE: &str = "MEILI_ENABLE_METRICS_ROUTE";
+
+const DEFAULT_DB_PATH: &str = "./data.ms";
+const DEFAULT_HTTP_ADDR: &str = "127.0.0.1:7700";
+const DEFAULT_ENV: &str = "development";
+const DEFAULT_MAX_INDEX_SIZE: &str = "100 GiB";
+const DEFAULT_MAX_TASK_DB_SIZE: &str = "100 GiB";
+const DEFAULT_HTTP_PAYLOAD_SIZE_LIMIT: &str = "100 MB";
+const DEFAULT_SNAPSHOT_DIR: &str = "snapshots/";
+const DEFAULT_SNAPSHOT_INTERVAL_SEC: u64 = 86400;
+const DEFAULT_DUMPS_DIR: &str = "dumps/";
+const DEFAULT_LOG_LEVEL: &str = "INFO";
+
+#[derive(Debug, Clone, Parser, Serialize, Deserialize)]
 #[clap(version)]
+#[serde(rename_all = "snake_case", deny_unknown_fields)]
 pub struct Opt {
    /// The destination where the database must be created.
-    #[clap(long, env = "MEILI_DB_PATH", default_value = "./data.ms")]
+    #[clap(long, env = MEILI_DB_PATH, default_value_os_t = default_db_path())]
+    #[serde(default = "default_db_path")]
    pub db_path: PathBuf,

    /// The address on which the http server will listen.
-    #[clap(long, env = "MEILI_HTTP_ADDR", default_value = "127.0.0.1:7700")]
+    #[clap(long, env = MEILI_HTTP_ADDR, default_value_t = default_http_addr())]
+    #[serde(default = "default_http_addr")]
    pub http_addr: String,

-    /// The master key allowing you to do everything on the server.
-    #[serde(skip)]
-    #[clap(long, env = "MEILI_MASTER_KEY")]
+    /// Sets the instance's master key, automatically protecting all routes except GET /health
+    #[serde(skip_serializing)]
+    #[clap(long, env = MEILI_MASTER_KEY)]
    pub master_key: Option<String>,

    /// This environment variable must be set to `production` if you are running in production.
-    /// If the server is running in development mode more logs will be displayed,
-    /// and the master key can be avoided which implies that there is no security on the updates routes.
-    /// This is useful to debug when integrating the engine with another service.
-    #[clap(long, env = "MEILI_ENV", default_value = "development", possible_values = &POSSIBLE_ENV)]
+    /// More logs will be displayed if the server is running in development mode. Setting the master
+    /// key is optional; hence no security on the updates routes. This
+    /// is useful to debug when integrating the engine with another service
+    #[clap(long, env = MEILI_ENV, default_value_t = default_env(), possible_values = &POSSIBLE_ENV)]
+    #[serde(default = "default_env")]
    pub env: String,

    /// Do not send analytics to Meili.
    #[cfg(all(not(debug_assertions), feature = "analytics"))]
-    #[serde(skip)] // we can't send true
-    #[clap(long, env = "MEILI_NO_ANALYTICS")]
+    #[serde(skip_serializing, default)] // we can't send true
+    #[clap(long, env = MEILI_NO_ANALYTICS)]
    pub no_analytics: bool,

-    /// The maximum size, in bytes, of the main lmdb database directory
-    #[clap(long, env = "MEILI_MAX_INDEX_SIZE", default_value = "100 GiB")]
+    /// The maximum size, in bytes, of the main LMDB database directory
+    #[clap(long, env = MEILI_MAX_INDEX_SIZE, default_value_t = default_max_index_size())]
+    #[serde(default = "default_max_index_size")]
    pub max_index_size: Byte,

-    /// The maximum size, in bytes, of the update lmdb database directory
-    #[clap(long, env = "MEILI_MAX_TASK_DB_SIZE", default_value = "100 GiB")]
+    /// The maximum size, in bytes, of the update LMDB database directory
+    #[clap(long, env = MEILI_MAX_TASK_DB_SIZE, default_value_t = default_max_task_db_size())]
+    #[serde(default = "default_max_task_db_size")]
    pub max_task_db_size: Byte,

    /// The maximum size, in bytes, of accepted JSON payloads
-    #[clap(long, env = "MEILI_HTTP_PAYLOAD_SIZE_LIMIT", default_value = "100 MB")]
+    #[clap(long, env = MEILI_HTTP_PAYLOAD_SIZE_LIMIT, default_value_t = default_http_payload_size_limit())]
+    #[serde(default = "default_http_payload_size_limit")]
    pub http_payload_size_limit: Byte,

    /// Read server certificates from CERTFILE.
    /// This should contain PEM-format certificates
    /// in the right order (the first certificate should
    /// certify KEYFILE, the last should be a root CA).
-    #[serde(skip)]
-    #[clap(long, env = "MEILI_SSL_CERT_PATH", parse(from_os_str))]
+    #[serde(skip_serializing)]
+    #[clap(long, env = MEILI_SSL_CERT_PATH, parse(from_os_str))]
    pub ssl_cert_path: Option<PathBuf>,

-    /// Read private key from KEYFILE.  This should be a RSA
+    /// Read the private key from KEYFILE.  This should be an RSA
    /// private key or PKCS8-encoded private key, in PEM format.
-    #[serde(skip)]
-    #[clap(long, env = "MEILI_SSL_KEY_PATH", parse(from_os_str))]
+    #[serde(skip_serializing)]
+    #[clap(long, env = MEILI_SSL_KEY_PATH, parse(from_os_str))]
    pub ssl_key_path: Option<PathBuf>,

    /// Enable client authentication, and accept certificates
    /// signed by those roots provided in CERTFILE.
-    #[clap(long, env = "MEILI_SSL_AUTH_PATH", parse(from_os_str))]
-    #[serde(skip)]
+    #[serde(skip_serializing)]
+    #[clap(long, env = MEILI_SSL_AUTH_PATH, parse(from_os_str))]
    pub ssl_auth_path: Option<PathBuf>,

    /// Read DER-encoded OCSP response from OCSPFILE and staple to certificate.
    /// Optional
-    #[serde(skip)]
-    #[clap(long, env = "MEILI_SSL_OCSP_PATH", parse(from_os_str))]
+    #[serde(skip_serializing)]
+    #[clap(long, env = MEILI_SSL_OCSP_PATH, parse(from_os_str))]
    pub ssl_ocsp_path: Option<PathBuf>,

    /// Send a fatal alert if the client does not complete client authentication.
-    #[serde(skip)]
-    #[clap(long, env = "MEILI_SSL_REQUIRE_AUTH")]
+    #[serde(skip_serializing, default)]
+    #[clap(long, env = MEILI_SSL_REQUIRE_AUTH)]
    pub ssl_require_auth: bool,

    /// SSL support session resumption
-    #[serde(skip)]
-    #[clap(long, env = "MEILI_SSL_RESUMPTION")]
+    #[serde(skip_serializing, default)]
+    #[clap(long, env = MEILI_SSL_RESUMPTION)]
    pub ssl_resumption: bool,

    /// SSL support tickets.
-    #[serde(skip)]
-    #[clap(long, env = "MEILI_SSL_TICKETS")]
+    #[serde(skip_serializing, default)]
+    #[clap(long, env = MEILI_SSL_TICKETS)]
    pub ssl_tickets: bool,

    /// Defines the path of the snapshot file to import.
-    /// This option will, by default, stop the process if a database already exist or if no snapshot exists at
-    /// the given path. If this option is not specified no snapshot is imported.
-    #[clap(long)]
+    /// This option will, by default, stop the process if a database already exists, or if no snapshot exists at
+    /// the given path. If this option is not specified, no snapshot is imported.
+    #[clap(long, env = MEILI_IMPORT_SNAPSHOT)]
    pub import_snapshot: Option<PathBuf>,

-    /// The engine will ignore a missing snapshot and not return an error in such case.
-    #[clap(long, requires = "import-snapshot")]
+    /// The engine will ignore a missing snapshot and not return an error in such a case.
+    #[clap(
+        long,
+        env = MEILI_IGNORE_MISSING_SNAPSHOT,
+        requires = "import-snapshot"
+    )]
+    #[serde(default)]
    pub ignore_missing_snapshot: bool,

    /// The engine will skip snapshot importation and not return an error in such case.
-    #[clap(long, requires = "import-snapshot")]
+    #[clap(
+        long,
+        env = MEILI_IGNORE_SNAPSHOT_IF_DB_EXISTS,
+        requires = "import-snapshot"
+    )]
+    #[serde(default)]
    pub ignore_snapshot_if_db_exists: bool,

-    /// Defines the directory path where meilisearch will create snapshot each snapshot_time_gap.
-    #[clap(long, env = "MEILI_SNAPSHOT_DIR", default_value = "snapshots/")]
+    /// Defines the directory path where Meilisearch will create a snapshot each snapshot-interval-sec.
+    #[clap(long, env = MEILI_SNAPSHOT_DIR, default_value_os_t = default_snapshot_dir())]
+    #[serde(default = "default_snapshot_dir")]
    pub snapshot_dir: PathBuf,

    /// Activate snapshot scheduling.
-    #[clap(long, env = "MEILI_SCHEDULE_SNAPSHOT")]
+    #[clap(long, env = MEILI_SCHEDULE_SNAPSHOT)]
+    #[serde(default)]
    pub schedule_snapshot: bool,

    /// Defines time interval, in seconds, between each snapshot creation.
-    #[clap(long, env = "MEILI_SNAPSHOT_INTERVAL_SEC", default_value = "86400")] // 24h
+    #[clap(long, env = MEILI_SNAPSHOT_INTERVAL_SEC, default_value_t = default_snapshot_interval_sec())]
+    #[serde(default = "default_snapshot_interval_sec")]
+    // 24h
    pub snapshot_interval_sec: u64,

    /// Import a dump from the specified path, must be a `.dump` file.
-    #[clap(long, conflicts_with = "import-snapshot")]
+    #[clap(long, env = MEILI_IMPORT_DUMP, conflicts_with = "import-snapshot")]
    pub import_dump: Option<PathBuf>,

-    /// If the dump doesn't exists, load or create the database specified by `db-path` instead.
-    #[clap(long, requires = "import-dump")]
+    /// If the dump doesn't exist, load or create the database specified by `db-path` instead.
+    #[clap(long, env = MEILI_IGNORE_MISSING_DUMP, requires = "import-dump")]
+    #[serde(default)]
    pub ignore_missing_dump: bool,

    /// Ignore the dump if a database already exists, and load that database instead.
-    #[clap(long, requires = "import-dump")]
+    #[clap(long, env = MEILI_IGNORE_DUMP_IF_DB_EXISTS, requires = "import-dump")]
+    #[serde(default)]
    pub ignore_dump_if_db_exists: bool,

    /// Folder where dumps are created when the dump route is called.
-    #[clap(long, env = "MEILI_DUMPS_DIR", default_value = "dumps/")]
+    #[clap(long, env = MEILI_DUMPS_DIR, default_value_os_t = default_dumps_dir())]
+    #[serde(default = "default_dumps_dir")]
    pub dumps_dir: PathBuf,

-    /// Set the log level
-    #[clap(long, env = "MEILI_LOG_LEVEL", default_value = "info")]
+    /// Set the log level. # Possible values: [ERROR, WARN, INFO, DEBUG, TRACE]
+    #[clap(long, env = MEILI_LOG_LEVEL, default_value_t = default_log_level())]
+    #[serde(default = "default_log_level")]
    pub log_level: String,

    /// Enables Prometheus metrics and /metrics route.
    #[cfg(feature = "metrics")]
-    #[clap(long, env = "MEILI_ENABLE_METRICS_ROUTE")]
+    #[clap(long, env = MEILI_ENABLE_METRICS_ROUTE)]
+    #[serde(default)]
    pub enable_metrics_route: bool,

    #[serde(flatten)]
@ -158,15 +228,139 @@ pub struct Opt {
    #[serde(flatten)]
    #[clap(flatten)]
    pub scheduler_options: SchedulerConfig,
+
+    /// The path to a configuration file that should be used to setup the engine.
+    /// Format must be TOML.
+    #[serde(skip_serializing)]
+    #[clap(long)]
+    pub config_file_path: Option<PathBuf>,
 }

 impl Opt {
-    /// Wether analytics should be enabled or not.
+    /// Whether analytics should be enabled or not.
    #[cfg(all(not(debug_assertions), feature = "analytics"))]
    pub fn analytics(&self) -> bool {
        !self.no_analytics
    }

+    /// Builds a new `Opt` by layering the configuration sources, from lowest to highest
+    /// priority: config file < env vars < cli args.
+    ///
+    /// The config file is looked up at `--config-file-path` when given, and at
+    /// `./config.toml` otherwise. Returns the final options together with the path of the
+    /// config file that was actually read, if any.
+    ///
+    /// # Errors
+    ///
+    /// Fails when the config file explicitly requested by the user cannot be read, or when
+    /// a config file was read but cannot be deserialized into an `Opt`. A default config
+    /// file that cannot be read is silently skipped.
+    pub fn try_build() -> anyhow::Result<(Self, Option<PathBuf>)> {
+        // Parse the args a first time to get the config_file_path.
+        let mut opts = Opt::parse();
+        let mut config_read_from = None;
+        let config_file_path = opts
+            .config_file_path
+            .clone()
+            .unwrap_or_else(|| PathBuf::from("./config.toml"));
+
+        match std::fs::read(&config_file_path) {
+            Ok(config) => {
+                // If the file is successfully read, we deserialize it with `toml`.
+                let opt_from_config = toml::from_slice::<Opt>(&config)?;
+                // We inject the values from the toml in the corresponding env vars if need be.
+                // Doing so, we respect the priority toml < env vars < cli args.
+                opt_from_config.export_to_env();
+                // Once injected we parse the cli args once again to take the new env vars into scope.
+                opts = Opt::parse();
+                config_read_from = Some(config_file_path);
+            }
+            Err(error) => {
+                // Only a file explicitly requested by the user is required to be readable;
+                // the underlying I/O error is reported so the cause is not lost.
+                if let Some(user_path) = &opts.config_file_path {
+                    anyhow::bail!(
+                        "unable to open or read the {:?} configuration file: {}.",
+                        user_path.display().to_string(),
+                        error
+                    );
+                }
+            }
+        }
+
+        Ok((opts, config_read_from))
+    }
+
+    /// Copies the options held by `self` into their matching env vars, skipping every
+    /// variable that is already defined.
+    ///
+    /// The import-related options and `config_file_path` are ignored.
+    fn export_to_env(self) {
+        // Exhaustive destructuring: a field added to `Opt` must be handled (or explicitly
+        // ignored) here for this function to compile.
+        let Opt {
+            db_path,
+            snapshot_dir,
+            dumps_dir,
+            ssl_cert_path,
+            ssl_key_path,
+            ssl_auth_path,
+            ssl_ocsp_path,
+            master_key,
+            http_addr,
+            env,
+            log_level,
+            max_index_size,
+            max_task_db_size,
+            http_payload_size_limit,
+            ssl_require_auth,
+            ssl_resumption,
+            ssl_tickets,
+            schedule_snapshot,
+            snapshot_interval_sec,
+            #[cfg(all(not(debug_assertions), feature = "analytics"))]
+            no_analytics,
+            #[cfg(feature = "metrics")]
+            enable_metrics_route,
+            indexer_options,
+            scheduler_options,
+            import_snapshot: _,
+            ignore_missing_snapshot: _,
+            ignore_snapshot_if_db_exists: _,
+            import_dump: _,
+            ignore_missing_dump: _,
+            ignore_dump_if_db_exists: _,
+            config_file_path: _,
+        } = self;
+
+        // Paths that always hold a value.
+        for (key, path) in [
+            (MEILI_DB_PATH, db_path),
+            (MEILI_SNAPSHOT_DIR, snapshot_dir),
+            (MEILI_DUMPS_DIR, dumps_dir),
+        ] {
+            export_to_env_if_not_present(key, path);
+        }
+
+        // Optional SSL paths: nothing is exported for the ones left unset.
+        for (key, path) in [
+            (MEILI_SSL_CERT_PATH, ssl_cert_path),
+            (MEILI_SSL_KEY_PATH, ssl_key_path),
+            (MEILI_SSL_AUTH_PATH, ssl_auth_path),
+            (MEILI_SSL_OCSP_PATH, ssl_ocsp_path),
+        ] {
+            if let Some(path) = path {
+                export_to_env_if_not_present(key, path);
+            }
+        }
+
+        if let Some(master_key) = master_key {
+            export_to_env_if_not_present(MEILI_MASTER_KEY, master_key);
+        }
+
+        // Every remaining value is exported through its textual representation.
+        for (key, value) in [
+            (MEILI_HTTP_ADDR, http_addr),
+            (MEILI_ENV, env),
+            (MEILI_LOG_LEVEL, log_level),
+            (MEILI_MAX_INDEX_SIZE, max_index_size.to_string()),
+            (MEILI_MAX_TASK_DB_SIZE, max_task_db_size.to_string()),
+            (MEILI_HTTP_PAYLOAD_SIZE_LIMIT, http_payload_size_limit.to_string()),
+            (MEILI_SSL_REQUIRE_AUTH, ssl_require_auth.to_string()),
+            (MEILI_SSL_RESUMPTION, ssl_resumption.to_string()),
+            (MEILI_SSL_TICKETS, ssl_tickets.to_string()),
+            (MEILI_SCHEDULE_SNAPSHOT, schedule_snapshot.to_string()),
+            (MEILI_SNAPSHOT_INTERVAL_SEC, snapshot_interval_sec.to_string()),
+        ] {
+            export_to_env_if_not_present(key, value);
+        }
+
+        #[cfg(all(not(debug_assertions), feature = "analytics"))]
+        {
+            export_to_env_if_not_present(MEILI_NO_ANALYTICS, no_analytics.to_string());
+        }
+        #[cfg(feature = "metrics")]
+        {
+            export_to_env_if_not_present(
+                MEILI_ENABLE_METRICS_ROUTE,
+                enable_metrics_route.to_string(),
+            );
+        }
+
+        // The nested option groups export their own variables.
+        indexer_options.export_to_env();
+        scheduler_options.export_to_env();
+    }
+
    pub fn get_ssl_config(&self) -> anyhow::Result<Option<rustls::ServerConfig>> {
        if let (Some(cert_path), Some(key_path)) = (&self.ssl_cert_path, &self.ssl_key_path) {
            let config = rustls::ServerConfig::builder().with_safe_defaults();
@ -265,6 +459,48 @@ fn load_ocsp(filename: &Option<PathBuf>) -> anyhow::Result<Vec<u8>> {
    Ok(ret)
 }

+// Default values of the `Opt` fields. They need to be functions because serde's `default`
+// attribute refers to a function rather than to a value.
+
+/// Default database location, built from `DEFAULT_DB_PATH`.
+fn default_db_path() -> PathBuf {
+    DEFAULT_DB_PATH.into()
+}
+
+/// Default listening address, copied from `DEFAULT_HTTP_ADDR`.
+fn default_http_addr() -> String {
+    String::from(DEFAULT_HTTP_ADDR)
+}
+
+/// Default running environment, copied from `DEFAULT_ENV`.
+fn default_env() -> String {
+    String::from(DEFAULT_ENV)
+}
+
+/// Default maximum index size, parsed from `DEFAULT_MAX_INDEX_SIZE`.
+///
+/// # Panics
+///
+/// Panics if `DEFAULT_MAX_INDEX_SIZE` is not a valid byte size.
+fn default_max_index_size() -> Byte {
+    Byte::from_str(DEFAULT_MAX_INDEX_SIZE).expect("DEFAULT_MAX_INDEX_SIZE is a valid byte size")
+}
+
+/// Default maximum task database size, parsed from `DEFAULT_MAX_TASK_DB_SIZE`.
+///
+/// # Panics
+///
+/// Panics if `DEFAULT_MAX_TASK_DB_SIZE` is not a valid byte size.
+fn default_max_task_db_size() -> Byte {
+    Byte::from_str(DEFAULT_MAX_TASK_DB_SIZE)
+        .expect("DEFAULT_MAX_TASK_DB_SIZE is a valid byte size")
+}
+
+/// Default maximum payload size, parsed from `DEFAULT_HTTP_PAYLOAD_SIZE_LIMIT`.
+///
+/// # Panics
+///
+/// Panics if `DEFAULT_HTTP_PAYLOAD_SIZE_LIMIT` is not a valid byte size.
+fn default_http_payload_size_limit() -> Byte {
+    Byte::from_str(DEFAULT_HTTP_PAYLOAD_SIZE_LIMIT)
+        .expect("DEFAULT_HTTP_PAYLOAD_SIZE_LIMIT is a valid byte size")
+}
+
+/// Default snapshot directory, built from `DEFAULT_SNAPSHOT_DIR`.
+fn default_snapshot_dir() -> PathBuf {
+    DEFAULT_SNAPSHOT_DIR.into()
+}
+
+/// Default delay between two snapshots, in seconds: `DEFAULT_SNAPSHOT_INTERVAL_SEC` (24h).
+fn default_snapshot_interval_sec() -> u64 {
+    DEFAULT_SNAPSHOT_INTERVAL_SEC
+}
+
+/// Default dumps directory, built from `DEFAULT_DUMPS_DIR`.
+fn default_dumps_dir() -> PathBuf {
+    DEFAULT_DUMPS_DIR.into()
+}
+
+/// Default log level, copied from `DEFAULT_LOG_LEVEL`.
+fn default_log_level() -> String {
+    String::from(DEFAULT_LOG_LEVEL)
+}
+
 #[cfg(test)]
 mod test {
    use super::*;
--- a/meilisearch-http/src/task.rs
+++ b/meilisearch-http/src/task.rs
@ -147,7 +147,7 @@ enum TaskDetails {
    IndexInfo { primary_key: Option<String> },
    #[serde(rename_all = "camelCase")]
    DocumentDeletion {
-        received_document_ids: usize,
+        matched_documents: usize,
        deleted_documents: Option<u64>,
    },
    #[serde(rename_all = "camelCase")]
@ -255,7 +255,7 @@ impl From<Task> for TaskView {
            } => (
                TaskType::DocumentDeletion,
                Some(TaskDetails::DocumentDeletion {
-                    received_document_ids: ids.len(),
+                    matched_documents: ids.len(),
                    deleted_documents: None,
                }),
            ),
--- a/meilisearch-http/tests/dashboard/mod.rs
+++ b/meilisearch-http/tests/dashboard/mod.rs
@ -1,5 +1,6 @@
 use crate::common::Server;

+#[cfg(feature = "mini-dashboard")]
 #[actix_rt::test]
 async fn dashboard_assets_load() {
    let server = Server::new().await;
--- a/meilisearch-http/tests/documents/add_documents.rs
+++ b/meilisearch-http/tests/documents/add_documents.rs
@ -372,7 +372,7 @@ async fn error_add_malformed_json_documents() {
    assert_eq!(
        response["message"],
        json!(
-            r#"The `json` payload provided is malformed. `Couldn't serialize document value: data did not match any variant of untagged enum Either`."#
+            r#"The `json` payload provided is malformed. `Couldn't serialize document value: data are neither an object nor a list of objects`."#
        )
    );
    assert_eq!(response["code"], json!("malformed_payload"));
@ -395,7 +395,7 @@ async fn error_add_malformed_json_documents() {
    assert_eq!(status_code, 400);
    assert_eq!(
        response["message"],
-        json!("The `json` payload provided is malformed. `Couldn't serialize document value: data did not match any variant of untagged enum Either`.")
+        json!("The `json` payload provided is malformed. `Couldn't serialize document value: data are neither an object nor a list of objects`.")
    );
    assert_eq!(response["code"], json!("malformed_payload"));
    assert_eq!(response["type"], json!("invalid_request"));
--- a/meilisearch-lib/Cargo.toml
+++ b/meilisearch-lib/Cargo.toml
@ -28,7 +28,7 @@ lazy_static = "1.4.0"
 log = "0.4.14"
 meilisearch-auth = { path = "../meilisearch-auth" }
 meilisearch-types = { path = "../meilisearch-types" }
-milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.33.4" }
+milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.33.4", default-features = false }
 mime = "0.3.16"
 num_cpus = "1.13.1"
 obkv = "0.2.0"
@ -64,3 +64,19 @@ nelson = { git = "https://github.com/meilisearch/nelson.git", rev = "675f1388554
 paste = "1.0.6"
 proptest = "1.0.0"
 proptest-derive = "0.3.0"
+
+[features]
+# all specialized tokenizations
+default = ["milli/default"]
+
+# chinese specialized tokenization
+chinese = ["milli/chinese"]
+
+# hebrew specialized tokenization
+hebrew = ["milli/hebrew"]
+
+# japanese specialized tokenization
+japanese = ["milli/japanese"]
+
+# thai specialized tokenization
+thai = ["milli/thai"]
--- a/meilisearch-lib/src/document_formats.rs
+++ b/meilisearch-lib/src/document_formats.rs
@ -8,6 +8,7 @@ use meilisearch_types::internal_error;
 use milli::documents::{DocumentsBatchBuilder, Error};
 use milli::Object;
 use serde::Deserialize;
+use serde_json::error::Category;

 type Result<T> = std::result::Result<T, DocumentFormatError>;

@ -40,18 +41,32 @@ impl Display for DocumentFormatError {
            Self::Internal(e) => write!(f, "An internal error has occurred: `{}`.", e),
            Self::MalformedPayload(me, b) => match me.borrow() {
                Error::Json(se) => {
+                    let mut message = match se.classify() {
+                        Category::Data => {
+                            "data are neither an object nor a list of objects".to_string()
+                        }
+                        _ => se.to_string(),
+                    };
+
                    // https://github.com/meilisearch/meilisearch/issues/2107
                    // The user input maybe insanely long. We need to truncate it.
-                    let mut serde_msg = se.to_string();
                    let ellipsis = "...";
-                    if serde_msg.len() > 100 + ellipsis.len() {
-                        serde_msg.replace_range(50..serde_msg.len() - 85, ellipsis);
+                    let trim_input_prefix_len = 50;
+                    let trim_input_suffix_len = 85;
+
+                    if message.len()
+                        > trim_input_prefix_len + trim_input_suffix_len + ellipsis.len()
+                    {
+                        message.replace_range(
+                            trim_input_prefix_len..message.len() - trim_input_suffix_len,
+                            ellipsis,
+                        );
                    }

                    write!(
                        f,
                        "The `{}` payload provided is malformed. `Couldn't serialize document value: {}`.",
-                        b, serde_msg
+                        b, message
                )
                }
                _ => write!(f, "The `{}` payload provided is malformed: `{}`.", b, me),
--- a/meilisearch-lib/src/dump/compat/v2.rs
+++ b/meilisearch-lib/src/dump/compat/v2.rs
@ -145,7 +145,7 @@ pub fn error_code_from_str(s: &str) -> anyhow::Result<Code> {
        "unsupported_media_type" => Code::UnsupportedMediaType,
        "dump_already_in_progress" => Code::DumpAlreadyInProgress,
        "dump_process_failed" => Code::DumpProcessFailed,
-        _ => bail!("unknow error code."),
+        _ => bail!("unknown error code."),
    };

    Ok(code)
--- a/meilisearch-lib/src/dump/loaders/v1.rs
+++ b/meilisearch-lib/src/dump/loaders/v1.rs
@ -1,24 +0,0 @@
-use std::path::Path;
-
-use serde::{Deserialize, Serialize};
-
-use crate::index_controller::IndexMetadata;
-
-#[derive(Serialize, Deserialize, Debug)]
-#[serde(rename_all = "camelCase")]
-pub struct MetadataV1 {
-    pub db_version: String,
-    indexes: Vec<IndexMetadata>,
-}
-
-impl MetadataV1 {
-    #[allow(dead_code, unreachable_code, unused_variables)]
-    pub fn load_dump(
-        self,
-        src: impl AsRef<Path>,
-        dst: impl AsRef<Path>,
-        size: usize,
-        indexer_options: &IndexerOpts,
-    ) -> anyhow::Result<()> {
-        anyhow::bail!("The version 1 of the dumps is not supported anymore. You can re-export your dump from a version between 0.21 and 0.24, or start fresh from a version 0.25 onwards.")
-}
--- a/meilisearch-lib/src/dump/loaders/v4.rs
+++ b/meilisearch-lib/src/dump/loaders/v4.rs
@ -57,10 +57,10 @@ fn patch_updates(src: impl AsRef<Path>, dst: impl AsRef<Path>) -> anyhow::Result
    let updates_path = src.as_ref().join("updates/data.jsonl");
    let output_updates_path = dst.as_ref().join("updates/data.jsonl");
    create_dir_all(output_updates_path.parent().unwrap())?;
-    let udpates_file = File::open(updates_path)?;
+    let updates_file = File::open(updates_path)?;
    let mut output_update_file = File::create(output_updates_path)?;

-    serde_json::Deserializer::from_reader(udpates_file)
+    serde_json::Deserializer::from_reader(updates_file)
        .into_iter::<compat::v4::Task>()
        .try_for_each(|task| -> anyhow::Result<()> {
            let task: Task = task?.into();
--- a/meilisearch-lib/src/index/dump.rs
+++ b/meilisearch-lib/src/index/dump.rs
@ -105,6 +105,7 @@ impl Index {

        let mut options = EnvOpenOptions::new();
        options.map_size(size);
+        options.max_readers(1024);
        let index = milli::Index::new(options, &dst_dir_path)?;

        let mut txn = index.write_txn()?;
--- a/meilisearch-lib/src/index/index.rs
+++ b/meilisearch-lib/src/index/index.rs
@ -94,6 +94,7 @@ impl Index {
        create_dir_all(&path)?;
        let mut options = EnvOpenOptions::new();
        options.map_size(size);
+        options.max_readers(1024);
        let inner = Arc::new(milli::Index::new(options, &path)?);
        Ok(Index {
            inner,
--- a/meilisearch-lib/src/index/search.rs
+++ b/meilisearch-lib/src/index/search.rs
@ -27,7 +27,7 @@ pub const DEFAULT_CROP_MARKER: fn() -> String = || "…".to_string();
 pub const DEFAULT_HIGHLIGHT_PRE_TAG: fn() -> String = || "<em>".to_string();
 pub const DEFAULT_HIGHLIGHT_POST_TAG: fn() -> String = || "</em>".to_string();

-/// The maximimum number of results that the engine
+/// The maximum number of results that the engine
 /// will be able to return in one search call.
 pub const DEFAULT_PAGINATION_MAX_TOTAL_HITS: usize = 1000;

--- a/meilisearch-lib/src/index_resolver/index_store.rs
+++ b/meilisearch-lib/src/index_resolver/index_store.rs
@ -51,7 +51,7 @@ impl MapIndexStore {
 #[async_trait::async_trait]
 impl IndexStore for MapIndexStore {
    async fn create(&self, uuid: Uuid) -> Result<Index> {
-        // We need to keep the lock until we are sure the db file has been opened correclty, to
+        // We need to keep the lock until we are sure the db file has been opened correctly, to
        // ensure that another db is not created at the same time.
        let mut lock = self.index_store.write().await;

--- a/meilisearch-lib/src/lib.rs
+++ b/meilisearch-lib/src/lib.rs
@ -11,6 +11,8 @@ mod snapshot;
 pub mod tasks;
 mod update_file_store;

+use std::env::VarError;
+use std::ffi::OsStr;
 use std::path::Path;

 pub use index_controller::MeiliSearch;
@ -35,3 +37,14 @@ pub fn is_empty_db(db_path: impl AsRef<Path>) -> bool {
        true
    }
 }
+
+/// Sets the environment variable `key` to `value`, unless `key` is already defined.
+///
+/// A variable that is defined but does not hold valid unicode counts as defined and is
+/// left untouched.
+pub fn export_to_env_if_not_present<T: AsRef<OsStr>>(key: &str, value: T) {
+    let is_missing = matches!(std::env::var(key), Err(VarError::NotPresent));
+    if is_missing {
+        std::env::set_var(key, value);
+    }
+}
--- a/meilisearch-lib/src/options.rs
+++ b/meilisearch-lib/src/options.rs
@ -1,33 +1,40 @@
+use crate::export_to_env_if_not_present;
+
 use core::fmt;
 use std::{convert::TryFrom, num::ParseIntError, ops::Deref, str::FromStr};

 use byte_unit::{Byte, ByteError};
 use clap::Parser;
 use milli::update::IndexerConfig;
-use serde::Serialize;
+use serde::{Deserialize, Serialize};
 use sysinfo::{RefreshKind, System, SystemExt};

-#[derive(Debug, Clone, Parser, Serialize)]
+const MEILI_MAX_INDEXING_MEMORY: &str = "MEILI_MAX_INDEXING_MEMORY";
+const MEILI_MAX_INDEXING_THREADS: &str = "MEILI_MAX_INDEXING_THREADS";
+const DISABLE_AUTO_BATCHING: &str = "DISABLE_AUTO_BATCHING";
+const DEFAULT_LOG_EVERY_N: usize = 100000;
+
+#[derive(Debug, Clone, Parser, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case", deny_unknown_fields)]
 pub struct IndexerOpts {
    /// The amount of documents to skip before printing
    /// a log regarding the indexing advancement.
-    #[serde(skip)]
-    #[clap(long, default_value = "100000", hide = true)] // 100k
+    #[serde(skip_serializing, default = "default_log_every_n")]
+    #[clap(long, default_value_t = default_log_every_n(), hide = true)] // 100k
    pub log_every_n: usize,

    /// Grenad max number of chunks in bytes.
-    #[serde(skip)]
+    #[serde(skip_serializing)]
    #[clap(long, hide = true)]
    pub max_nb_chunks: Option<usize>,

-    /// The maximum amount of memory the indexer will use. It defaults to 2/3
-    /// of the available memory. It is recommended to use something like 80%-90%
-    /// of the available memory, no more.
+    /// The maximum amount of memory the indexer will use.
    ///
    /// In case the engine is unable to retrieve the available memory the engine will
    /// try to use the memory it needs but without real limit, this can lead to
    /// Out-Of-Memory issues and it is recommended to specify the amount of memory to use.
-    #[clap(long, env = "MEILI_MAX_INDEXING_MEMORY", default_value_t)]
+    #[clap(long, env = MEILI_MAX_INDEXING_MEMORY, default_value_t)]
+    #[serde(default)]
    pub max_indexing_memory: MaxMemory,

    /// The maximum number of threads the indexer will use.
@ -35,18 +42,43 @@ pub struct IndexerOpts {
    /// it will use the maximum number of available cores.
    ///
    /// It defaults to half of the available threads.
-    #[clap(long, env = "MEILI_MAX_INDEXING_THREADS", default_value_t)]
+    #[clap(long, env = MEILI_MAX_INDEXING_THREADS, default_value_t)]
+    #[serde(default)]
    pub max_indexing_threads: MaxThreads,
 }

-#[derive(Debug, Clone, Parser, Default, Serialize)]
+// Task-scheduler options: parsed from the CLI/environment through clap and
+// (de)serialized through serde with snake_case keys. Deserialization rejects
+// unknown keys, and a missing `disable_auto_batching` falls back to `false`.
+// (Plain `//` comment: a `///` here would be picked up by clap as help text.)
+#[derive(Debug, Clone, Parser, Default, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case", deny_unknown_fields)]
 pub struct SchedulerConfig {
    /// The engine will disable task auto-batching,
    /// and will sequencialy compute each task one by one.
-    #[clap(long, env = "DISABLE_AUTO_BATCHING")]
+    #[clap(long, env = DISABLE_AUTO_BATCHING)]
+    #[serde(default)]
    pub disable_auto_batching: bool,
 }

+impl IndexerOpts {
+    /// Exports the indexing limits to their corresponding env vars if they
+    /// are not set.
+    ///
+    /// The memory limit is exported only when the wrapped value is `Some`;
+    /// the thread limit is always exported. `log_every_n` and
+    /// `max_nb_chunks` are not exported.
+    pub fn export_to_env(self) {
+        // Exhaustive pattern: a field added to `IndexerOpts` later must be
+        // handled (or explicitly ignored) here, or this stops compiling.
+        let Self {
+            log_every_n: _,
+            max_nb_chunks: _,
+            max_indexing_memory: MaxMemory(memory_limit),
+            max_indexing_threads: MaxThreads(thread_limit),
+        } = self;
+
+        if let Some(bytes) = memory_limit {
+            export_to_env_if_not_present(MEILI_MAX_INDEXING_MEMORY, bytes.to_string());
+        }
+        export_to_env_if_not_present(MEILI_MAX_INDEXING_THREADS, thread_limit.to_string());
+    }
+}
+
 impl TryFrom<&IndexerOpts> for IndexerConfig {
    type Error = anyhow::Error;

@ -77,8 +109,17 @@ impl Default for IndexerOpts {
    }
 }

+impl SchedulerConfig {
+    /// Passes `disable_auto_batching`, rendered as `"true"` or `"false"`, to
+    /// `export_to_env_if_not_present` under the `DISABLE_AUTO_BATCHING` name.
+    pub fn export_to_env(self) {
+        // Exhaustive pattern: a field added to `SchedulerConfig` later must
+        // be handled here, or this stops compiling.
+        let Self {
+            disable_auto_batching: auto_batching_disabled,
+        } = self;
+
+        let value = auto_batching_disabled.to_string();
+        export_to_env_if_not_present(DISABLE_AUTO_BATCHING, value);
+    }
+}
+
 /// A type used to detect the max memory available and use 2/3 of it.
-#[derive(Debug, Clone, Copy, Serialize)]
+///
+/// The wrapped value is optional: `IndexerOpts::export_to_env` only exports a
+/// memory limit when it is `Some`.
+#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
 pub struct MaxMemory(Option<Byte>);

 impl FromStr for MaxMemory {
@ -134,7 +175,7 @@ fn total_memory_bytes() -> Option<u64> {
    }
 }

-#[derive(Debug, Clone, Copy, Serialize)]
+/// Newtype around a `usize` thread count; it is the type of
+/// `IndexerOpts::max_indexing_threads`.
+#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
 pub struct MaxThreads(usize);

 impl FromStr for MaxThreads {
@ -164,3 +205,7 @@ impl Deref for MaxThreads {
        &self.0
    }
 }
+
+/// Returns [`DEFAULT_LOG_EVERY_N`].
+///
+/// Exists as a function so it can be named in
+/// `#[serde(default = "default_log_every_n")]` and called from the clap
+/// `default_value_t` attribute on `IndexerOpts::log_every_n`.
+fn default_log_every_n() -> usize {
+    DEFAULT_LOG_EVERY_N
+}
--- a/meilisearch-lib/src/snapshot.rs
+++ b/meilisearch-lib/src/snapshot.rs
@ -181,6 +181,7 @@ impl SnapshotJob {

            let mut options = milli::heed::EnvOpenOptions::new();
            options.map_size(self.index_size);
+            options.max_readers(1024);
            let index = milli::Index::new(options, entry.path())?;
            index.copy_to_path(dst, CompactionOption::Enabled)?;
        }
--- a/meilisearch-lib/src/tasks/task_store/mod.rs
+++ b/meilisearch-lib/src/tasks/task_store/mod.rs
@ -117,7 +117,7 @@ impl TaskStore {
        match filter {
            Some(filter) => filter
                .pass(&task)
-                .then(|| task)
+                .then_some(task)
                .ok_or(TaskError::UnexistingTask(id)),
            None => Ok(task),
        }
--- a/meilisearch-lib/src/tasks/task_store/store.rs
+++ b/meilisearch-lib/src/tasks/task_store/store.rs
@ -63,7 +63,7 @@ impl Store {
    /// Returns the id for the next task.
    ///
    /// The required `mut txn` acts as a reservation system. It guarantees that as long as you commit
-    /// the task to the store in the same transaction, no one else will hav this task id.
+    /// the task to the store in the same transaction, no one else will have this task id.
    pub fn next_task_id(&self, txn: &mut RwTxn) -> Result<TaskId> {
        let id = self
            .tasks