update space-1/space-2 links for access

HwwAncient 2023-03-09 01:36:08 +08:00
parent ce72b3a829
commit 922220d723
3 changed files with 82 additions and 12 deletions

.idea/workspace.xml Normal file

@@ -0,0 +1,70 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="AutoImportSettings">
    <option name="autoReloadType" value="SELECTIVE" />
  </component>
  <component name="ChangeListManager">
    <list default="true" id="079f724a-dabb-42e0-84ee-a850f914d5ae" name="Changes" comment="">
      <change beforePath="$PROJECT_DIR$/space-1/README.md" beforeDir="false" afterPath="$PROJECT_DIR$/space-1/README.md" afterDir="false" />
      <change beforePath="$PROJECT_DIR$/space-2/README.md" beforeDir="false" afterPath="$PROJECT_DIR$/space-2/README.md" afterDir="false" />
    </list>
    <option name="SHOW_DIALOG" value="false" />
    <option name="HIGHLIGHT_CONFLICTS" value="true" />
    <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
    <option name="LAST_RESOLUTION" value="IGNORE" />
  </component>
  <component name="Git.Settings">
    <option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" />
  </component>
  <component name="MarkdownSettingsMigration">
    <option name="stateVersion" value="1" />
  </component>
  <component name="ProjectId" id="2Mb9j54td8k53KgHRf98pcsfkaC" />
  <component name="ProjectViewState">
    <option name="hideEmptyMiddlePackages" value="true" />
    <option name="showLibraryContents" value="true" />
  </component>
  <component name="PropertiesComponent">
    <property name="RunOnceActivity.OpenProjectViewOnStart" value="true" />
    <property name="RunOnceActivity.ShowReadmeOnStart" value="true" />
    <property name="WebServerToolWindowFactoryState" value="true" />
    <property name="last_opened_file_path" value="$PROJECT_DIR$/space-2" />
    <property name="node.js.detected.package.eslint" value="true" />
    <property name="node.js.detected.package.tslint" value="true" />
    <property name="node.js.selected.package.eslint" value="(autodetect)" />
    <property name="node.js.selected.package.tslint" value="(autodetect)" />
  </component>
  <component name="RecentsManager">
    <key name="CopyFile.RECENT_KEYS">
      <recent name="$PROJECT_DIR$/space-2" />
    </key>
  </component>
  <component name="SpellCheckerSettings" RuntimeDictionaries="0" Folders="0" CustomDictionaries="0" DefaultDictionary="application-level" UseSingleDictionary="true" transferred="true" />
  <component name="TaskManager">
    <task active="true" id="Default" summary="Default task">
      <changelist id="079f724a-dabb-42e0-84ee-a850f914d5ae" name="Changes" comment="" />
      <created>1678025547077</created>
      <option name="number" value="Default" />
      <option name="presentableId" value="Default" />
      <updated>1678025547077</updated>
      <workItem from="1678025548212" duration="147000" />
      <workItem from="1678025785276" duration="1409000" />
      <workItem from="1678296002630" duration="833000" />
    </task>
    <servers />
  </component>
  <component name="TypeScriptGeneratedFilesManager">
    <option name="version" value="3" />
  </component>
  <component name="Vcs.Log.Tabs.Properties">
    <option name="TAB_STATES">
      <map>
        <entry key="MAIN">
          <value>
            <State />
          </value>
        </entry>
      </map>
    </option>
  </component>
</project>

space-1/README.md

@@ -57,11 +57,11 @@ The downloaded zip file `data.zip` contains pre-training corpora and four TOD be
 ## Pre-training
 ### Pre-training Corpora
-- [UniDA](http://datarepo0.oss-cn-hangzhou-zmf.aliyuncs.com/Alibaba/SPACE1.0/Pre-training%20Data.zip): a new labeled dialog dataset consisting of 975,780 utterances, which are annotated with 20 frequently-used DAs, according to our proposed comprehensive unified DA taxonomy for task-oriented dialog.
-- [UnDial](http://datarepo0.oss-cn-hangzhou-zmf.aliyuncs.com/Alibaba/SPACE1.0/Pre-training%20Data.zip): a large-scale unlabeled dialog dataset consisting of 35M utterances with careful processing, ranging from online forum chatting logs to customer service conversations.
+- [UniDA](https://drive.google.com/file/d/146ZPNI_FDKNX0xd_iErmD8etA5yj5mox/view?usp=share_link): a new labeled dialog dataset consisting of 975,780 utterances, which are annotated with 20 frequently-used DAs, according to our proposed comprehensive unified DA taxonomy for task-oriented dialog.
+- [UnDial](https://drive.google.com/file/d/1-1CEyd1gPJL8r9Na6aD9Wq0mHg-fAaI8/view?usp=share_link): a large-scale unlabeled dialog dataset consisting of 35M utterances with careful processing, ranging from online forum chatting logs to customer service conversations.
 ### Pre-trained Checkpoint
-- [SPACE1.0](http://datarepo0.oss-cn-hangzhou-zmf.aliyuncs.com/Alibaba/SPACE1.0/model.zip): an uncased model with DA classification head (12-layers, 768-hidden, 12-heads, 109M parameters)
+- [SPACE1.0](https://drive.google.com/file/d/18NPZQ6SH9Q0nFZenf_hNyuJTyT9IFAjL/view?usp=share_link): an uncased model with DA classification head (12-layers, 768-hidden, 12-heads, 109M parameters)
 You need to unzip the downloaded model file `model.zip`, then put the unzipped directory `model` into the project directory `SPACE1.0` for further fine-tuning.
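
Since the updated links point to Google Drive rather than direct OSS URLs, the archives cannot reliably be fetched with a plain `wget`/`curl`. The sketch below is not part of the repository: it assumes the third-party `gdown` downloader is installed (`pip install gdown`) and that `model.zip` unpacks to a top-level `model/` directory, as the README states; the file ID is taken from the updated SPACE1.0 link above.

```bash
# Hypothetical download-and-place steps for the SPACE1.0 checkpoint.
gdown "https://drive.google.com/uc?id=18NPZQ6SH9Q0nFZenf_hNyuJTyT9IFAjL" -O model.zip
unzip model.zip       # expected to produce a top-level `model/` directory
mv model SPACE1.0/    # place it under the project directory for fine-tuning
```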
@@ -84,7 +84,7 @@ sh scripts/pre_train/train_multi.sh
 ## Fine-tuning
 ### Fine-tuned Checkpoints
-Download checkpoints from this [link](http://datarepo0.oss-cn-hangzhou-zmf.aliyuncs.com/Alibaba/SPACE1.0/outputs.zip).
+Download checkpoints from this [link](https://drive.google.com/file/d/1JerSwvLzes6b-igQ7lPCTIrh6IvrTMK6/view?usp=share_link).
 The downloaded zip file `outputs.zip` contains our best fine-tuned checkpoints on different datasets:
 - the **7-th** epoch on MultiWOZ2.0 (**60** training epochs in total)

space-2/README.md

@@ -49,11 +49,11 @@ SAVE_ROOT=<YOUR_SAVE_PATH>/${PROJECT_NAME} # root path of model's output
 ```
 ### Data Preparation
-Download data-split1 from this [link](http://datarepo0.oss-cn-hangzhou-zmf.aliyuncs.com/Alibaba/SPACE2/data.zip).
+Download data-split1 from this [link](https://drive.google.com/file/d/1ocwnuOLxB3VzngeWZsm59IRrhEv22Scx/view?usp=share_link).
 The downloaded zip file `data.zip` contains pre-training corpora (including BANKING77, CLINC150 and HWU64) and three extra task-oriented (TOD) benchmark datasets: REST8K, DSTC8 and TOP, which have already been processed. You need to put the unzipped directory `data` into the project directory `SPACE2.0` for the subsequent training.
-Download data-split2 from this [link](http://datarepo0.oss-cn-hangzhou-zmf.aliyuncs.com/Alibaba/SPACE2/trippy/data.zip).
+Download data-split2 from this [link](https://drive.google.com/file/d/1BZvlARzxXobjpQQRWvkF3jwnLN9-9c-n/view?usp=share_link).
 The downloaded zip file `data.zip` contains one TOD benchmark dataset: MultiWOZ2.1, which has already been processed. You need to put the unzipped directory `data` into the directory `SPACE2.0/trippy` for the subsequent training.
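
A minimal sketch of the data preparation described in this hunk, assuming both archives have already been downloaded (e.g. with `gdown`, as above), saved under the hypothetical names `data-split1.zip` and `data-split2.zip` since both are served as `data.zip`, and that each unpacks to a top-level `data/` directory:

```bash
# data-split1: pre-training corpora + REST8K/DSTC8/TOP -> SPACE2.0/data
unzip data-split1.zip -d SPACE2.0/
# data-split2: MultiWOZ2.1 -> SPACE2.0/trippy/data
unzip data-split2.zip -d SPACE2.0/trippy/
```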
@@ -77,12 +77,12 @@ SPACE2.0/
 ## Pre-training
 ### Pre-training Corpora
-- [AnPreDial](http://datarepo0.oss-cn-hangzhou-zmf.aliyuncs.com/Alibaba/SPACE2/AnPreDial.zip): a new labeled dialog dataset annotated with semantic trees, which contains 32 existing labeled TOD datasets with 3
+- [AnPreDial](https://drive.google.com/file/d/1ocwnuOLxB3VzngeWZsm59IRrhEv22Scx/view?usp=share_link): a new labeled dialog dataset annotated with semantic trees, which contains 32 existing labeled TOD datasets with 3
 million turns, ranging from single-turn QA to multi-turn dialogs.
-- [UnPreDial](http://datarepo0.oss-cn-hangzhou-zmf.aliyuncs.com/Alibaba/SPACE2/UnPreDial.zip): a large-scale unlabeled dialog dataset consisting of 19M utterances with careful processing from 21 online dialog corpora, ranging from online forums to conversational machine reading comprehension.
+- [UnPreDial](https://drive.google.com/file/d/1ocwnuOLxB3VzngeWZsm59IRrhEv22Scx/view?usp=share_link): a large-scale unlabeled dialog dataset consisting of 19M utterances with careful processing from 21 online dialog corpora, ranging from online forums to conversational machine reading comprehension.
 ### Pre-trained Checkpoint
-- [SPACE2.0](http://datarepo0.oss-cn-hangzhou-zmf.aliyuncs.com/Alibaba/SPACE2/model.zip): an uncased model (12-layers, 768-hidden, 12-heads, 110M parameters)
+- [SPACE2.0](https://drive.google.com/file/d/1QOhrd_kB8VXevEAo1Gohr58LxMI4OjYo/view?usp=share_link): an uncased model (12-layers, 768-hidden, 12-heads, 110M parameters)
 You need to unzip the downloaded model file `model.zip`, then put the unzipped directory `model` into the project directory `SPACE2.0` for further fine-tuning.
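
The same pattern as the SPACE1.0 step above applies here; a sketch, again assuming `gdown` and a top-level `model/` directory inside the archive, with the file ID taken from the updated SPACE2.0 link:

```bash
gdown "https://drive.google.com/uc?id=1QOhrd_kB8VXevEAo1Gohr58LxMI4OjYo" -O model.zip
unzip model.zip
mv model SPACE2.0/   # project directory for SPACE2.0 fine-tuning
```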
@@ -100,7 +100,7 @@ sh scripts/pre_train/train.sh
 ## Fine-tuning
 ### Fine-tuned Checkpoints
-Download checkpoints-split1 from this [link](http://datarepo0.oss-cn-hangzhou-zmf.aliyuncs.com/Alibaba/SPACE2/outputs.zip).
+Download checkpoints-split1 from this [link](https://drive.google.com/file/d/10QEEMNsjO5rH0ZRsJBj9zkDc5ozxc3Ch/view?usp=share_link).
 The downloaded zip file `outputs.zip` contains our best fine-tuned checkpoints on the following six datasets:
 - BANKING77, CLINC150, HWU64 (**Intent Prediction**)
@@ -109,7 +109,7 @@ The downloaded zip file `outputs.zip` contains our best fine-tuned checkpoints o
 If you want to reproduce our reported results, you should put the unzipped directory `outputs` into the directory `${SAVE_ROOT}` (set in scripts).
-Download checkpoints-split2 from this [link](http://datarepo0.oss-cn-hangzhou-zmf.aliyuncs.com/Alibaba/SPACE2/trippy/outputs.zip).
+Download checkpoints-split2 from this [link](https://drive.google.com/file/d/1G7K6AIBcRTC3CgMtSZdJ_TM6rFeXGe96/view?usp=share_link).
 The downloaded zip file `outputs.zip` contains our best fine-tuned checkpoints on one dataset:
 - MultiWOZ2.1 (**Dialog State Tracking**)
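
For reproducing the reported results, a hypothetical sketch of the placement step mentioned at the top of this hunk, assuming `outputs.zip` (checkpoints-split1) has been downloaded and that `${SAVE_ROOT}` is set as in the training scripts:

```bash
# Unpacks the top-level `outputs/` directory into ${SAVE_ROOT}
unzip outputs.zip -d "${SAVE_ROOT}"
```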
@@ -159,7 +159,7 @@ sh scripts/multiwoz21/train.sh
 > **NOTE**: You can skip Step 1 if you directly download the output model of Step 1.
 > For the DST task, you should convert model parameters into Hugging Face format.
-> So you can download the model file from this [link](http://datarepo0.oss-cn-hangzhou-zmf.aliyuncs.com/Alibaba/SPACE2/trippy/model.zip) directly.
+> So you can download the model file from this [link](https://drive.google.com/file/d/1xzKhKBg0hJPAq1NebluLIwfVxnfN1-1R/view?usp=share_link) directly.
 > Then you need to unzip the downloaded model file `model.zip`, and put the unzipped directory `model` into the directory `SPACE2.0/trippy` for further fine-tuning.
 ### Inference