update space-1/space-2 links for access
This commit is contained in:
parent
ce72b3a829
commit
922220d723
|
@ -0,0 +1,70 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="AutoImportSettings">
|
||||
<option name="autoReloadType" value="SELECTIVE" />
|
||||
</component>
|
||||
<component name="ChangeListManager">
|
||||
<list default="true" id="079f724a-dabb-42e0-84ee-a850f914d5ae" name="Changes" comment="">
|
||||
<change beforePath="$PROJECT_DIR$/space-1/README.md" beforeDir="false" afterPath="$PROJECT_DIR$/space-1/README.md" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/space-2/README.md" beforeDir="false" afterPath="$PROJECT_DIR$/space-2/README.md" afterDir="false" />
|
||||
</list>
|
||||
<option name="SHOW_DIALOG" value="false" />
|
||||
<option name="HIGHLIGHT_CONFLICTS" value="true" />
|
||||
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
|
||||
<option name="LAST_RESOLUTION" value="IGNORE" />
|
||||
</component>
|
||||
<component name="Git.Settings">
|
||||
<option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" />
|
||||
</component>
|
||||
<component name="MarkdownSettingsMigration">
|
||||
<option name="stateVersion" value="1" />
|
||||
</component>
|
||||
<component name="ProjectId" id="2Mb9j54td8k53KgHRf98pcsfkaC" />
|
||||
<component name="ProjectViewState">
|
||||
<option name="hideEmptyMiddlePackages" value="true" />
|
||||
<option name="showLibraryContents" value="true" />
|
||||
</component>
|
||||
<component name="PropertiesComponent">
|
||||
<property name="RunOnceActivity.OpenProjectViewOnStart" value="true" />
|
||||
<property name="RunOnceActivity.ShowReadmeOnStart" value="true" />
|
||||
<property name="WebServerToolWindowFactoryState" value="true" />
|
||||
<property name="last_opened_file_path" value="$PROJECT_DIR$/space-2" />
|
||||
<property name="node.js.detected.package.eslint" value="true" />
|
||||
<property name="node.js.detected.package.tslint" value="true" />
|
||||
<property name="node.js.selected.package.eslint" value="(autodetect)" />
|
||||
<property name="node.js.selected.package.tslint" value="(autodetect)" />
|
||||
</component>
|
||||
<component name="RecentsManager">
|
||||
<key name="CopyFile.RECENT_KEYS">
|
||||
<recent name="$PROJECT_DIR$/space-2" />
|
||||
</key>
|
||||
</component>
|
||||
<component name="SpellCheckerSettings" RuntimeDictionaries="0" Folders="0" CustomDictionaries="0" DefaultDictionary="application-level" UseSingleDictionary="true" transferred="true" />
|
||||
<component name="TaskManager">
|
||||
<task active="true" id="Default" summary="Default task">
|
||||
<changelist id="079f724a-dabb-42e0-84ee-a850f914d5ae" name="Changes" comment="" />
|
||||
<created>1678025547077</created>
|
||||
<option name="number" value="Default" />
|
||||
<option name="presentableId" value="Default" />
|
||||
<updated>1678025547077</updated>
|
||||
<workItem from="1678025548212" duration="147000" />
|
||||
<workItem from="1678025785276" duration="1409000" />
|
||||
<workItem from="1678296002630" duration="833000" />
|
||||
</task>
|
||||
<servers />
|
||||
</component>
|
||||
<component name="TypeScriptGeneratedFilesManager">
|
||||
<option name="version" value="3" />
|
||||
</component>
|
||||
<component name="Vcs.Log.Tabs.Properties">
|
||||
<option name="TAB_STATES">
|
||||
<map>
|
||||
<entry key="MAIN">
|
||||
<value>
|
||||
<State />
|
||||
</value>
|
||||
</entry>
|
||||
</map>
|
||||
</option>
|
||||
</component>
|
||||
</project>
|
|
@ -57,11 +57,11 @@ The downloaded zip file `data.zip` contains pre-training corpora and four TOD be
|
|||
|
||||
## Pre-training
|
||||
### Pre-training Corpora
|
||||
- [UniDA](http://datarepo0.oss-cn-hangzhou-zmf.aliyuncs.com/Alibaba/SPACE1.0/Pre-training%20Data.zip): a new labeled dialog dataset consisting of 975,780 utterances, which are annotated with 20 frequently-used DAs, according to our proposed comprehensive unified DA taxonomy for task-oriented dialog.
|
||||
- [UnDial](http://datarepo0.oss-cn-hangzhou-zmf.aliyuncs.com/Alibaba/SPACE1.0/Pre-training%20Data.zip): a large-scale unlabeled dialog dataset consisting of 35M utterances with careful processing, ranging from online forum chatting logs to customer service conversations.
|
||||
- [UniDA](https://drive.google.com/file/d/146ZPNI_FDKNX0xd_iErmD8etA5yj5mox/view?usp=share_link): a new labeled dialog dataset consisting of 975,780 utterances, which are annotated with 20 frequently-used DAs, according to our proposed comprehensive unified DA taxonomy for task-oriented dialog.
|
||||
- [UnDial](https://drive.google.com/file/d/1-1CEyd1gPJL8r9Na6aD9Wq0mHg-fAaI8/view?usp=share_link): a large-scale unlabeled dialog dataset consisting of 35M utterances with careful processing, ranging from online forum chatting logs to customer service conversations.
|
||||
|
||||
### Pre-trained Checkpoint
|
||||
- [SPACE1.0](http://datarepo0.oss-cn-hangzhou-zmf.aliyuncs.com/Alibaba/SPACE1.0/model.zip): an uncased model with DA classification head (12-layers, 768-hidden, 12-heads, 109M parameters)
|
||||
- [SPACE1.0](https://drive.google.com/file/d/18NPZQ6SH9Q0nFZenf_hNyuJTyT9IFAjL/view?usp=share_link): an uncased model with DA classification head (12-layers, 768-hidden, 12-heads, 109M parameters)
|
||||
|
||||
You need to unzip the downloaded model file `model.zip`, then put the unzipped directory `model` into the project directory `SPACE1.0` for further fine-tuning.
|
||||
|
||||
|
@ -84,7 +84,7 @@ sh scripts/pre_train/train_multi.sh
|
|||
|
||||
## Fine-tuning
|
||||
### Fine-tuned Checkpoints
|
||||
Download checkpoints from this [link](http://datarepo0.oss-cn-hangzhou-zmf.aliyuncs.com/Alibaba/SPACE1.0/outputs.zip).
|
||||
Download checkpoints from this [link](https://drive.google.com/file/d/1JerSwvLzes6b-igQ7lPCTIrh6IvrTMK6/view?usp=share_link).
|
||||
|
||||
The downloaded zip file `outputs.zip` contains our best fine-tuned checkpoints on different datasets:
|
||||
- the **7-th** epoch on MultiWOZ2.0 (**60** training epochs in total)
|
||||
|
|
|
@ -49,11 +49,11 @@ SAVE_ROOT=<YOUR_SAVE_PATH>/${PROJECT_NAME} # root path of model's output
|
|||
```
|
||||
|
||||
### Data Preparation
|
||||
Download data-split1 from this [link](http://datarepo0.oss-cn-hangzhou-zmf.aliyuncs.com/Alibaba/SPACE2/data.zip).
|
||||
Download data-split1 from this [link](https://drive.google.com/file/d/1ocwnuOLxB3VzngeWZsm59IRrhEv22Scx/view?usp=share_link).
|
||||
|
||||
The downloaded zip file `data.zip` contains pre-training corpora (including BANKING77, CLINC150 and HWU64) and three extra task-oriented dialog (TOD) benchmark datasets: REST8K, DSTC8 and TOP, all of which have already been processed. You need to put the unzipped directory `data` into the project directory `SPACE2.0` for the subsequent training.
|
||||
|
||||
Download data-split2 from this [link](http://datarepo0.oss-cn-hangzhou-zmf.aliyuncs.com/Alibaba/SPACE2/trippy/data.zip).
|
||||
Download data-split2 from this [link](https://drive.google.com/file/d/1BZvlARzxXobjpQQRWvkF3jwnLN9-9c-n/view?usp=share_link).
|
||||
|
||||
The downloaded zip file `data.zip` contains one TOD benchmark dataset: MultiWOZ2.1, which has already been processed. You need to put the unzipped directory `data` into the directory `SPACE2.0/trippy` for the subsequent training.
|
||||
|
||||
|
@ -77,12 +77,12 @@ SPACE2.0/
|
|||
|
||||
## Pre-training
|
||||
### Pre-training Corpora
|
||||
- [AnPreDial](http://datarepo0.oss-cn-hangzhou-zmf.aliyuncs.com/Alibaba/SPACE2/AnPreDial.zip): a new labeled dialog dataset annotated with semantic trees, which contains 32 existing labeled TOD datasets with 3
|
||||
- [AnPreDial](https://drive.google.com/file/d/1ocwnuOLxB3VzngeWZsm59IRrhEv22Scx/view?usp=share_link): a new labeled dialog dataset annotated with semantic trees, which contains 32 existing labeled TOD datasets with 3
|
||||
million turns, ranging from single-turn QA to multi-turn dialogs.
|
||||
- [UnPreDial](http://datarepo0.oss-cn-hangzhou-zmf.aliyuncs.com/Alibaba/SPACE2/UnPreDial.zip): a large-scale unlabeled dialog dataset consisting of 19M utterances with careful processing from 21 online dialog corpora, ranging from online forums to conversational machine reading comprehension.
|
||||
- [UnPreDial](https://drive.google.com/file/d/1ocwnuOLxB3VzngeWZsm59IRrhEv22Scx/view?usp=share_link): a large-scale unlabeled dialog dataset consisting of 19M utterances with careful processing from 21 online dialog corpora, ranging from online forums to conversational machine reading comprehension.
|
||||
|
||||
### Pre-trained Checkpoint
|
||||
- [SPACE2.0](http://datarepo0.oss-cn-hangzhou-zmf.aliyuncs.com/Alibaba/SPACE2/model.zip): an uncased model (12-layers, 768-hidden, 12-heads, 110M parameters)
|
||||
- [SPACE2.0](https://drive.google.com/file/d/1QOhrd_kB8VXevEAo1Gohr58LxMI4OjYo/view?usp=share_link): an uncased model (12-layers, 768-hidden, 12-heads, 110M parameters)
|
||||
|
||||
You need to unzip the downloaded model file `model.zip`, then put the unzipped directory `model` into the project directory `SPACE2.0` for further fine-tuning.
|
||||
|
||||
|
@ -100,7 +100,7 @@ sh scripts/pre_train/train.sh
|
|||
|
||||
## Fine-tuning
|
||||
### Fine-tuned Checkpoints
|
||||
Download checkpoints-split1 from this [link](http://datarepo0.oss-cn-hangzhou-zmf.aliyuncs.com/Alibaba/SPACE2/outputs.zip).
|
||||
Download checkpoints-split1 from this [link](https://drive.google.com/file/d/10QEEMNsjO5rH0ZRsJBj9zkDc5ozxc3Ch/view?usp=share_link).
|
||||
|
||||
The downloaded zip file `outputs.zip` contains our best fine-tuned checkpoints on the following six datasets:
|
||||
- BANKING77, CLINC150, HWU64 (**Intent Prediction**)
|
||||
|
@ -109,7 +109,7 @@ The downloaded zip file `outputs.zip` contains our best fine-tuned checkpoints o
|
|||
|
||||
If you want to reproduce our reported results, you should put the unzipped directory `outputs` into the directory `${SAVE_ROOT}` (set in scripts).
|
||||
|
||||
Download checkpoints-split2 from this [link](http://datarepo0.oss-cn-hangzhou-zmf.aliyuncs.com/Alibaba/SPACE2/trippy/outputs.zip).
|
||||
Download checkpoints-split2 from this [link](https://drive.google.com/file/d/1G7K6AIBcRTC3CgMtSZdJ_TM6rFeXGe96/view?usp=share_link).
|
||||
|
||||
The downloaded zip file `outputs.zip` contains our best fine-tuned checkpoints on one dataset:
|
||||
- MultiWOZ2.1 (**Dialog State Tracking**)
|
||||
|
@ -159,7 +159,7 @@ sh scripts/multiwoz21/train.sh
|
|||
|
||||
> **NOTE**: You can skip Step 1 if you directly download the output model of Step 1.
|
||||
> For the DST task, you should convert model parameters into Hugging Face format.
|
||||
> So you can download the model file from this [link](http://datarepo0.oss-cn-hangzhou-zmf.aliyuncs.com/Alibaba/SPACE2/trippy/model.zip) directly.
|
||||
> So you can download the model file from this [link](https://drive.google.com/file/d/1xzKhKBg0hJPAq1NebluLIwfVxnfN1-1R/view?usp=share_link) directly.
|
||||
> Then you need to unzip the downloaded model file `model.zip`, and put the unzipped directory `model` into the directory `SPACE2.0/trippy` for further fine-tuning.
|
||||
|
||||
### Inference
|
||||
|
|
Loading…
Reference in New Issue