diff --git a/.gitattributes b/.gitattributes index 6b275ca1d952a102435fe955febf1781c9f8cd5c..5eeaee6157020e9bc8a4d3c4f85e7c5e54f90e20 100644 --- a/.gitattributes +++ b/.gitattributes @@ -261,3 +261,5 @@ data/AB/1976/04_correctedxml.tar.gz filter=lfs diff=lfs merge=lfs -text data/AB/1977/04_correctedxml.tar.gz filter=lfs diff=lfs merge=lfs -text data/AB/1975/04_correctedxml.tar.gz filter=lfs diff=lfs merge=lfs -text data/train_NER/1891_20026449_corrected_german.txt filter=lfs diff=lfs merge=lfs -text +data/train_NER/models/de_1891_20026449/** filter=lfs diff=lfs merge=lfs -text +data/train_NER/1891_20026449_corrected_german_trained.txt filter=lfs diff=lfs merge=lfs -text diff --git a/.renku/workflow/be563b7060014d2d8483167855106043_python.cwl b/.renku/workflow/be563b7060014d2d8483167855106043_python.cwl new file mode 100644 index 0000000000000000000000000000000000000000..a6a491a04fea67bc0fb61836c429a7e93f74b759 --- /dev/null +++ b/.renku/workflow/be563b7060014d2d8483167855106043_python.cwl @@ -0,0 +1,79 @@ +arguments: [] +baseCommand: +- python +class: CommandLineTool +cwlVersion: v1.0 +hints: [] +inputs: + input_1: + default: + class: File + path: ../../src/python/run_train_ner.py + inputBinding: + position: 1 + separate: true + shellQuote: true + streamable: false + type: File + input_2: + default: de_core_news_sm + inputBinding: + position: 2 + prefix: -m + separate: true + shellQuote: true + streamable: false + type: string + input_3: + default: 2 + inputBinding: + position: 3 + prefix: -n + separate: true + shellQuote: true + streamable: false + type: int + input_4: + default: + class: File + path: ../../data/train_NER/1891_20026449_corrected_german.txt + inputBinding: + position: 4 + prefix: -t + separate: true + shellQuote: true + streamable: false + type: File + input_5: + default: data/train_NER/models/de_1891_20026449 + inputBinding: + position: 5 + prefix: -o + separate: true + shellQuote: true + streamable: false + type: string + input_6: + default: data/train_NER/1891_20026449_corrected_german_trained.txt + inputBinding: + position: 6 + prefix: -u + separate: true + shellQuote: true + streamable: false + type: string +outputs: + output_0: + outputBinding: + glob: $(inputs.input_6) + streamable: false + type: File + output_1: + outputBinding: + glob: $(inputs.input_5) + streamable: false + type: Directory +permanentFailCodes: [] +requirements: [] +successCodes: [] +temporaryFailCodes: [] diff --git a/data/train_NER/1891_20026449_corrected_german_trained.txt b/data/train_NER/1891_20026449_corrected_german_trained.txt new file mode 100644 index 0000000000000000000000000000000000000000..82eb54ee6be9c67b1d340b7dfc620ffb4904b443 --- /dev/null +++ b/data/train_NER/1891_20026449_corrected_german_trained.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b862240f5c6f4fd810410ef98cf54005a1377c03561edfe388ba7de82ad4635a +size 15774 diff --git a/data/train_NER/models/de_1891_20026449/meta.json b/data/train_NER/models/de_1891_20026449/meta.json new file mode 100644 index 0000000000000000000000000000000000000000..08769601067376e373e8d9691b10ffad69d35a0e --- /dev/null +++ b/data/train_NER/models/de_1891_20026449/meta.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a226dd8612134435dc4cffb3f72e801a626529a7d483950456f18aade3d4e408 +size 1177 diff --git a/data/train_NER/models/de_1891_20026449/ner/cfg b/data/train_NER/models/de_1891_20026449/ner/cfg new file mode 100644 index 0000000000000000000000000000000000000000..34362357928f0173bca17ce0648b2f58f75cd6de --- /dev/null +++ b/data/train_NER/models/de_1891_20026449/ner/cfg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:caba14fac1acf3633284ef918a6e23cbaf6104315f3592d17ff65d5902984db7 +size 315 diff --git a/data/train_NER/models/de_1891_20026449/ner/lower_model b/data/train_NER/models/de_1891_20026449/ner/lower_model new file mode 100644 index 0000000000000000000000000000000000000000..48a26fafd142b6d013042c1d15d868dbe2cb9699 --- /dev/null +++ b/data/train_NER/models/de_1891_20026449/ner/lower_model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62aac89b58a37a396b2df0d603aeb0050f43e9281a7a6bbe6164e6b5a03338cd +size 1240362 diff --git a/data/train_NER/models/de_1891_20026449/ner/moves b/data/train_NER/models/de_1891_20026449/ner/moves new file mode 100644 index 0000000000000000000000000000000000000000..4824aeb441654acf1139f7f1cfa4e3d0a096aa35 --- /dev/null +++ b/data/train_NER/models/de_1891_20026449/ner/moves @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51877cad668c9c632f35eaa406465c66a4229cbc7267e4061014a94f1fbe6928 +size 1301 diff --git a/data/train_NER/models/de_1891_20026449/ner/tok2vec_model b/data/train_NER/models/de_1891_20026449/ner/tok2vec_model new file mode 100644 index 0000000000000000000000000000000000000000..aee6b5b3e935ddcaf48c8a1c209ab078e214a00f --- /dev/null +++ b/data/train_NER/models/de_1891_20026449/ner/tok2vec_model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9e3754e912fd43e8e9a0d77182ffabce66dec9681a85e57fc26a15c1272e7de +size 11333134 diff --git a/data/train_NER/models/de_1891_20026449/ner/upper_model b/data/train_NER/models/de_1891_20026449/ner/upper_model new file mode 100644 index 0000000000000000000000000000000000000000..8a6ce51ba23b63622d6487faeae2810de0fe6eff --- /dev/null +++ b/data/train_NER/models/de_1891_20026449/ner/upper_model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08751eef1b22c30b8a97d4b8c5c07bfbe4ab3b4398e15308334e33868eda79d3 +size 13925 diff --git a/data/train_NER/models/de_1891_20026449/parser/cfg b/data/train_NER/models/de_1891_20026449/parser/cfg new file mode 100644 index 0000000000000000000000000000000000000000..fffb9bc458d8f86ea5c6fd879fb4f1fc3fcfb918 --- /dev/null +++ b/data/train_NER/models/de_1891_20026449/parser/cfg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5cc5d0fbaceabb3f30aa436ade9190f45d3621b995b9e3920046097e1dc81d3 +size 316 diff --git a/data/train_NER/models/de_1891_20026449/parser/lower_model b/data/train_NER/models/de_1891_20026449/parser/lower_model new file mode 100644 index 0000000000000000000000000000000000000000..942c62908acfbd40102854b7d420ce12dbafe446 --- /dev/null +++ b/data/train_NER/models/de_1891_20026449/parser/lower_model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0728a2732d866e464c0fc641af5e7c5e585dbd76b82b6710958244aa1415e8ce +size 2685162 diff --git a/data/train_NER/models/de_1891_20026449/parser/moves b/data/train_NER/models/de_1891_20026449/parser/moves new file mode 100644 index 0000000000000000000000000000000000000000..6943e10b7aa9fb5aa2fbc114b1432b491e2a06cc --- /dev/null +++ b/data/train_NER/models/de_1891_20026449/parser/moves @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba3f53c625c47fef99d1ce25ed4980716209220e93fc09bf1b4ac81ed9ac7786 +size 10458 diff --git a/data/train_NER/models/de_1891_20026449/parser/tok2vec_model b/data/train_NER/models/de_1891_20026449/parser/tok2vec_model new file mode 100644 index 0000000000000000000000000000000000000000..18b0aa09db35803546f4d7b321653fa538f5ca05 --- /dev/null +++ b/data/train_NER/models/de_1891_20026449/parser/tok2vec_model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f65e6ce101927febbedf88f660f7701aab25d1dbb443d8895b541eb185742f0f +size 11333138 diff --git a/data/train_NER/models/de_1891_20026449/parser/upper_model b/data/train_NER/models/de_1891_20026449/parser/upper_model new file mode 100644 index 0000000000000000000000000000000000000000..318f1b87deb75933792006b7bfd5291603b6e684 --- /dev/null +++ b/data/train_NER/models/de_1891_20026449/parser/upper_model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:155ae76239fd9a4286bd20155d2fc55c8c3bfedead10f4a4e6a1bbeb76d6c836 +size 108001 diff --git a/data/train_NER/models/de_1891_20026449/tagger/cfg b/data/train_NER/models/de_1891_20026449/tagger/cfg new file mode 100644 index 0000000000000000000000000000000000000000..ea7c1f8eaafb870f51506826812d0b6bd9c6d08b --- /dev/null +++ b/data/train_NER/models/de_1891_20026449/tagger/cfg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33e138e2ef96856c9fd99bd125412183fd64b1a5869a5170d227d14e157cb00c +size 103 diff --git a/data/train_NER/models/de_1891_20026449/tagger/model b/data/train_NER/models/de_1891_20026449/tagger/model new file mode 100644 index 0000000000000000000000000000000000000000..1702c40444a49a4719808ebdbf94e443e4b1ca03 --- /dev/null +++ b/data/train_NER/models/de_1891_20026449/tagger/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecdeac1885a2abfd7b8efd9290b2bf83af0bd6dedfa4d4364dbdbac83f19b226 +size 11361726 diff --git a/data/train_NER/models/de_1891_20026449/tagger/tag_map b/data/train_NER/models/de_1891_20026449/tagger/tag_map new file mode 100644 index 0000000000000000000000000000000000000000..9ebb28c739e996c0aab2c6e5ab5e652b094ba121 --- /dev/null +++ b/data/train_NER/models/de_1891_20026449/tagger/tag_map @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f932cdd2fc67a978794bdbfc6886ccdf7a8c5d21cca527e3c517af535416069 +size 1189 diff --git a/data/train_NER/models/de_1891_20026449/tokenizer b/data/train_NER/models/de_1891_20026449/tokenizer new file mode 100644 index 0000000000000000000000000000000000000000..de940f6eb7df196fce7f6d569d49f467b34cab2d --- /dev/null +++ b/data/train_NER/models/de_1891_20026449/tokenizer @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c79890b18a3f62bf37591bac37a08023139316effaa2ff4e37dbd95e733f33ae +size 10948 diff --git a/data/train_NER/models/de_1891_20026449/vocab/key2row b/data/train_NER/models/de_1891_20026449/vocab/key2row new file mode 100644 index 0000000000000000000000000000000000000000..c66317f8d354f32911b0beb802a18ab12b6507c9 --- /dev/null +++ b/data/train_NER/models/de_1891_20026449/vocab/key2row @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76be8b528d0075f7aae98d6fa57a6d3c83ae480a8469e668d7b0af968995ac71 +size 1 diff --git a/data/train_NER/models/de_1891_20026449/vocab/lexemes.bin b/data/train_NER/models/de_1891_20026449/vocab/lexemes.bin new file mode 100644 index 0000000000000000000000000000000000000000..485eb555f2b19b515216b6aef44c68693e86affd --- /dev/null +++ b/data/train_NER/models/de_1891_20026449/vocab/lexemes.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d4d1461f7aeea02a16eff98771ff925fbab99f6f1dac0f107684fe080ade46b +size 7446144 diff --git a/data/train_NER/models/de_1891_20026449/vocab/strings.json b/data/train_NER/models/de_1891_20026449/vocab/strings.json new file mode 100644 index 0000000000000000000000000000000000000000..bace3a9f3b61bd77e7ef9c189de9b4c5d5fa11ef --- /dev/null +++ b/data/train_NER/models/de_1891_20026449/vocab/strings.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31cdf9d2334dcf4e42e3b8d6901524c576dca8c22642597c9ae0781571410076 +size 2415753 diff --git a/data/train_NER/models/de_1891_20026449/vocab/vectors b/data/train_NER/models/de_1891_20026449/vocab/vectors new file mode 100644 index 0000000000000000000000000000000000000000..c3cdeaca40454242d7cdebf54ce1f2a21dba696d --- /dev/null +++ b/data/train_NER/models/de_1891_20026449/vocab/vectors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14772b683e726436d5948ad3fff2b43d036ef2ebbe3458aafed6004e05a40706 +size 128