diff --git a/.gitattributes b/.gitattributes index 3ceadf228852756feeebe34d34564f26caabbaa5..3c911cee18c62f7943df355469b57835db431c7b 100644 --- a/.gitattributes +++ b/.gitattributes @@ -169,6 +169,7 @@ data/train_NER/20190109_train_NER.tar.gz filter=lfs diff=lfs merge=lfs -text data/AB/1970/04_correctedxml.tar.gz filter=lfs diff=lfs merge=lfs -text data/AB/1971/04_correctedxml.tar.gz filter=lfs diff=lfs merge=lfs -text data/train_NER/20190116_train_NER_french.tar.gz filter=lfs diff=lfs merge=lfs -text +data/AB/1972/04_correctedxml.tar.gz filter=lfs diff=lfs merge=lfs -text data/AB/1926/03_correctedmeta.tar.gz filter=lfs diff=lfs merge=lfs -text data/AB/1927/03_correctedmeta.tar.gz filter=lfs diff=lfs merge=lfs -text data/AB/1928/03_correctedmeta.tar.gz filter=lfs diff=lfs merge=lfs -text @@ -241,3 +242,14 @@ data/AB/1994/03_correctedmeta.tar.gz filter=lfs diff=lfs merge=lfs -text data/AB/1995/03_correctedmeta.tar.gz filter=lfs diff=lfs merge=lfs -text data/politicians/MPs_after1890.csv filter=lfs diff=lfs merge=lfs -text data/politicians/lastnames/** filter=lfs diff=lfs merge=lfs -text +data/AB/1973/04_correctedxml.tar.gz filter=lfs diff=lfs merge=lfs -text +data/AB/1975/02_extractedxml.tar.gz filter=lfs diff=lfs merge=lfs -text +data/AB/1976/02_extractedxml.tar.gz filter=lfs diff=lfs merge=lfs -text +data/AB/1974/04_correctedxml.tar.gz filter=lfs diff=lfs merge=lfs -text +data/AB/1980/02_extractedxml.tar.gz filter=lfs diff=lfs merge=lfs -text +data/AB/1981/02_extractedxml.tar.gz filter=lfs diff=lfs merge=lfs -text +data/AB/1977/02_extractedxml.tar.gz filter=lfs diff=lfs merge=lfs -text +data/AB/1982/02_extractedxml.tar.gz filter=lfs diff=lfs merge=lfs -text +data/AB/1983/02_extractedxml.tar.gz filter=lfs diff=lfs merge=lfs -text +data/AB/1991/02_extractedxml.tar.gz filter=lfs diff=lfs merge=lfs -text +data/AB/1991/04_correctedxml.tar.gz filter=lfs diff=lfs merge=lfs -text diff --git a/.renku/workflow/337b046f45a64dec9c810ea6e51ecaad_python.cwl 
b/.renku/workflow/337b046f45a64dec9c810ea6e51ecaad_python.cwl new file mode 100644 index 0000000000000000000000000000000000000000..8f8df800e526f544aadb6296ce07959ce6783d3b --- /dev/null +++ b/.renku/workflow/337b046f45a64dec9c810ea6e51ecaad_python.cwl @@ -0,0 +1,51 @@ +arguments: [] +baseCommand: +- python +class: CommandLineTool +cwlVersion: v1.0 +hints: [] +inputs: + input_1: + default: + class: File + path: ../../src/python/run_correctxml.py + inputBinding: + position: 1 + separate: true + shellQuote: true + streamable: false + type: File + input_2: + default: + class: File + path: ../../data/AB/1973/02_extractedxml.tar.gz + inputBinding: + position: 2 + separate: true + shellQuote: true + streamable: false + type: File + input_3: + default: data/AB/1973/04_correctedxml.tar.gz + inputBinding: + position: 3 + separate: true + shellQuote: true + streamable: false + type: string +outputs: + output_0: + outputBinding: + glob: $(inputs.input_3) + streamable: false + type: File +permanentFailCodes: [] +requirements: +- class: InlineJavascriptRequirement +- class: InitialWorkDirRequirement + listing: + - entry: '$({"listing": [], "class": "Directory"})' + entryname: data/AB/1973 + writable: true +successCodes: [] +temporaryFailCodes: [] diff --git a/.renku/workflow/55db25b9be3e4de8a799064ce8a81038_python.cwl b/.renku/workflow/55db25b9be3e4de8a799064ce8a81038_python.cwl new file mode 100644 index 0000000000000000000000000000000000000000..60cb7b6b64955af6734cdc5085559545f9ccc7a2 --- /dev/null +++ b/.renku/workflow/55db25b9be3e4de8a799064ce8a81038_python.cwl @@ -0,0 +1,51 @@ +arguments: [] +baseCommand: +- python +class: CommandLineTool +cwlVersion: v1.0 +hints: [] +inputs: + input_1: + default: + class: File + path: ../../src/python/run_extract_origxml.py + inputBinding: + position: 1 + separate: true + shellQuote: true + streamable: false + type: File + input_2: + default: + class: File + path: ../../data/AB/1981/00_rawpdfs.tar.gz + inputBinding: + position: 2 + 
separate: true + shellQuote: true + streamable: false + type: File + input_3: + default: data/AB/1981/02_extractedxml.tar.gz + inputBinding: + position: 3 + separate: true + shellQuote: true + streamable: false + type: string +outputs: + output_0: + outputBinding: + glob: $(inputs.input_3) + streamable: false + type: File +permanentFailCodes: [] +requirements: +- class: InlineJavascriptRequirement +- class: InitialWorkDirRequirement + listing: + - entry: '$({"listing": [], "class": "Directory"})' + entryname: data/AB/1981 + writable: true +successCodes: [] +temporaryFailCodes: [] diff --git a/.renku/workflow/61efd03ecd5a474e86750b7d60a0fe29_python.cwl b/.renku/workflow/61efd03ecd5a474e86750b7d60a0fe29_python.cwl new file mode 100644 index 0000000000000000000000000000000000000000..d8ea2da4010643e2d712b972595897236af2f55b --- /dev/null +++ b/.renku/workflow/61efd03ecd5a474e86750b7d60a0fe29_python.cwl @@ -0,0 +1,51 @@ +arguments: [] +baseCommand: +- python +class: CommandLineTool +cwlVersion: v1.0 +hints: [] +inputs: + input_1: + default: + class: File + path: ../../src/python/run_extract_origxml.py + inputBinding: + position: 1 + separate: true + shellQuote: true + streamable: false + type: File + input_2: + default: + class: File + path: ../../data/AB/1991/00_rawpdfs.tar.gz + inputBinding: + position: 2 + separate: true + shellQuote: true + streamable: false + type: File + input_3: + default: data/AB/1991/02_extractedxml.tar.gz + inputBinding: + position: 3 + separate: true + shellQuote: true + streamable: false + type: string +outputs: + output_0: + outputBinding: + glob: $(inputs.input_3) + streamable: false + type: File +permanentFailCodes: [] +requirements: +- class: InlineJavascriptRequirement +- class: InitialWorkDirRequirement + listing: + - entry: '$({"listing": [], "class": "Directory"})' + entryname: data/AB/1991 + writable: true +successCodes: [] +temporaryFailCodes: [] diff --git a/.renku/workflow/7b69fad35c574cbfb28b966576580e0d_python.cwl 
b/.renku/workflow/7b69fad35c574cbfb28b966576580e0d_python.cwl new file mode 100644 index 0000000000000000000000000000000000000000..bfd6aa94e19ac1e78ad7275032beb77935681205 --- /dev/null +++ b/.renku/workflow/7b69fad35c574cbfb28b966576580e0d_python.cwl @@ -0,0 +1,51 @@ +arguments: [] +baseCommand: +- python +class: CommandLineTool +cwlVersion: v1.0 +hints: [] +inputs: + input_1: + default: + class: File + path: ../../src/python/run_extract_origxml.py + inputBinding: + position: 1 + separate: true + shellQuote: true + streamable: false + type: File + input_2: + default: + class: File + path: ../../data/AB/1980/00_rawpdfs.tar.gz + inputBinding: + position: 2 + separate: true + shellQuote: true + streamable: false + type: File + input_3: + default: data/AB/1980/02_extractedxml.tar.gz + inputBinding: + position: 3 + separate: true + shellQuote: true + streamable: false + type: string +outputs: + output_0: + outputBinding: + glob: $(inputs.input_3) + streamable: false + type: File +permanentFailCodes: [] +requirements: +- class: InlineJavascriptRequirement +- class: InitialWorkDirRequirement + listing: + - entry: '$({"listing": [], "class": "Directory"})' + entryname: data/AB/1980 + writable: true +successCodes: [] +temporaryFailCodes: [] diff --git a/.renku/workflow/95aa274372e6493190e78f082644bc24_python.cwl b/.renku/workflow/95aa274372e6493190e78f082644bc24_python.cwl new file mode 100644 index 0000000000000000000000000000000000000000..2848aa558dd8961d372c5c5cea47d12286191413 --- /dev/null +++ b/.renku/workflow/95aa274372e6493190e78f082644bc24_python.cwl @@ -0,0 +1,51 @@ +arguments: [] +baseCommand: +- python +class: CommandLineTool +cwlVersion: v1.0 +hints: [] +inputs: + input_1: + default: + class: File + path: ../../src/python/run_extract_origxml.py + inputBinding: + position: 1 + separate: true + shellQuote: true + streamable: false + type: File + input_2: + default: + class: File + path: ../../data/AB/1982/00_rawpdfs.tar.gz + inputBinding: + position: 2 + 
separate: true + shellQuote: true + streamable: false + type: File + input_3: + default: data/AB/1982/02_extractedxml.tar.gz + inputBinding: + position: 3 + separate: true + shellQuote: true + streamable: false + type: string +outputs: + output_0: + outputBinding: + glob: $(inputs.input_3) + streamable: false + type: File +permanentFailCodes: [] +requirements: +- class: InlineJavascriptRequirement +- class: InitialWorkDirRequirement + listing: + - entry: '$({"listing": [], "class": "Directory"})' + entryname: data/AB/1982 + writable: true +successCodes: [] +temporaryFailCodes: [] diff --git a/.renku/workflow/a29a24b4b00f4b7eaf5540b98f81966a_python.cwl b/.renku/workflow/a29a24b4b00f4b7eaf5540b98f81966a_python.cwl new file mode 100644 index 0000000000000000000000000000000000000000..b150efbbbc130a6cd49b09485c7493eb99a6bd16 --- /dev/null +++ b/.renku/workflow/a29a24b4b00f4b7eaf5540b98f81966a_python.cwl @@ -0,0 +1,51 @@ +arguments: [] +baseCommand: +- python +class: CommandLineTool +cwlVersion: v1.0 +hints: [] +inputs: + input_1: + default: + class: File + path: ../../src/python/run_correctxml.py + inputBinding: + position: 1 + separate: true + shellQuote: true + streamable: false + type: File + input_2: + default: + class: File + path: ../../data/AB/1972/02_extractedxml.tar.gz + inputBinding: + position: 2 + separate: true + shellQuote: true + streamable: false + type: File + input_3: + default: data/AB/1972/04_correctedxml.tar.gz + inputBinding: + position: 3 + separate: true + shellQuote: true + streamable: false + type: string +outputs: + output_0: + outputBinding: + glob: $(inputs.input_3) + streamable: false + type: File +permanentFailCodes: [] +requirements: +- class: InlineJavascriptRequirement +- class: InitialWorkDirRequirement + listing: + - entry: '$({"listing": [], "class": "Directory"})' + entryname: data/AB/1972 + writable: true +successCodes: [] +temporaryFailCodes: [] diff --git a/.renku/workflow/a529202c4bba462f8b66a848b64eb10f_python.cwl 
b/.renku/workflow/a529202c4bba462f8b66a848b64eb10f_python.cwl new file mode 100644 index 0000000000000000000000000000000000000000..ff090ca30c8768c448c2e4b07cce37b58de8b567 --- /dev/null +++ b/.renku/workflow/a529202c4bba462f8b66a848b64eb10f_python.cwl @@ -0,0 +1,51 @@ +arguments: [] +baseCommand: +- python +class: CommandLineTool +cwlVersion: v1.0 +hints: [] +inputs: + input_1: + default: + class: File + path: ../../src/python/run_correctxml.py + inputBinding: + position: 1 + separate: true + shellQuote: true + streamable: false + type: File + input_2: + default: + class: File + path: ../../data/AB/1991/02_extractedxml.tar.gz + inputBinding: + position: 2 + separate: true + shellQuote: true + streamable: false + type: File + input_3: + default: data/AB/1991/04_correctedxml.tar.gz + inputBinding: + position: 3 + separate: true + shellQuote: true + streamable: false + type: string +outputs: + output_0: + outputBinding: + glob: $(inputs.input_3) + streamable: false + type: File +permanentFailCodes: [] +requirements: +- class: InlineJavascriptRequirement +- class: InitialWorkDirRequirement + listing: + - entry: '$({"listing": [], "class": "Directory"})' + entryname: data/AB/1991 + writable: true +successCodes: [] +temporaryFailCodes: [] diff --git a/.renku/workflow/ae0772e5030141aea58238ddba633337_python.cwl b/.renku/workflow/ae0772e5030141aea58238ddba633337_python.cwl new file mode 100644 index 0000000000000000000000000000000000000000..03abb751e2cc7141b9a95d9031b25007a8705fbf --- /dev/null +++ b/.renku/workflow/ae0772e5030141aea58238ddba633337_python.cwl @@ -0,0 +1,51 @@ +arguments: [] +baseCommand: +- python +class: CommandLineTool +cwlVersion: v1.0 +hints: [] +inputs: + input_1: + default: + class: File + path: ../../src/python/run_extract_origxml.py + inputBinding: + position: 1 + separate: true + shellQuote: true + streamable: false + type: File + input_2: + default: + class: File + path: ../../data/AB/1983/00_rawpdfs.tar.gz + inputBinding: + position: 2 + 
separate: true + shellQuote: true + streamable: false + type: File + input_3: + default: data/AB/1983/02_extractedxml.tar.gz + inputBinding: + position: 3 + separate: true + shellQuote: true + streamable: false + type: string +outputs: + output_0: + outputBinding: + glob: $(inputs.input_3) + streamable: false + type: File +permanentFailCodes: [] +requirements: +- class: InlineJavascriptRequirement +- class: InitialWorkDirRequirement + listing: + - entry: '$({"listing": [], "class": "Directory"})' + entryname: data/AB/1983 + writable: true +successCodes: [] +temporaryFailCodes: [] diff --git a/.renku/workflow/b46e116548cc4088b25c047ad4b262ba_python.cwl b/.renku/workflow/b46e116548cc4088b25c047ad4b262ba_python.cwl new file mode 100644 index 0000000000000000000000000000000000000000..5a837e26475eaf523773693e8fe299dbb5f7321a --- /dev/null +++ b/.renku/workflow/b46e116548cc4088b25c047ad4b262ba_python.cwl @@ -0,0 +1,51 @@ +arguments: [] +baseCommand: +- python +class: CommandLineTool +cwlVersion: v1.0 +hints: [] +inputs: + input_1: + default: + class: File + path: ../../src/python/run_extract_origxml.py + inputBinding: + position: 1 + separate: true + shellQuote: true + streamable: false + type: File + input_2: + default: + class: File + path: ../../data/AB/1975/00_rawpdfs.tar.gz + inputBinding: + position: 2 + separate: true + shellQuote: true + streamable: false + type: File + input_3: + default: data/AB/1975/02_extractedxml.tar.gz + inputBinding: + position: 3 + separate: true + shellQuote: true + streamable: false + type: string +outputs: + output_0: + outputBinding: + glob: $(inputs.input_3) + streamable: false + type: File +permanentFailCodes: [] +requirements: +- class: InlineJavascriptRequirement +- class: InitialWorkDirRequirement + listing: + - entry: '$({"listing": [], "class": "Directory"})' + entryname: data/AB/1975 + writable: true +successCodes: [] +temporaryFailCodes: [] diff --git a/.renku/workflow/ced9d30b55bc41c7aeb714cd1d863012_python.cwl 
b/.renku/workflow/ced9d30b55bc41c7aeb714cd1d863012_python.cwl new file mode 100644 index 0000000000000000000000000000000000000000..67dea3795f9c6c470703e9d053f0f22f80bd26b0 --- /dev/null +++ b/.renku/workflow/ced9d30b55bc41c7aeb714cd1d863012_python.cwl @@ -0,0 +1,51 @@ +arguments: [] +baseCommand: +- python +class: CommandLineTool +cwlVersion: v1.0 +hints: [] +inputs: + input_1: + default: + class: File + path: ../../src/python/run_correctxml.py + inputBinding: + position: 1 + separate: true + shellQuote: true + streamable: false + type: File + input_2: + default: + class: File + path: ../../data/AB/1974/02_extractedxml.tar.gz + inputBinding: + position: 2 + separate: true + shellQuote: true + streamable: false + type: File + input_3: + default: data/AB/1974/04_correctedxml.tar.gz + inputBinding: + position: 3 + separate: true + shellQuote: true + streamable: false + type: string +outputs: + output_0: + outputBinding: + glob: $(inputs.input_3) + streamable: false + type: File +permanentFailCodes: [] +requirements: +- class: InlineJavascriptRequirement +- class: InitialWorkDirRequirement + listing: + - entry: '$({"listing": [], "class": "Directory"})' + entryname: data/AB/1974 + writable: true +successCodes: [] +temporaryFailCodes: [] diff --git a/.renku/workflow/d4a366e49dc54eee9cc13a47e884b9e5_python.cwl b/.renku/workflow/d4a366e49dc54eee9cc13a47e884b9e5_python.cwl new file mode 100644 index 0000000000000000000000000000000000000000..149d732502db5cf746d80f61af9463bc06b12906 --- /dev/null +++ b/.renku/workflow/d4a366e49dc54eee9cc13a47e884b9e5_python.cwl @@ -0,0 +1,51 @@ +arguments: [] +baseCommand: +- python +class: CommandLineTool +cwlVersion: v1.0 +hints: [] +inputs: + input_1: + default: + class: File + path: ../../src/python/run_extract_origxml.py + inputBinding: + position: 1 + separate: true + shellQuote: true + streamable: false + type: File + input_2: + default: + class: File + path: ../../data/AB/1977/00_rawpdfs.tar.gz + inputBinding: + position: 2 + 
separate: true + shellQuote: true + streamable: false + type: File + input_3: + default: data/AB/1977/02_extractedxml.tar.gz + inputBinding: + position: 3 + separate: true + shellQuote: true + streamable: false + type: string +outputs: + output_0: + outputBinding: + glob: $(inputs.input_3) + streamable: false + type: File +permanentFailCodes: [] +requirements: +- class: InlineJavascriptRequirement +- class: InitialWorkDirRequirement + listing: + - entry: '$({"listing": [], "class": "Directory"})' + entryname: data/AB/1977 + writable: true +successCodes: [] +temporaryFailCodes: [] diff --git a/.renku/workflow/e932e705a4434ee09512f22997fa627f_python.cwl b/.renku/workflow/e932e705a4434ee09512f22997fa627f_python.cwl new file mode 100644 index 0000000000000000000000000000000000000000..3d6ea5d6e043a9fef5ec10e45ecc58d9fac870b8 --- /dev/null +++ b/.renku/workflow/e932e705a4434ee09512f22997fa627f_python.cwl @@ -0,0 +1,51 @@ +arguments: [] +baseCommand: +- python +class: CommandLineTool +cwlVersion: v1.0 +hints: [] +inputs: + input_1: + default: + class: File + path: ../../src/python/run_extract_origxml.py + inputBinding: + position: 1 + separate: true + shellQuote: true + streamable: false + type: File + input_2: + default: + class: File + path: ../../data/AB/1976/00_rawpdfs.tar.gz + inputBinding: + position: 2 + separate: true + shellQuote: true + streamable: false + type: File + input_3: + default: data/AB/1976/02_extractedxml.tar.gz + inputBinding: + position: 3 + separate: true + shellQuote: true + streamable: false + type: string +outputs: + output_0: + outputBinding: + glob: $(inputs.input_3) + streamable: false + type: File +permanentFailCodes: [] +requirements: +- class: InlineJavascriptRequirement +- class: InitialWorkDirRequirement + listing: + - entry: '$({"listing": [], "class": "Directory"})' + entryname: data/AB/1976 + writable: true +successCodes: [] +temporaryFailCodes: [] diff --git a/data/AB/1972/04_correctedxml.tar.gz b/data/AB/1972/04_correctedxml.tar.gz 
new file mode 100644 index 0000000000000000000000000000000000000000..a936494ecf16a998574d4505b08ae421d9539c1e --- /dev/null +++ b/data/AB/1972/04_correctedxml.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fc2b7da468479d966114d65d094869b5cb0d362805397138b98783b4f61187c +size 32454837 diff --git a/data/AB/1973/04_correctedxml.tar.gz b/data/AB/1973/04_correctedxml.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..328b4066e7441948288c136d187f897490adc17f --- /dev/null +++ b/data/AB/1973/04_correctedxml.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d4a6ad272fc8e6015fb2da4f16a7f4363d7d835427cf9c4777294f982424e6b +size 20865709 diff --git a/data/AB/1974/04_correctedxml.tar.gz b/data/AB/1974/04_correctedxml.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4b22e2c3e141a453a88f320d14d4dce26e3db7a8 --- /dev/null +++ b/data/AB/1974/04_correctedxml.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51ed18e34920d0f4a6402cf9a96ecdd12dc47ccb3a2abab0976bc5911dff8ec0 +size 22540482 diff --git a/data/AB/1975/02_extractedxml.tar.gz b/data/AB/1975/02_extractedxml.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3ec32677fc873e503caecf4042106bb198b29aab --- /dev/null +++ b/data/AB/1975/02_extractedxml.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cb09c4ec6b75766c80221307ec56c25269ed0d6256fed85d34d6cee3a919740 +size 379227481 diff --git a/data/AB/1976/02_extractedxml.tar.gz b/data/AB/1976/02_extractedxml.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4b6b63886dece072eafca28b2839ac7409cc61ad --- /dev/null +++ b/data/AB/1976/02_extractedxml.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc5f661baa69055ad3e624c87c61de9912b4f0c0dfc7895b85ffbc55ab6644ab +size 321115032 diff --git a/data/AB/1977/02_extractedxml.tar.gz 
b/data/AB/1977/02_extractedxml.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d3d86d0b41745fb8b6e22af9b580ab3835c07dea --- /dev/null +++ b/data/AB/1977/02_extractedxml.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:712d29e255e7df6857be63ca07b46fe767193bba618694325da6ab1ad69491e7 +size 314960855 diff --git a/data/AB/1980/02_extractedxml.tar.gz b/data/AB/1980/02_extractedxml.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9a49a4f352da8f831afa549a7ab7bfdfb699bba1 --- /dev/null +++ b/data/AB/1980/02_extractedxml.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0baf515b60277460abe248e29981e2fb35cb335b5bcd8fe6efcb8d325738f59 +size 325333041 diff --git a/data/AB/1981/02_extractedxml.tar.gz b/data/AB/1981/02_extractedxml.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..94e8d0fa81abff2c90a60f0974f392c63fcdca3b --- /dev/null +++ b/data/AB/1981/02_extractedxml.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8ff09ddaa77c85d59601eed6e8817489db27a055f353cdef9aae7a28e24db03 +size 341425000 diff --git a/data/AB/1982/02_extractedxml.tar.gz b/data/AB/1982/02_extractedxml.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7a73875c1cea2143d450fea4c66eb6de3b239068 --- /dev/null +++ b/data/AB/1982/02_extractedxml.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b23affdc93e07cd6c6348ce5816b945f2f22f5d570b34517ece1b6b6f2e6dbc4 +size 322541816 diff --git a/data/AB/1983/02_extractedxml.tar.gz b/data/AB/1983/02_extractedxml.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fa9744c30b883f4079d245a176f7505055b27b96 --- /dev/null +++ b/data/AB/1983/02_extractedxml.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ab9c3e50b2595f534af638ed1fa1ea8e86f102966f17855fd1ea154dc97bd20 +size 302597163 diff --git 
a/data/AB/1991/02_extractedxml.tar.gz b/data/AB/1991/02_extractedxml.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d62dd1a39a0ab010b35b30ab3e856ef42b65c5a3 --- /dev/null +++ b/data/AB/1991/02_extractedxml.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abd690cf237cfc067e717432e64e03c670fe56392ffd14e98fe70b015024fa81 +size 431087808 diff --git a/data/AB/1991/04_correctedxml.tar.gz b/data/AB/1991/04_correctedxml.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3c2b7184d659cc3b5d0e3abeab841188bd2f33b9 --- /dev/null +++ b/data/AB/1991/04_correctedxml.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed1bd53b584fca43253e09d30f9763daaa17ca599f200cc61b88608163c56a19 +size 29447328 diff --git a/src/sh/execute_per_year_isolation.sh b/src/sh/execute_per_year_isolation.sh new file mode 100755 index 0000000000000000000000000000000000000000..cd3f676b994f05672484e300931c3c452317003a --- /dev/null +++ b/src/sh/execute_per_year_isolation.sh @@ -0,0 +1,16 @@ +#!/bin/bash +# The input variables are: +# 1 - python script to run (passed to `python`) +# 2 - folder of the database (one subfolder per year) +# 3, 4 - names of the input and output file, without the .tar.gz extension +# 5, 6 - first and last year to process (inclusive) + +year_start=$5 +year_end=$6 + +for year in $(seq $year_start $year_end) +do + echo $year + $CONDA_DIR/envs/renku/bin/renku run --isolation python $1 ${2}/$year/${3}.tar.gz ${2}/$year/${4}.tar.gz +done +