diff --git a/prototypes/fine_tuning_spaCy/__pycache__/training_data.cpython-312.pyc b/prototypes/fine_tuning_spaCy/__pycache__/training_data.cpython-312.pyc index 2f55aa3..02fc94a 100644 Binary files a/prototypes/fine_tuning_spaCy/__pycache__/training_data.cpython-312.pyc and b/prototypes/fine_tuning_spaCy/__pycache__/training_data.cpython-312.pyc differ diff --git a/prototypes/fine_tuning_spaCy/data/train.spacy b/prototypes/fine_tuning_spaCy/data/train.spacy index e488733..38bfb54 100644 Binary files a/prototypes/fine_tuning_spaCy/data/train.spacy and b/prototypes/fine_tuning_spaCy/data/train.spacy differ diff --git a/prototypes/fine_tuning_spaCy/entities_output.json b/prototypes/fine_tuning_spaCy/entities_output.json index fb7268b..b6086e2 100644 --- a/prototypes/fine_tuning_spaCy/entities_output.json +++ b/prototypes/fine_tuning_spaCy/entities_output.json @@ -5,28 +5,8 @@ "page": 4 }, { - "label": "AUSSCHÜTTUNGSRENDITE", - "entity": "5Mio.€", - "page": 4 - }, - { - "label": "AUSSCHÜTTUNGSRENDITE", - "entity": "1,40 %", - "page": 4 - }, - { - "label": "AUSSCHÜTTUNGSRENDITE", - "entity": "4,91 %", - "page": 4 - }, - { - "label": "AUSSCHÜTTUNGSRENDITE", - "entity": "4,0 %", - "page": 4 - }, - { - "label": "AUSSCHÜTTUNGSRENDITE", - "entity": "0,1%", + "label": "Laufzeit", + "entity": "10 bis 12 Jahre", "page": 4 }, { @@ -34,26 +14,6 @@ "entity": "4,0%", "page": 5 }, - { - "label": "AUSSCHÜTTUNGSRENDITE", - "entity": "4 %", - "page": 9 - }, - { - "label": "AUSSCHÜTTUNGSRENDITE", - "entity": "800.000", - "page": 9 - }, - { - "label": "AUSSCHÜTTUNGSRENDITE", - "entity": "600.000", - "page": 9 - }, - { - "label": "AUSSCHÜTTUNGSRENDITE", - "entity": "400.000", - "page": 9 - }, { "label": "AUSSCHÜTTUNGSRENDITE", "entity": "500.000 |", @@ -61,12 +21,12 @@ }, { "label": "AUSSCHÜTTUNGSRENDITE", - "entity": "4", - "page": 9 + "entity": "5.20%", + "page": 11 }, { - "label": "AUSSCHÜTTUNGSRENDITE", - "entity": "5.20%", + "label": "Laufzeit", + "entity": "10-20", "page": 11 }, { @@ -76,7 +36,7 @@ }, { "label": "AUSSCHÜTTUNGSRENDITE", - "entity": "4%", + "entity": "45 %", "page": 11 }, { @@ -90,68 +50,68 @@ "page": 12 }, { - "label": "AUSSCHÜTTUNGSRENDITE", - "entity": "559 BGB", - "page": 16 - }, - { - "label": "AUSSCHÜTTUNGSRENDITE", - "entity": "4 Soa", - "page": 16 - }, - { - "label": "AUSSCHÜTTUNGSRENDITE", - "entity": "4 —", - "page": 16 - }, - { - "label": "AUSSCHÜTTUNGSRENDITE", - "entity": "558 BGB", - "page": 16 - }, - { - "label": "AUSSCHÜTTUNGSRENDITE", - "entity": "557 BGB", - "page": 16 - }, - { - "label": "AUSSCHÜTTUNGSRENDITE", - "entity": "3-6", - "page": 16 - }, - { - "label": "AUSSCHÜTTUNGSRENDITE", - "entity": "Vor NK-Optimlerung", - "page": 17 - }, - { - "label": "AUSSCHÜTTUNGSRENDITE", - "entity": "K.", - "page": 18 - }, - { - "label": "AUSSCHÜTTUNGSRENDITE", - "entity": "4 an", - "page": 20 + "label": "Laufzeit", + "entity": "15-45\n% verlorene", + "page": 12 }, { "label": "RISIKOPROFIL", - "entity": "Value Adjustments", + "entity": "Bein", + "page": 15 + }, + { + "label": "RISIKOPROFIL", + "entity": "Extt", + "page": 15 + }, + { + "label": "Risikoprofil", + "entity": "16", + "page": 16 + }, + { + "label": "Laufzeit", + "entity": "10-12 Jahre", + "page": 22 + }, + { + "label": "RISIKOPROFIL", + "entity": "CD", "page": 25 }, { "label": "RISIKOPROFIL", - "entity": "Dach/", + "entity": "CRD", + "page": 25 + }, + { + "label": "RISIKOPROFIL", + "entity": "CRR", + "page": 25 + }, + { + "label": "RISIKOPROFIL", + "entity": "CO2-neutrale", "page": 28 }, { "label": "RISIKOPROFIL", "entity": "CO2-Emissionen", + "page": 28 + }, + { + "label": "RISIKOPROFIL", + "entity": "CO2-neutralen", "page": 30 }, { "label": "AUSSCHÜTTUNGSRENDITE", - "entity": "UM\\", + "entity": "55 Standard", + "page": 30 + }, + { + "label": "RISIKOPROFIL", + "entity": "Ende", "page": 30 }, { @@ -160,28 +120,53 @@ "page": 30 }, { - "label": "AUSSCHÜTTUNGSRENDITE", - "entity": "5%", - "page": 31 + "label": "RISIKOPROFIL", + "entity": "Uber", + "page": 34 }, { "label": "AUSSCHÜTTUNGSRENDITE", - "entity": "5%", - "page": 31 - }, - { - "label": "AUSSCHÜTTUNGSRENDITE", - "entity": "1%", + "entity": "45%", "page": 35 }, { "label": "AUSSCHÜTTUNGSRENDITE", + "entity": "54%", + "page": 35 + }, + { + "label": "RISIKOPROFIL", + "entity": "£\nKlar definierte", + "page": 38 + }, + { + "label": "RISIKOPROFIL", + "entity": "Hand", + "page": 38 + }, + { + "label": "RISIKOPROFIL", "entity": "CO2-Emissionen", "page": 38 }, + { + "label": "RISIKOPROFIL", + "entity": "Ries", + "page": 39 + }, + { + "label": "RISIKOPROFIL", + "entity": "Rene Sondermann", + "page": 39 + }, { "label": "AUSSCHÜTTUNGSRENDITE", - "entity": "84,0%", + "entity": "44%", + "page": 40 + }, + { + "label": "Laufzeit", + "entity": "10 - 12 Jahre", "page": 42 }, { @@ -190,23 +175,8 @@ "page": 42 }, { - "label": "AUSSCHÜTTUNGSRENDITE", - "entity": "0,1 %", - "page": 42 - }, - { - "label": "AUSSCHÜTTUNGSRENDITE", - "entity": "0,44 %", - "page": 42 - }, - { - "label": "AUSSCHÜTTUNGSRENDITE", - "entity": "1,40 %", - "page": 42 - }, - { - "label": "AUSSCHÜTTUNGSRENDITE", - "entity": "491%", + "label": "RISIKOPROFIL", + "entity": "0,01%", "page": 42 }, { @@ -220,13 +190,8 @@ "page": 44 }, { - "label": "AUSSCHÜTTUNGSRENDITE", - "entity": "1,40%", - "page": 45 - }, - { - "label": "AUSSCHÜTTUNGSRENDITE", - "entity": "4,0%", - "page": 45 + "label": "Laufzeit", + "entity": "10 - 12 Jahre", + "page": 44 } ] \ No newline at end of file diff --git a/prototypes/fine_tuning_spaCy/output/model-best/meta.json b/prototypes/fine_tuning_spaCy/output/model-best/meta.json index 7e74640..477e900 100644 --- a/prototypes/fine_tuning_spaCy/output/model-best/meta.json +++ b/prototypes/fine_tuning_spaCy/output/model-best/meta.json @@ -2,13 +2,13 @@ "lang":"de", "name":"pipeline", "version":"0.0.0", - "spacy_version":">=3.7.2,<3.8.0", + "spacy_version":">=3.8.5,<3.9.0", "description":"", "author":"", "email":"", "url":"", "license":"", - "spacy_git_version":"a89eae928", + "spacy_git_version":"d0c705c", "vectors":{ "width":0, "vectors":0, @@ -22,7 +22,10 @@ ], "ner":[ "AUSSCH\u00dcTTUNGSRENDITE", - "RISIKOPROFIL" + "Aussch\u00fcttungsrendite", + "Laufzeit", + "RISIKOPROFIL", + "Risikoprofil" ] }, "pipeline":[ @@ -37,9 +40,9 @@ ], "performance":{ - "ents_f":0.9107142857, - "ents_p":0.8360655738, - "ents_r":1.0, + "ents_f":0.9007633588, + "ents_p":0.8309859155, + "ents_r":0.9833333333, "ents_per_type":{ "RISIKOPROFIL":{ "p":1.0, @@ -47,12 +50,27 @@ "f":1.0 }, "AUSSCH\u00dcTTUNGSRENDITE":{ - "p":0.6296296296, + "p":0.5925925926, + "r":0.9411764706, + "f":0.7272727273 + }, + "Aussch\u00fcttungsrendite":{ + "p":0.6666666667, "r":1.0, - "f":0.7727272727 + "f":0.8 + }, + "Laufzeit":{ + "p":1.0, + "r":1.0, + "f":1.0 + }, + "Risikoprofil":{ + "p":1.0, + "r":1.0, + "f":1.0 } }, - "tok2vec_loss":34.4831294568, - "ner_loss":1020.9595334249 + "tok2vec_loss":128.6722957797, + "ner_loss":1581.2761230469 } } \ No newline at end of file diff --git a/prototypes/fine_tuning_spaCy/output/model-best/ner/model b/prototypes/fine_tuning_spaCy/output/model-best/ner/model index af79c1c..4bdcbcf 100644 Binary files a/prototypes/fine_tuning_spaCy/output/model-best/ner/model and b/prototypes/fine_tuning_spaCy/output/model-best/ner/model differ diff --git a/prototypes/fine_tuning_spaCy/output/model-best/ner/moves b/prototypes/fine_tuning_spaCy/output/model-best/ner/moves index ed9a1b8..b293ca8 100644 --- a/prototypes/fine_tuning_spaCy/output/model-best/ner/moves +++ b/prototypes/fine_tuning_spaCy/output/model-best/ner/moves @@ -1 +1 @@ -moves{"0":{},"1":{"RISIKOPROFIL":91,"AUSSCH\u00dcTTUNGSRENDITE":40},"2":{"RISIKOPROFIL":91,"AUSSCH\u00dcTTUNGSRENDITE":40},"3":{"RISIKOPROFIL":91,"AUSSCH\u00dcTTUNGSRENDITE":40},"4":{"RISIKOPROFIL":91,"AUSSCH\u00dcTTUNGSRENDITE":40,"":1},"5":{"":1}}cfgneg_key \ No newline at end of file +moves{"0":{},"1":{"RISIKOPROFIL":91,"AUSSCH\u00dcTTUNGSRENDITE":40,"Laufzeit":21,"Aussch\u00fcttungsrendite":8,"Risikoprofil":5},"2":{"RISIKOPROFIL":91,"AUSSCH\u00dcTTUNGSRENDITE":40,"Laufzeit":21,"Aussch\u00fcttungsrendite":8,"Risikoprofil":5},"3":{"RISIKOPROFIL":91,"AUSSCH\u00dcTTUNGSRENDITE":40,"Laufzeit":21,"Aussch\u00fcttungsrendite":8,"Risikoprofil":5},"4":{"RISIKOPROFIL":91,"AUSSCH\u00dcTTUNGSRENDITE":40,"Laufzeit":21,"Aussch\u00fcttungsrendite":8,"Risikoprofil":5,"":1},"5":{"":1}}cfgneg_key \ No newline at end of file diff --git a/prototypes/fine_tuning_spaCy/output/model-best/tok2vec/model b/prototypes/fine_tuning_spaCy/output/model-best/tok2vec/model index 10673a9..3c788ee 100644 Binary files a/prototypes/fine_tuning_spaCy/output/model-best/tok2vec/model and b/prototypes/fine_tuning_spaCy/output/model-best/tok2vec/model differ diff --git a/prototypes/fine_tuning_spaCy/output/model-best/vocab/strings.json b/prototypes/fine_tuning_spaCy/output/model-best/vocab/strings.json index dabd0da..dff255e 100644 --- a/prototypes/fine_tuning_spaCy/output/model-best/vocab/strings.json +++ b/prototypes/fine_tuning_spaCy/output/model-best/vocab/strings.json @@ -52,7 +52,11 @@ "+", ",", ",00", + ",12", ",25", + ",40", + ",80", + ",91", "-", "-((", "-))", @@ -106,39 +110,55 @@ ".w.", ".\u00e4.", "/", + "/2,12", "/3", "/Core+", + "/FK", + "/XX", "/Xxxx+", "/core+", "/d", + "/d,dd", + "/fk", + "/xx", "/xxxx+", "0", "0%+", + "0,1", + "0,80", "0.0", "0.o", + "000", "018", "022", + "023", "032", "034", "0_0", "0_o", "1", + "1,40", "1.", "10", + "100", "12", "14", "2", "2.", "20", "200", + "200.000", "2018", "2022", "2032", "2034", + "24", + "25", "250", "3", "3,8", "3.", + "31.12.2023", "33", "333", "35", @@ -149,6 +169,7 @@ "4,7", "4,8", "4,9", + "4,91", "40", "5", "5,0", @@ -157,11 +178,14 @@ "5,3", "5,7", "50", + "5Mio.\u20ac", + "5mio.\u20ac", "6", "60", "7", "7,5", "7,50%+", + "75", "8", "8)", "8-", @@ -277,6 +301,7 @@ "A.G.", "AIN", "AUSSCH\u00dcTTUNGSRENDITE", + "Ab", "Abb", "Abb.", "Abbildung", @@ -294,13 +319,18 @@ "AlF", "Allgemeine", "Amsterdam", + "Angestrebter", + "Ankauf", "Ankaufs-", + "Ankaufsphase", "Anlagebedingungen", "Anlagestrategien", "Anlagesusschuss", "Anlagevehikels", + "Annahmen", "Ansprechpartners", "Antagevehikels", + "Anteilseigner", "Apr", "Apr.", "April", @@ -309,6 +339,8 @@ "Artikel", "Assets", "Aufl\u00f6sung", + "Aufnahme", + "Auftakt", "Aufwertung", "Aug", "Aug.", @@ -319,6 +351,10 @@ "Ausschu\u0308ttungsintervalle", "Ausschu\u0308ttungsrandite", "Ausschu\u0308ttungsrendite", + "Aussch\u00fcttungsintervalle", + "Aussch\u00fcttungsrandite", + "Aussch\u00fcttungsrendite", + "Aussch\u00fcttungsrendites", "B", "B.", "B.A.", @@ -371,6 +407,7 @@ "D", "D.", "D.C.", + "Darlehen", "Deutsche", "Deutschland", "Deutschlands", @@ -379,6 +416,7 @@ "Dezember", "Di", "Di.", + "Die", "Dienstag", "Different", "Dipl", @@ -397,6 +435,9 @@ "E-Mail", "E.", "ERD", + "ESG-", + "EUR", + "Eigenkapital", "Einmalige", "Einw", "Einwohner", @@ -406,6 +447,7 @@ "Exit", "F", "F.", + "FDR", "FIL", "FRANCE", "Fa", @@ -416,6 +458,8 @@ "Feb", "Feb.", "Februar", + "Finanzierung", + "Finanzierungskonditionen", "Firma", "Fond", "Fonds", @@ -424,6 +468,7 @@ "France", "Frankreich", "Frau", + "Fremdkapital", "French", "Frl", "Frl.", @@ -440,10 +485,12 @@ "Gebr", "Gebr.", "Gebu\u0308hren", + "Geb\u00fchren", "Geplantes", "Gesamtrendite", "H", "H.", + "Haltedauer", "Halten", "Halten-Strategie", "Hauptbahnhof", @@ -482,7 +529,12 @@ "Informationen", "Ing", "Ing.", + "Invastitionsfokus", + "Investftionsvolumen", + "Investitionen", + "Investitions-annahmen", "Investmentmanagers", + "Investmentzeitraum", "Investtionszeltraum", "It", "J", @@ -507,8 +559,11 @@ "K", "K.", "K.O.", + "Kapitalzusagen", "Kaufen", + "Keine", "Key", + "Klassifizierung", "Kosten", "L", "L'", @@ -543,12 +598,14 @@ "Milliarde", "Million", "Mindestanlage", + "Mindestens", "Mio", "Mio.", "Mitgliedschaft", "Mittwoch", "Mo", "Mo.", + "Monate", "Montag", "Mr", "Mr.", @@ -586,6 +643,8 @@ "OPCI", "O_O", "O_o", + "Objekt", + "Objekten", "Of", "Offen", "Offener", @@ -610,7 +669,9 @@ "Professor", "Prognostizierte", "Punkt", + "Q", "Qin", + "Quote", "R", "R.", "R.I.P.", @@ -625,6 +686,8 @@ "Red.", "Redaktion", "Regionen", + "Rendite-", + "Renovierungen", "Rents", "Residential", "Retail", @@ -633,6 +696,8 @@ "Rotterdam", "S", "S'", + "SFDR", + "SG-", "SPAIN", "SRI", "Sa", @@ -654,6 +719,7 @@ "Spezial-AlF", "St", "St.", + "Stabilisierung", "Standortaufwertungsstrategie", "Standorte", "Std", @@ -666,11 +732,13 @@ "Stra\u00dfe", "Struktur", "Strukturierungsgebu\u0308hr", + "Strukturierungsgeb\u00fchr", "Stunde", "St\u00e4dten", "S\u2019", "T", "T.", + "Tag", "Tausend", "Teflimmobilfe)-", "Tel", @@ -698,7 +766,9 @@ "V.V", "V_V", "Value", + "Verbesserungen", "Vergu\u0308tung", + "Verg\u00fctung", "Verkaufs", "Verkaufs-", "Vol", @@ -724,6 +794,7 @@ "XX", "XX.", "XXX", + "XXX-", "XXX.", "XXX/XXX", "XXXX", @@ -742,6 +813,7 @@ "Xxxx", "Xxxx+", "Xxxx+/Xxxxx", + "Xxxx-", "Xxxx-XXX", "Xxxx-Xxxxx-XXX", "Xxxx.", @@ -753,6 +825,7 @@ "Xxxxx-", "Xxxxx-XxX", "Xxxxx-Xxxxx", + "Xxxxx-xxxx", "Xxxxx\u0308xx", "Xxxxx\u0308xxxx", "Xxxx\u0308xxxx", @@ -763,11 +836,15 @@ "Ziel-LTY", "Ziel-Netto-IRR", "Zielanlagestrategie", + "Zielm\u00e4rkte", + "Zielobjektgr\u00f6\u00dfe", "Zielregionfen)/Jand", "Zielrendite", "Zielsektoren", "Zielvolumen", "Ziirraiaein", + "Zins-", + "Zinss\u00e4tze", "Zt", "Zt.", "[", @@ -806,6 +883,7 @@ "a.g.", "a.m.", "a.z.", + "ab", "abb", "abb.", "abk", @@ -832,10 +910,12 @@ "aiming", "ain", "ake", + "akt", "aktive", "aktueller", "al", "al.", + "ald", "alf", "all", "allg", @@ -849,15 +929,21 @@ "an", "an.", "and", + "angestrebt", + "angestrebter", + "ankauf", "ankaufs-", + "ankaufsphase", "anlagebedingungen", "anlagestrategien", "anlagesusschuss", "anlagevehikels", + "annahmen", "ans", "ansprechpartners", "ant", "antagevehikels", + "anteilseigner", "apr", "apr.", "are", @@ -867,6 +953,7 @@ "art", "artikel", "as", + "ase", "asset", "assetor", "assets", @@ -877,6 +964,8 @@ "attractive", "auf", "aufl\u00f6sung", + "aufnahme", + "auftakt", "aufwertung", "auf\u2019m", "aug", @@ -885,11 +974,15 @@ "auschittungsrendite", "auschu\u0308ttungsrendite", "ausgew\u00e4hlte", + "ausschlie\u00dflich", "ausschu\u0308ttung", "ausschu\u0308ttungsintervalle", "ausschu\u0308ttungsrandite", "ausschu\u0308ttungsrendite", + "aussch\u00fcttungsintervalle", + "aussch\u00fcttungsrandite", "aussch\u00fcttungsrendite", + "aussch\u00fcttungsrendites", "ave", "ax.", "b", @@ -904,6 +997,7 @@ "bd", "bd.", "be", + "bei", "beispielsweise", "belgium", "benefits", @@ -921,6 +1015,7 @@ "bhf.", "biol", "biol.", + "bis", "bk.", "bon", "br.", @@ -999,11 +1094,15 @@ "d.h.", "d.x", "dX", + "dXxx.\u20ac", "d_d", "d_x", "dam", + "darlehen", "dd", + "dd.dd.dddd", "ddd", + "ddd.ddd", "dddd", "de", "ded", @@ -1024,6 +1123,7 @@ "dgl.", "di", "di.", + "die", "different", "dipl", "dipl.", @@ -1042,6 +1142,7 @@ "du", "du\u2019s", "dv.", + "dxxx.\u20ac", "d\u00e4nemark", "d\u2019", "e", @@ -1060,13 +1161,16 @@ "ebd.", "ebenda", "ebr", + "ebt", "economic", "ed.", "eep", "egr", + "egt", "egy", "ehem", "ehem.", + "eigenkapital", "eigentlich", "eigtl", "eigtl.", @@ -1076,11 +1180,15 @@ "einen", "einer", "einmalige", + "eins", "einw", "einwohner", + "einzeln", "eit", + "ekt", "el.", "eld", + "eln", "els", "em.", "en.", @@ -1090,6 +1198,7 @@ "engl.", "englisch", "enhancing", + "ens", "ent", "entspr", "entspr.", @@ -1100,6 +1209,7 @@ "er.", "erd", "ere", + "erfolgten", "erg", "erm", "erm.", @@ -1110,12 +1220,14 @@ "ery", "er\u2019s", "es", + "esg-", "ess", "estate", "etc", "etc.", "etr", "ets", + "eur", "europe", "european", "ev", @@ -1127,6 +1239,7 @@ "experienced", "expertise", "exposure", + "externe", "extract", "ez.", "e\u2019s", @@ -1136,13 +1249,17 @@ "fa.", "fam", "fam.", + "fdr", "feb", "feb.", "fen", "festen", + "festgelegt", "festgelegter", "fierce", "fil", + "finanzierung", + "finanzierungskonditionen", "flagship", "fond", "fonds", @@ -1153,6 +1270,7 @@ "france", "frankreich", "franz\u00f6sisch", + "fremdkapital", "french", "frl", "frl.", @@ -1162,6 +1280,7 @@ "fs.", "fund", "fu\u0308hrende", + "f\u00fcr", "f\u2019m", "g", "g.", @@ -1173,6 +1292,7 @@ "gebr", "gebr.", "gebu\u0308hren", + "geb\u00fchren", "ged", "gedis", "gegebenenfalls", @@ -1185,6 +1305,7 @@ "gen", "geographies", "geplantes", + "gepr\u00fcft", "ger", "gerd", "gesamtrendite", @@ -1209,6 +1330,7 @@ "h.", "h.c", "h.c.", + "haltedauer", "halten", "halten-strategie", "have", @@ -1217,6 +1339,7 @@ "hd.", "hed", "hem", + "hen", "here", "hf.", "hg", @@ -1227,6 +1350,7 @@ "hinter\u2019m", "hip", "his", + "hme", "hotel", "hotels", "hr", @@ -1303,9 +1427,14 @@ "insb.", "insbesondere", "interesting", + "invastitionsfokus", + "investftionsvolumen", + "investitionen", + "investitions-annahmen", "investment", "investmentmanagers", "investments", + "investmentzeitraum", "investor", "investtionszeltraum", "inw", @@ -1313,6 +1442,7 @@ "iol", "ion", "ipl", + "ird", "irr", "is", "ise", @@ -1334,6 +1464,7 @@ "jahre", "jan", "jan.", + "jedes", "jh", "jh.", "jhd", @@ -1346,20 +1477,26 @@ "jun.", "jur", "jur.", + "j\u00e4hrlich", "j\u00e4hrliche", "k", "k.", "k.o.", + "kapitalzusagen", "kath", "kath.", "katholisch", "kaufen", + "keine", "kel", "ket", "key", "kl.", + "klassifizierung", "kosten", "kt.", + "kte", + "kus", "l", "l'", "l.", @@ -1452,6 +1589,7 @@ "mitgliedschaft", "mo", "mo.", + "monate", "monatlich", "mr", "mr.", @@ -1474,6 +1612,7 @@ "n.y.", "n.y.c.", "nach", + "nachhaltig", "nachvermietungsstrategie", "name", "nat", @@ -1493,6 +1632,7 @@ "ner", "nes", "netherlands", + "neu", "ng.", "ngl", "ngs", @@ -1505,6 +1645,7 @@ "nov.", "nr", "nr.", + "ns-", "nsb", "nse", "nts", @@ -1522,10 +1663,13 @@ "o.o", "o.\u00e4", "o.\u00e4.", + "o.\u20ac", "o_0", "o_O", "o_o", "objectives", + "objekt", + "objekten", "of", "of.", "ofQin", @@ -1561,6 +1705,7 @@ "original", "orm", "ors", + "ote", "outside", "ov.", "ove", @@ -1609,6 +1754,7 @@ "q.e.d.", "qin", "quality", + "quote", "r", "r.", "r.i.p.", @@ -1630,6 +1776,8 @@ "regionen", "remains", "ren", + "rendite-", + "renovierungen", "rent", "rents", "rer", @@ -1649,6 +1797,7 @@ "rm.", "rms", "rn.", + "rne", "rof", "room", "rotterdam", @@ -1687,7 +1836,10 @@ "sept.", "set", "sf.", + "sfdr", + "sg-", "sg.", + "sich", "sie", "sie\u2019s", "sis", @@ -1696,6 +1848,7 @@ "small", "so", "so.", + "sobald", "sog", "sog.", "some", @@ -1709,6 +1862,8 @@ "ss.", "st", "st.", + "stabilisieren", + "stabilisierung", "standortaufwertungsstrategie", "standorte", "std", @@ -1724,17 +1879,20 @@ "strategy", "struktur", "strukturierungsgebu\u0308hr", + "strukturierungsgeb\u00fchr", "st\u00e4dten", "sw.", "s\u2019", "t", "t.", "tactical", + "tag", "tal", "targeting", "targets", "tc.", "td.", + "te-", "ted", "tee", "teflimmobilfe)-", @@ -1765,6 +1923,7 @@ "tsd", "tsd.", "tur", + "tze", "t\u00e4gl", "t\u00e4gl.", "t\u00e4glich", @@ -1781,6 +1940,7 @@ "u.u.", "u.v.m", "u.v.m.", + "uer", "ufs", "ug.", "ugh", @@ -1797,6 +1957,7 @@ "univ.", "unless", "unter", + "unterstellt", "unter\u2019m", "ur.", "ure", @@ -1826,8 +1987,10 @@ "var", "vel", "ver", + "verbesserungen", "vergleiche", "vergu\u0308tung", + "verg\u00fctung", "verkaufs", "verkaufs-", "very", @@ -1855,9 +2018,11 @@ "walls", "way", "well-established", + "werden", "wertstabile", "why", "wir", + "wird", "wir\u2019s", "wiss", "wiss.", @@ -1895,6 +2060,7 @@ "xx.x", "xxXxx", "xxx", + "xxx-", "xxx-Xxxxx", "xxx-xxxx", "xxx.", @@ -1938,13 +2104,20 @@ "ziel-lty", "ziel-netto-irr", "zielanlagestrategie", + "zielm\u00e4rkte", + "zielobjektgr\u00f6\u00dfe", "zielregionfen)/jand", "zielrendite", "zielsektoren", "zielvolumen", "ziirraiaein", + "zins-", + "zinss\u00e4tze", "zt", + "zu", + "zum", "zw.", + "zwischen", "zzgl", "zzgl.", "{", @@ -1977,11 +2150,14 @@ "\u00f6sterr", "\u00f6sterr.", "\u00f6sterreichisch", + "\u00f6\u00dfe", "\u00fc", "\u00fc.", "\u00fcber", "\u00fcbersicht", "\u00fcber\u2019m", + "\u00fcft", + "\u00fchr", "\u0308hr", "\u0ca0", "\u0ca0_\u0ca0", diff --git a/prototypes/fine_tuning_spaCy/output/model-last/meta.json b/prototypes/fine_tuning_spaCy/output/model-last/meta.json index 7e74640..477e900 100644 --- a/prototypes/fine_tuning_spaCy/output/model-last/meta.json +++ b/prototypes/fine_tuning_spaCy/output/model-last/meta.json @@ -2,13 +2,13 @@ "lang":"de", "name":"pipeline", "version":"0.0.0", - "spacy_version":">=3.7.2,<3.8.0", + "spacy_version":">=3.8.5,<3.9.0", "description":"", "author":"", "email":"", "url":"", "license":"", - "spacy_git_version":"a89eae928", + "spacy_git_version":"d0c705c", "vectors":{ "width":0, "vectors":0, @@ -22,7 +22,10 @@ ], "ner":[ "AUSSCH\u00dcTTUNGSRENDITE", - "RISIKOPROFIL" + "Aussch\u00fcttungsrendite", + "Laufzeit", + "RISIKOPROFIL", + "Risikoprofil" ] }, "pipeline":[ @@ -37,9 +40,9 @@ ], "performance":{ - "ents_f":0.9107142857, - "ents_p":0.8360655738, - "ents_r":1.0, + "ents_f":0.9007633588, + "ents_p":0.8309859155, + "ents_r":0.9833333333, "ents_per_type":{ "RISIKOPROFIL":{ "p":1.0, @@ -47,12 +50,27 @@ "f":1.0 }, "AUSSCH\u00dcTTUNGSRENDITE":{ - "p":0.6296296296, + "p":0.5925925926, + "r":0.9411764706, + "f":0.7272727273 + }, + "Aussch\u00fcttungsrendite":{ + "p":0.6666666667, "r":1.0, - "f":0.7727272727 + "f":0.8 + }, + "Laufzeit":{ + "p":1.0, + "r":1.0, + "f":1.0 + }, + "Risikoprofil":{ + "p":1.0, + "r":1.0, + "f":1.0 } }, - "tok2vec_loss":34.4831294568, - "ner_loss":1020.9595334249 + "tok2vec_loss":128.6722957797, + "ner_loss":1581.2761230469 } } \ No newline at end of file diff --git a/prototypes/fine_tuning_spaCy/output/model-last/ner/model b/prototypes/fine_tuning_spaCy/output/model-last/ner/model index af79c1c..4bdcbcf 100644 Binary files a/prototypes/fine_tuning_spaCy/output/model-last/ner/model and b/prototypes/fine_tuning_spaCy/output/model-last/ner/model differ diff --git a/prototypes/fine_tuning_spaCy/output/model-last/ner/moves b/prototypes/fine_tuning_spaCy/output/model-last/ner/moves index ed9a1b8..b293ca8 100644 --- a/prototypes/fine_tuning_spaCy/output/model-last/ner/moves +++ b/prototypes/fine_tuning_spaCy/output/model-last/ner/moves @@ -1 +1 @@ -moves{"0":{},"1":{"RISIKOPROFIL":91,"AUSSCH\u00dcTTUNGSRENDITE":40},"2":{"RISIKOPROFIL":91,"AUSSCH\u00dcTTUNGSRENDITE":40},"3":{"RISIKOPROFIL":91,"AUSSCH\u00dcTTUNGSRENDITE":40},"4":{"RISIKOPROFIL":91,"AUSSCH\u00dcTTUNGSRENDITE":40,"":1},"5":{"":1}}cfgneg_key \ No newline at end of file +moves{"0":{},"1":{"RISIKOPROFIL":91,"AUSSCH\u00dcTTUNGSRENDITE":40,"Laufzeit":21,"Aussch\u00fcttungsrendite":8,"Risikoprofil":5},"2":{"RISIKOPROFIL":91,"AUSSCH\u00dcTTUNGSRENDITE":40,"Laufzeit":21,"Aussch\u00fcttungsrendite":8,"Risikoprofil":5},"3":{"RISIKOPROFIL":91,"AUSSCH\u00dcTTUNGSRENDITE":40,"Laufzeit":21,"Aussch\u00fcttungsrendite":8,"Risikoprofil":5},"4":{"RISIKOPROFIL":91,"AUSSCH\u00dcTTUNGSRENDITE":40,"Laufzeit":21,"Aussch\u00fcttungsrendite":8,"Risikoprofil":5,"":1},"5":{"":1}}cfgneg_key \ No newline at end of file diff --git a/prototypes/fine_tuning_spaCy/output/model-last/tok2vec/model b/prototypes/fine_tuning_spaCy/output/model-last/tok2vec/model index 10673a9..3c788ee 100644 Binary files a/prototypes/fine_tuning_spaCy/output/model-last/tok2vec/model and b/prototypes/fine_tuning_spaCy/output/model-last/tok2vec/model differ diff --git a/prototypes/fine_tuning_spaCy/output/model-last/vocab/strings.json b/prototypes/fine_tuning_spaCy/output/model-last/vocab/strings.json index dabd0da..dff255e 100644 --- a/prototypes/fine_tuning_spaCy/output/model-last/vocab/strings.json +++ b/prototypes/fine_tuning_spaCy/output/model-last/vocab/strings.json @@ -52,7 +52,11 @@ "+", ",", ",00", + ",12", ",25", + ",40", + ",80", + ",91", "-", "-((", "-))", @@ -106,39 +110,55 @@ ".w.", ".\u00e4.", "/", + "/2,12", "/3", "/Core+", + "/FK", + "/XX", "/Xxxx+", "/core+", "/d", + "/d,dd", + "/fk", + "/xx", "/xxxx+", "0", "0%+", + "0,1", + "0,80", "0.0", "0.o", + "000", "018", "022", + "023", "032", "034", "0_0", "0_o", "1", + "1,40", "1.", "10", + "100", "12", "14", "2", "2.", "20", "200", + "200.000", "2018", "2022", "2032", "2034", + "24", + "25", "250", "3", "3,8", "3.", + "31.12.2023", "33", "333", "35", @@ -149,6 +169,7 @@ "4,7", "4,8", "4,9", + "4,91", "40", "5", "5,0", @@ -157,11 +178,14 @@ "5,3", "5,7", "50", + "5Mio.\u20ac", + "5mio.\u20ac", "6", "60", "7", "7,5", "7,50%+", + "75", "8", "8)", "8-", @@ -277,6 +301,7 @@ "A.G.", "AIN", "AUSSCH\u00dcTTUNGSRENDITE", + "Ab", "Abb", "Abb.", "Abbildung", @@ -294,13 +319,18 @@ "AlF", "Allgemeine", "Amsterdam", + "Angestrebter", + "Ankauf", "Ankaufs-", + "Ankaufsphase", "Anlagebedingungen", "Anlagestrategien", "Anlagesusschuss", "Anlagevehikels", + "Annahmen", "Ansprechpartners", "Antagevehikels", + "Anteilseigner", "Apr", "Apr.", "April", @@ -309,6 +339,8 @@ "Artikel", "Assets", "Aufl\u00f6sung", + "Aufnahme", + "Auftakt", "Aufwertung", "Aug", "Aug.", @@ -319,6 +351,10 @@ "Ausschu\u0308ttungsintervalle", "Ausschu\u0308ttungsrandite", "Ausschu\u0308ttungsrendite", + "Aussch\u00fcttungsintervalle", + "Aussch\u00fcttungsrandite", + "Aussch\u00fcttungsrendite", + "Aussch\u00fcttungsrendites", "B", "B.", "B.A.", @@ -371,6 +407,7 @@ "D", "D.", "D.C.", + "Darlehen", "Deutsche", "Deutschland", "Deutschlands", @@ -379,6 +416,7 @@ "Dezember", "Di", "Di.", + "Die", "Dienstag", "Different", "Dipl", @@ -397,6 +435,9 @@ "E-Mail", "E.", "ERD", + "ESG-", + "EUR", + "Eigenkapital", "Einmalige", "Einw", "Einwohner", @@ -406,6 +447,7 @@ "Exit", "F", "F.", + "FDR", "FIL", "FRANCE", "Fa", @@ -416,6 +458,8 @@ "Feb", "Feb.", "Februar", + "Finanzierung", + "Finanzierungskonditionen", "Firma", "Fond", "Fonds", @@ -424,6 +468,7 @@ "France", "Frankreich", "Frau", + "Fremdkapital", "French", "Frl", "Frl.", @@ -440,10 +485,12 @@ "Gebr", "Gebr.", "Gebu\u0308hren", + "Geb\u00fchren", "Geplantes", "Gesamtrendite", "H", "H.", + "Haltedauer", "Halten", "Halten-Strategie", "Hauptbahnhof", @@ -482,7 +529,12 @@ "Informationen", "Ing", "Ing.", + "Invastitionsfokus", + "Investftionsvolumen", + "Investitionen", + "Investitions-annahmen", "Investmentmanagers", + "Investmentzeitraum", "Investtionszeltraum", "It", "J", @@ -507,8 +559,11 @@ "K", "K.", "K.O.", + "Kapitalzusagen", "Kaufen", + "Keine", "Key", + "Klassifizierung", "Kosten", "L", "L'", @@ -543,12 +598,14 @@ "Milliarde", "Million", "Mindestanlage", + "Mindestens", "Mio", "Mio.", "Mitgliedschaft", "Mittwoch", "Mo", "Mo.", + "Monate", "Montag", "Mr", "Mr.", @@ -586,6 +643,8 @@ "OPCI", "O_O", "O_o", + "Objekt", + "Objekten", "Of", "Offen", "Offener", @@ -610,7 +669,9 @@ "Professor", "Prognostizierte", "Punkt", + "Q", "Qin", + "Quote", "R", "R.", "R.I.P.", @@ -625,6 +686,8 @@ "Red.", "Redaktion", "Regionen", + "Rendite-", + "Renovierungen", "Rents", "Residential", "Retail", @@ -633,6 +696,8 @@ "Rotterdam", "S", "S'", + "SFDR", + "SG-", "SPAIN", "SRI", "Sa", @@ -654,6 +719,7 @@ "Spezial-AlF", "St", "St.", + "Stabilisierung", "Standortaufwertungsstrategie", "Standorte", "Std", @@ -666,11 +732,13 @@ "Stra\u00dfe", "Struktur", "Strukturierungsgebu\u0308hr", + "Strukturierungsgeb\u00fchr", "Stunde", "St\u00e4dten", "S\u2019", "T", "T.", + "Tag", "Tausend", "Teflimmobilfe)-", "Tel", @@ -698,7 +766,9 @@ "V.V", "V_V", "Value", + "Verbesserungen", "Vergu\u0308tung", + "Verg\u00fctung", "Verkaufs", "Verkaufs-", "Vol", @@ -724,6 +794,7 @@ "XX", "XX.", "XXX", + "XXX-", "XXX.", "XXX/XXX", "XXXX", @@ -742,6 +813,7 @@ "Xxxx", "Xxxx+", "Xxxx+/Xxxxx", + "Xxxx-", "Xxxx-XXX", "Xxxx-Xxxxx-XXX", "Xxxx.", @@ -753,6 +825,7 @@ "Xxxxx-", "Xxxxx-XxX", "Xxxxx-Xxxxx", + "Xxxxx-xxxx", "Xxxxx\u0308xx", "Xxxxx\u0308xxxx", "Xxxx\u0308xxxx", @@ -763,11 +836,15 @@ "Ziel-LTY", "Ziel-Netto-IRR", "Zielanlagestrategie", + "Zielm\u00e4rkte", + "Zielobjektgr\u00f6\u00dfe", "Zielregionfen)/Jand", "Zielrendite", "Zielsektoren", "Zielvolumen", "Ziirraiaein", + "Zins-", + "Zinss\u00e4tze", "Zt", "Zt.", "[", @@ -806,6 +883,7 @@ "a.g.", "a.m.", "a.z.", + "ab", "abb", "abb.", "abk", @@ -832,10 +910,12 @@ "aiming", "ain", "ake", + "akt", "aktive", "aktueller", "al", "al.", + "ald", "alf", "all", "allg", @@ -849,15 +929,21 @@ "an", "an.", "and", + "angestrebt", + "angestrebter", + "ankauf", "ankaufs-", + "ankaufsphase", "anlagebedingungen", "anlagestrategien", "anlagesusschuss", "anlagevehikels", + "annahmen", "ans", "ansprechpartners", "ant", "antagevehikels", + "anteilseigner", "apr", "apr.", "are", @@ -867,6 +953,7 @@ "art", "artikel", "as", + "ase", "asset", "assetor", "assets", @@ -877,6 +964,8 @@ "attractive", "auf", "aufl\u00f6sung", + "aufnahme", + "auftakt", "aufwertung", "auf\u2019m", "aug", @@ -885,11 +974,15 @@ "auschittungsrendite", "auschu\u0308ttungsrendite", "ausgew\u00e4hlte", + "ausschlie\u00dflich", "ausschu\u0308ttung", "ausschu\u0308ttungsintervalle", "ausschu\u0308ttungsrandite", "ausschu\u0308ttungsrendite", + "aussch\u00fcttungsintervalle", + "aussch\u00fcttungsrandite", "aussch\u00fcttungsrendite", + "aussch\u00fcttungsrendites", "ave", "ax.", "b", @@ -904,6 +997,7 @@ "bd", "bd.", "be", + "bei", "beispielsweise", "belgium", "benefits", @@ -921,6 +1015,7 @@ "bhf.", "biol", "biol.", + "bis", "bk.", "bon", "br.", @@ -999,11 +1094,15 @@ "d.h.", "d.x", "dX", + "dXxx.\u20ac", "d_d", "d_x", "dam", + "darlehen", "dd", + "dd.dd.dddd", "ddd", + "ddd.ddd", "dddd", "de", "ded", @@ -1024,6 +1123,7 @@ "dgl.", "di", "di.", + "die", "different", "dipl", "dipl.", @@ -1042,6 +1142,7 @@ "du", "du\u2019s", "dv.", + "dxxx.\u20ac", "d\u00e4nemark", "d\u2019", "e", @@ -1060,13 +1161,16 @@ "ebd.", "ebenda", "ebr", + "ebt", "economic", "ed.", "eep", "egr", + "egt", "egy", "ehem", "ehem.", + "eigenkapital", "eigentlich", "eigtl", "eigtl.", @@ -1076,11 +1180,15 @@ "einen", "einer", "einmalige", + "eins", "einw", "einwohner", + "einzeln", "eit", + "ekt", "el.", "eld", + "eln", "els", "em.", "en.", @@ -1090,6 +1198,7 @@ "engl.", "englisch", "enhancing", + "ens", "ent", "entspr", "entspr.", @@ -1100,6 +1209,7 @@ "er.", "erd", "ere", + "erfolgten", "erg", "erm", "erm.", @@ -1110,12 +1220,14 @@ "ery", "er\u2019s", "es", + "esg-", "ess", "estate", "etc", "etc.", "etr", "ets", + "eur", "europe", "european", "ev", @@ -1127,6 +1239,7 @@ "experienced", "expertise", "exposure", + "externe", "extract", "ez.", "e\u2019s", @@ -1136,13 +1249,17 @@ "fa.", "fam", "fam.", + "fdr", "feb", "feb.", "fen", "festen", + "festgelegt", "festgelegter", "fierce", "fil", + "finanzierung", + "finanzierungskonditionen", "flagship", "fond", "fonds", @@ -1153,6 +1270,7 @@ "france", "frankreich", "franz\u00f6sisch", + "fremdkapital", "french", "frl", "frl.", @@ -1162,6 +1280,7 @@ "fs.", "fund", "fu\u0308hrende", + "f\u00fcr", "f\u2019m", "g", "g.", @@ -1173,6 +1292,7 @@ "gebr", "gebr.", "gebu\u0308hren", + "geb\u00fchren", "ged", "gedis", "gegebenenfalls", @@ -1185,6 +1305,7 @@ "gen", "geographies", "geplantes", + "gepr\u00fcft", "ger", "gerd", "gesamtrendite", @@ -1209,6 +1330,7 @@ "h.", "h.c", "h.c.", + "haltedauer", "halten", "halten-strategie", "have", @@ -1217,6 +1339,7 @@ "hd.", "hed", "hem", + "hen", "here", "hf.", "hg", @@ -1227,6 +1350,7 @@ "hinter\u2019m", "hip", "his", + "hme", "hotel", "hotels", "hr", @@ -1303,9 +1427,14 @@ "insb.", "insbesondere", "interesting", + "invastitionsfokus", + "investftionsvolumen", + "investitionen", + "investitions-annahmen", "investment", "investmentmanagers", "investments", + "investmentzeitraum", "investor", "investtionszeltraum", "inw", @@ -1313,6 +1442,7 @@ "iol", "ion", "ipl", + "ird", "irr", "is", "ise", @@ -1334,6 +1464,7 @@ "jahre", "jan", "jan.", + "jedes", "jh", "jh.", "jhd", @@ -1346,20 +1477,26 @@ "jun.", "jur", "jur.", + "j\u00e4hrlich", "j\u00e4hrliche", "k", "k.", "k.o.", + "kapitalzusagen", "kath", "kath.", "katholisch", "kaufen", + "keine", "kel", "ket", "key", "kl.", + "klassifizierung", "kosten", "kt.", + "kte", + "kus", "l", "l'", "l.", @@ -1452,6 +1589,7 @@ "mitgliedschaft", "mo", "mo.", + "monate", "monatlich", "mr", "mr.", @@ -1474,6 +1612,7 @@ "n.y.", "n.y.c.", "nach", + "nachhaltig", "nachvermietungsstrategie", "name", "nat", @@ -1493,6 +1632,7 @@ "ner", "nes", "netherlands", + "neu", "ng.", "ngl", "ngs", @@ -1505,6 +1645,7 @@ "nov.", "nr", "nr.", + "ns-", "nsb", "nse", "nts", @@ -1522,10 +1663,13 @@ "o.o", "o.\u00e4", "o.\u00e4.", + "o.\u20ac", "o_0", "o_O", "o_o", "objectives", + "objekt", + "objekten", "of", "of.", "ofQin", @@ -1561,6 +1705,7 @@ "original", "orm", "ors", + "ote", "outside", "ov.", "ove", @@ -1609,6 +1754,7 @@ "q.e.d.", "qin", "quality", + "quote", "r", "r.", "r.i.p.", @@ -1630,6 +1776,8 @@ "regionen", "remains", "ren", + "rendite-", + "renovierungen", "rent", "rents", "rer", @@ -1649,6 +1797,7 @@ "rm.", "rms", "rn.", + "rne", "rof", "room", "rotterdam", @@ -1687,7 +1836,10 @@ "sept.", "set", "sf.", + "sfdr", + "sg-", "sg.", + "sich", "sie", "sie\u2019s", "sis", @@ -1696,6 +1848,7 @@ "small", "so", "so.", + "sobald", "sog", "sog.", "some", @@ -1709,6 +1862,8 @@ "ss.", "st", "st.", + "stabilisieren", + "stabilisierung", "standortaufwertungsstrategie", "standorte", "std", @@ -1724,17 +1879,20 @@ "strategy", "struktur", "strukturierungsgebu\u0308hr", + "strukturierungsgeb\u00fchr", "st\u00e4dten", "sw.", "s\u2019", "t", "t.", "tactical", + "tag", "tal", "targeting", "targets", "tc.", "td.", + "te-", "ted", "tee", "teflimmobilfe)-", @@ -1765,6 +1923,7 @@ "tsd", "tsd.", "tur", + "tze", "t\u00e4gl", "t\u00e4gl.", "t\u00e4glich", @@ -1781,6 +1940,7 @@ "u.u.", "u.v.m", "u.v.m.", + "uer", "ufs", "ug.", "ugh", @@ -1797,6 +1957,7 @@ "univ.", "unless", "unter", + "unterstellt", "unter\u2019m", "ur.", "ure", @@ -1826,8 +1987,10 @@ "var", "vel", "ver", + "verbesserungen", "vergleiche", "vergu\u0308tung", + "verg\u00fctung", "verkaufs", "verkaufs-", "very", @@ -1855,9 +2018,11 @@ "walls", "way", "well-established", + "werden", "wertstabile", "why", "wir", + "wird", "wir\u2019s", "wiss", "wiss.", @@ -1895,6 +2060,7 @@ "xx.x", "xxXxx", "xxx", + "xxx-", "xxx-Xxxxx", "xxx-xxxx", "xxx.", @@ -1938,13 +2104,20 @@ "ziel-lty", "ziel-netto-irr", "zielanlagestrategie", + "zielm\u00e4rkte", + "zielobjektgr\u00f6\u00dfe", "zielregionfen)/jand", "zielrendite", "zielsektoren", "zielvolumen", "ziirraiaein", + "zins-", + "zinss\u00e4tze", "zt", + "zu", + "zum", "zw.", + "zwischen", "zzgl", "zzgl.", "{", @@ -1977,11 +2150,14 @@ "\u00f6sterr", "\u00f6sterr.", "\u00f6sterreichisch", + "\u00f6\u00dfe", "\u00fc", "\u00fc.", "\u00fcber", "\u00fcbersicht", "\u00fcber\u2019m", + "\u00fcft", + "\u00fchr", "\u0308hr", "\u0ca0", "\u0ca0_\u0ca0", diff --git a/prototypes/fine_tuning_spaCy/test_model.py b/prototypes/fine_tuning_spaCy/test_model.py index 277bd1e..37ff4a4 100644 --- a/prototypes/fine_tuning_spaCy/test_model.py +++ b/prototypes/fine_tuning_spaCy/test_model.py @@ -4,7 +4,7 @@ import json from pathlib import Path nlp = spacy.load("output/model-last") -input_pdf = Path("../ocr/output/Pitchbook 3-OCR.pdf") +input_pdf = Path("../ocr/output/Teaser 2 FINAL-OCR.pdf") doc = fitz.open(input_pdf) diff --git a/prototypes/fine_tuning_spaCy/training_data.py b/prototypes/fine_tuning_spaCy/training_data.py index 246b2d0..f51aebd 100644 --- a/prototypes/fine_tuning_spaCy/training_data.py +++ b/prototypes/fine_tuning_spaCy/training_data.py @@ -202,6 +202,30 @@ TRAINING_DATA = [ ( "3-4% dividend yield", {"entities": [[0, 4, "AUSSCHÜTTUNGSRENDITE"]]}, + ), + ( + "Zielmärkte Klassifizierung SFDR Invastitionsfokus Rendite- / Risikoprofil Rechtsform Eigenkapital /FK Quote Investftionsvolumen Prognostizierte Gesamtrendite {IRR)* Prognostizierte Ausschüttungsrandite* Mindestanlage Mitgliedschaft Im Anlagesusschuss Ankaufs- / Verkaufs- / Verkaufs(Teflimmobilfe)- / Baumanagementgebahr (inkl. USt.) Parformanceabhängige Vergütung Einmalige Strukturierungsgebühr Deutsche Metropolregianen und umliegende Regionen mit Städten >50T Einwohner Artikel 8 Wohnimmobilien Deutschland ‚Aktive Bestandsentwicklung Offener Spezial-AlF mit festen Anlagebedingungen rd. 200 Mio. € / max. 20% rd. 250 Mio. € 7,5 % (nach Kosten & Gebühren, vor Steuern) 8 4,0 % {nach Kosten & Gebühren, var Steuern} 5Mio.€ Ab 10 Mio. € 1,40 % / 0,80 % /2,12% / 4,91 % Laufzeit / Investtionszeltraum Ausschüttungsintervalle 20 % über einer @ Ausschüttungsrendite von 4,0 % 0,1% der bis zum 31.12.2023 erfolgten Kapitalzusagen (max. 200.000 &) 10 bis 12 Jahre / bis zu 24 Monate angestrebt Mindestens jährlich", + {"entities": [[945, 960, "Laufzeit"]]}, + ), + ( + "Laufzeit / Investtionszeltraum Ausschüttungsintervalle 20 % über einer @ Ausschüttungsrendite von 4,0 % 0,1% der bis zum 31.12.2023 erfolgten Kapitalzusagen (max. 200.000 &) 10 bis 12 Jahre / bis zu 24 Monate angestrebt", + {"entities": [[174, 189, "Laufzeit"]]}, + ), + ( + "10-12 Jahre Laufzeit bei einem LTV von bis zu 20%", + {"entities": [[0, 11, "Laufzeit"]]}, + ), + ( + "vom Manager festgelegter Stil Rechtsform Jahr des ersten Closings Laufzeit Geplantes Jahr der Auflösung Ziel-Netto-IRR / Gesamtrendite* Zielvolumen des Anlagevehikels Ziel-LTY‚Aktueller LTV Zielsektoren Zielanlagestrategie Fonds Offen Deutschland Core, Core + Offener Immobilien-Spezialfonds 2022 10 - 12 Jahre", + {"entities": [[297, 310, "Laufzeit"], [247, 259, "Risikoprofil"]]}, + ), + ( + "Allgemeine Annahmen Ankaufsphase Haltedauer Zielobjektgröße Finanzierung Investitions-annahmen Zielrendite 24 Monate Investmentzeitraum 10 Jahre (+) EUR 20-75 Mio. Keine externe Finanzierung zum Auftakt (ausschließlich Darlehen der Anteilseigner). Die Finanzierung wird nach der Ankaufsphase und Stabilisierung der Zinssätze neu geprüft. Angestrebter LTV zwischen 25-40 % Investitionen für Renovierungen und ESG- Verbesserungen werden für jedes Objekt einzeln festgelegt. 5,00-5,25 % Ausschüttungsrendites", + {"entities": [[136, 148, "Laufzeit"], [472, 483, "Ausschüttungsrendite"]]}, + ), + ( + "Zielrendite 5,00-5,25 % Ausschüttungsrendite 1) Ankauf von Objekten an Tag eins mit 100% Eigenkapital. Die Strategie unterstellt die Aufnahme von Fremdkapital, sobald sich die Zins- und Finanzierungskonditionen nachhaltig stabilisieren. Strategie - Übersicht Risikoprofil Core+", + {"entities": [[12, 23, "Ausschüttungsrendite"], [272, 277, "Risikoprofil"]]}, ) ]