>>> doc=nlp("un anello d'argento")
>>> doc
[
[
{
"id": 1,
"text": "un",
"lemma": "uno",
"upos": "DET",
"xpos": "RI",
"feats": "Definite=Ind|Gender=Masc|Number=Sing|PronType=Art",
"head": 2,
"deprel": "det",
"start_char": 0,
"end_char": 2,
"ner": "O",
"multi_ner": [
"O"
]
},
{
"id": 2,
"text": "anello",
"lemma": "anello",
"upos": "NOUN",
"xpos": "S",
"feats": "Gender=Masc|Number=Sing",
"head": 0,
"deprel": "root",
"start_char": 3,
"end_char": 9,
"ner": "O",
"multi_ner": [
"O"
]
},
{
"id": 3,
"text": "d'",
"lemma": "di",
"upos": "ADP",
"xpos": "E",
"head": 4,
"deprel": "case",
"start_char": 10,
"end_char": 12,
"ner": "O",
"multi_ner": [
"O"
],
"misc": "SpaceAfter=No"
},
{
"id": 4,
"text": "argento",
"lemma": "argento",
"upos": "NOUN",
"xpos": "S",
"feats": "Gender=Masc|Number=Sing",
"head": 2,
"deprel": "nmod",
"start_char": 12,
"end_char": 19,
"ner": "O",
"multi_ner": [
"O"
],
"misc": "SpaceAfter=No"
}
]
]
E lo stesso per andirivieni:
>>> nlp("un gran andirivieni per casa")
[
[
{
"id": 1,
"text": "un",
"lemma": "uno",
"upos": "DET",
"xpos": "RI",
"feats": "Definite=Ind|Gender=Masc|Number=Sing|PronType=Art",
"head": 3,
"deprel": "det",
"start_char": 0,
"end_char": 2,
"ner": "O",
"multi_ner": [
"O"
]
},
{
"id": 2,
"text": "gran",
"lemma": "grande",
"upos": "ADJ",
"xpos": "A",
"feats": "Number=Sing",
"head": 3,
"deprel": "amod",
"start_char": 3,
"end_char": 7,
"ner": "O",
"multi_ner": [
"O"
]
},
{
"id": 3,
"text": "andirivieni",
"lemma": "andirivieno",
"upos": "NOUN",
"xpos": "S",
"feats": "Gender=Masc|Number=Plur",
"head": 0,
"deprel": "root",
"start_char": 8,
"end_char": 19,
"ner": "O",
"multi_ner": [
"O"
]
},
{
"id": 4,
"text": "per",
"lemma": "per",
"upos": "ADP",
"xpos": "E",
"head": 5,
"deprel": "case",
"start_char": 20,
"end_char": 23,
"ner": "O",
"multi_ner": [
"O"
]
},
{
"id": 5,
"text": "casa",
"lemma": "casa",
"upos": "NOUN",
"xpos": "S",
"feats": "Gender=Fem|Number=Sing",
"head": 3,
"deprel": "nmod",
"start_char": 24,
"end_char": 28,
"ner": "O",
"multi_ner": [
"O"
],
"misc": "SpaceAfter=No"
}
]
]
On 24 May 2024, at 12:00, nexa-request@server-nexa.polito.it wrote:
From: Antonio <antonio@piumarossa.it>
To: nexa@server-nexa.polito.it
Subject: Re: [nexa] Minerva, l'IA italiana al bivio tra Vannacci e
Manzoni
Message-ID: <20240523204128.9de4aa7a7c4a195ae21c3ba8@piumarossa.it>
Content-Type: text/plain; charset=ISO-8859-1

secondo me, con opportuni adattamenti, varrebbe la pena di provare a vedere
se migliora un LLM 'pure Italian'
Prima, secondo me, andrebbe perfezionato 'stanza'.
Ad esempio:
nlp = stanza.Pipeline('it')
doc = nlp("agnello")
...
Result: a-nnlo-lo
"id": 1,
"lemma": "a",
"upos": "ADP",
"xpos": "E",
"deprel": "case"
--
"id": 2,
"lemma": "nnlo",
"upos": "NOUN",
"xpos": "S",
"feats": "Gender=Masc|Number=Sing",
"head": 0,
"deprel": "root"
--
"id": 3,
"lemma": "lo",
"upos": "PRON",
"xpos": "PC",
"feats": "Clitic=Yes|Gender=Masc|Number=Sing|Person=3|PronType=Prs",
"deprel": "discourse"
I clitici in genere li trova:
doc = nlp("mangiarselo")
"lemma": "se",
"feats": "Clitic=Yes|Person=3|PronType=Prs",
...
"lemma": "lo",
"feats": "Clitic=Yes|Person=3|PronType=Prs",