diff --git a/config/api_router.py b/config/api_router.py index b9c2b59..2889ac1 100644 --- a/config/api_router.py +++ b/config/api_router.py @@ -2,8 +2,7 @@ from rest_framework.routers import DefaultRouter, SimpleRouter from reference.api.v1.views import ReferenceViewSet - -app_name = "reference" +from markup_doc.api.v1.views import ArticleViewSet if settings.DEBUG: router = DefaultRouter() @@ -11,5 +10,6 @@ router = SimpleRouter() router.register("reference", ReferenceViewSet, basename="reference") +router.register("first_block", ArticleViewSet, basename="first_block") urlpatterns = router.urls \ No newline at end of file diff --git a/config/settings/base.py b/config/settings/base.py index b6977c6..4d9ce5d 100644 --- a/config/settings/base.py +++ b/config/settings/base.py @@ -82,6 +82,8 @@ "reference", "xml_manager", "model_ai", + "markup_doc", + "markuplib", ] INSTALLED_APPS = DJANGO_APPS + THIRD_PARTY_APPS + LOCAL_APPS + WAGTAIL diff --git a/fixtures/Artigo 5.docx b/fixtures/Artigo 5.docx new file mode 100644 index 0000000..5fbf592 Binary files /dev/null and b/fixtures/Artigo 5.docx differ diff --git a/fixtures/e14790.docx b/fixtures/e14790.docx new file mode 100644 index 0000000..36bb9d0 Binary files /dev/null and b/fixtures/e14790.docx differ diff --git a/fixtures/e740.docx b/fixtures/e740.docx new file mode 100644 index 0000000..25240d4 Binary files /dev/null and b/fixtures/e740.docx differ diff --git a/markup_doc/__init__.py b/markup_doc/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/markup_doc/admin.py b/markup_doc/admin.py new file mode 100644 index 0000000..8c38f3f --- /dev/null +++ b/markup_doc/admin.py @@ -0,0 +1,3 @@ +from django.contrib import admin + +# Register your models here. 
diff --git a/markup_doc/api/__init__.py b/markup_doc/api/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/markup_doc/api/v1/__init__.py b/markup_doc/api/v1/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/markup_doc/api/v1/serializers.py b/markup_doc/api/v1/serializers.py new file mode 100644 index 0000000..c099f0e --- /dev/null +++ b/markup_doc/api/v1/serializers.py @@ -0,0 +1,7 @@ +from rest_framework import serializers +from markup_doc.models import ArticleDocx + +class ArticleDocxSerializer(serializers.ModelSerializer): + class Meta: + model = ArticleDocx + fields = "__all__" \ No newline at end of file diff --git a/markup_doc/api/v1/views.py b/markup_doc/api/v1/views.py new file mode 100755 index 0000000..66938bf --- /dev/null +++ b/markup_doc/api/v1/views.py @@ -0,0 +1,43 @@ +from django.shortcuts import render +from django.http import JsonResponse +from rest_framework.permissions import IsAuthenticated +from rest_framework.viewsets import GenericViewSet +from rest_framework.mixins import CreateModelMixin +from rest_framework.response import Response +from markup_doc.api.v1.serializers import ArticleDocxSerializer +from markup_doc.marker import mark_article + +import json + +# Create your views here. 
+ +class ArticleViewSet( + GenericViewSet, # generic view functionality + CreateModelMixin, # handles POSTs +): + serializer_class = ArticleDocxSerializer + permission_classes = [IsAuthenticated] + http_method_names = [ + "post", + ] + + def create(self, request, *args, **kwargs): + return self.api_article(request) + + def api_article(self, request): + try: + data = json.loads(request.body) + post_text = data.get('text') # Obtiene el parámetro + post_metadata = data.get('metadata') # Obtiene el parámetro + + resp_data = mark_article(post_text, post_metadata) + + response_data = { + 'message': resp_data, + } + except json.JSONDecodeError: + response_data = { + 'error': 'Error processing' + } + + return JsonResponse(response_data) \ No newline at end of file diff --git a/markup_doc/apps.py b/markup_doc/apps.py new file mode 100644 index 0000000..87efcb1 --- /dev/null +++ b/markup_doc/apps.py @@ -0,0 +1,6 @@ +from django.apps import AppConfig + + +class MarkupDocConfig(AppConfig): + default_auto_field = "django.db.models.BigAutoField" + name = "markup_doc" diff --git a/markup_doc/choices.py b/markup_doc/choices.py new file mode 100644 index 0000000..d113911 --- /dev/null +++ b/markup_doc/choices.py @@ -0,0 +1,121 @@ +front_labels = [ + ('', ''), + ('', ''), + ('', ''), + ('', ''), + ('', ''), + ('', ''), + ('', ''), + ('', ''), + ('', ''), + ('', ''), + ('', ''), + ('', ''), + ('', ''), + ('', ''), + ('', ''), + ('

', '

'), + ('', ''), + ('', ''), + ('', ''), + ('', '
'), + ('', ''), + ('', '<title>'), + ('<trans-abstract>', '<trans-abstract>'), + ('<trans-title>', '<trans-title>'), + ('<translate-front>', '<translate-front>'), + ('<translate-body>', '<translate-body>'), + ('<disp-formula>', '<disp-formula>'), + ('<inline-formula>', '<inline-formula>'), + ('<formula>', '<formula>'), + +] + +order_labels = { + '<article-id>':{ + 'pos' : 1, + 'next' : '<subject>' + }, + '<subject>':{ + 'pos' : 2, + 'next' : '<article-title>' + }, + '<article-title>':{ + 'pos' : 3, + 'next' : '<trans-title>', + 'lan' : True + }, + '<trans-title>':{ + 'size' : 14, + 'bold' : True, + 'lan' : True, + 'next' : '<contrib>' + }, + '<contrib>':{ + 'reset' : True, + 'size' : 12, + 'next' : '<aff>' + }, + '<aff>':{ + 'reset' : True, + 'size' : 12, + }, + '<abstract>':{ + 'size' : 12, + 'bold' : True, + 'lan' : True, + 'next' : '<p>' + }, + '<p>':{ + 'size' : 12, + 'next' : '<p>', + 'repeat' : True + }, + '<trans-abstract>':{ + 'size' : 12, + 'bold' : True, + 'lan' : True, + 'next' : '<p>' + }, + '<kwd-group>':{ + 'size' : 12, + 'regex' : r'(?i)(palabra.*clave.*:|keyword.*:)', + }, + '<history>':{ + 'size' : 12, + 'regex' : r'\d{2}/\d{2}/\d{4}', + }, + '<corresp>':{ + 'size' : 12, + 'regex' : r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}' + }, + '<sec>':{ + 'size' : 16, + 'bold' : True, + 'next' : None + }, + '<sub-sec>':{ + 'size' : 12, + 'italic' : True, + 'next' : None + }, + '<sub-sec-2>':{ + 'size' : 14, + 'bold' : True, + 'next' : None + }, +} + +order_labels_body = { + '<sec>':{ + 'size' : 16, + 'bold' : True, + }, + '<sub-sec>':{ + 'size' : 12, + 'italic' : True, + }, + '<p>':{ + 'size' : 12, + }, +} \ No newline at end of file diff --git a/markup_doc/forms.py b/markup_doc/forms.py new file mode 100644 index 0000000..e8abe2e --- /dev/null +++ b/markup_doc/forms.py @@ -0,0 +1 @@ +from wagtail.admin.forms.models import WagtailAdminModelForm diff --git a/markup_doc/labeling_utils.py b/markup_doc/labeling_utils.py new file mode 100644 index 
0000000..d0247f1 --- /dev/null +++ b/markup_doc/labeling_utils.py @@ -0,0 +1,1138 @@ +# Standard library imports +import json +import re +import requests + +from lxml import etree + +# Third-party imports +from django.contrib.auth import get_user_model +from rest_framework_simplejwt.tokens import RefreshToken + +# Local application imports +from model_ai.models import LlamaModel +from .choices import order_labels + + +MODEL_NAME_GEMINI = 'GEMINI' +MODEL_NAME_LLAMA = 'LLAMA' + + +def get_llm_model_name(): + # FIXME: This function always fetches the first LlamaModel instance. + model_ai = LlamaModel.objects.first() + + if model_ai.api_key_gemini: + return MODEL_NAME_GEMINI + else: + return MODEL_NAME_LLAMA + + +def split_in_three(obj_reference, chunk_size=5): + if not obj_reference: + return [] + return [obj_reference[i:i + chunk_size] + for i in range(0, len(obj_reference), chunk_size)] + + +User = get_user_model() + + +def process_reference(num_ref, obj, user_id): + payload = { + 'reference': obj['value']['paragraph'] + } + + # FIXME: This function always fetches the first LlamaModel instance. 
# (key in the generated block value, key in the parsed reference JSON).
# Only ``issue`` is stored under a different name (``num``) in the JSON.
_REF_VALUE_FIELDS = (
    ('date', 'date'),
    ('title', 'title'),
    ('chapter', 'chapter'),
    ('edition', 'edition'),
    ('source', 'source'),
    ('vol', 'vol'),
    ('issue', 'num'),
    ('pages', 'pages'),
    ('lpage', 'lpage'),
    ('fpage', 'fpage'),
    ('doi', 'doi'),
    ('access_id', 'access_id'),
    ('degree', 'degree'),
    ('organization', 'organization'),
    ('location', 'location'),
    ('org_location', 'org_location'),
    ('num_pages', 'num_pages'),
    ('uri', 'uri'),
    ('version', 'version'),
    ('access_date', 'access_date'),
)


def _build_ref_paragraph(ref_json, refid):
    """Build a ``ref_paragraph`` block from one parsed reference dict.

    Missing keys become ``None``; a missing or null ``authors`` entry becomes
    an empty list instead of raising while iterating.
    """
    value = {
        'paragraph': ref_json.get('full_text'),
        'label': '<p>',
        'reftype': ref_json.get('reftype'),
        'refid': refid,
    }
    for value_key, json_key in _REF_VALUE_FIELDS:
        value[value_key] = ref_json.get(json_key)
    value['authors'] = [
        {
            'type': 'Author',
            'value': {
                'surname': author.get('surname'),
                'given_names': author.get('fname'),
            },
        }
        for author in ref_json.get('authors') or []
    ]
    return {'type': 'ref_paragraph', 'value': value}


def process_reference(num_ref, obj, user_id, timeout=None):
    """Send one reference paragraph to the reference API and label it in place.

    Args:
        num_ref: Number used to build the reference id (``'B' + str(num_ref)``).
        obj: Block whose ``obj['value']['paragraph']`` holds the raw reference
            text. On success its ``type`` and ``value`` are overwritten.
        user_id: Primary key of the user the access token is issued for.
        timeout: Optional timeout forwarded to ``requests.post``. ``None``
            (the default) keeps the previous behaviour of waiting
            indefinitely.

    Returns:
        ``obj``: rewritten as a ``ref_paragraph`` block when the API answers
        200, otherwise returned unchanged (no model configured, no
        ``name_file``, or a non-200 response).
    """
    payload = {
        'reference': obj['value']['paragraph']
    }

    # FIXME: This function always fetches the first LlamaModel instance.
    model = LlamaModel.objects.first()

    # ``first()`` returns None on an empty table; treat that like "not
    # configured" instead of failing with AttributeError.
    if model is None or not model.name_file:
        return obj

    user = User.objects.get(pk=user_id)
    access_token = RefreshToken.for_user(user).access_token

    # FIXME: Hardcoded URL
    url = "http://django:8000/api/v1/reference/"

    headers = {
        'Authorization': f'Bearer {access_token}',
        'Content-Type': 'application/json'
    }

    response = requests.post(url, json=payload, headers=headers, timeout=timeout)
    if response.status_code != 200:
        return obj

    # The payload arrives as a JSON string prefixed with "reference: ".
    message_str = response.json()['message']
    ref_json = json.loads(message_str.replace('reference: ', '', 1))

    labeled = _build_ref_paragraph(ref_json, 'B' + str(num_ref))
    obj['type'] = labeled['type']
    obj['value'] = labeled['value']
    return obj


def process_references(num_refs, references):
    """Convert already-parsed references into ``ref_paragraph`` blocks.

    Args:
        num_refs: Reference numbers, matched to ``references`` by position.
        references: Parsed reference dicts.

    Returns:
        A list with one ``ref_paragraph`` block per reference. When
        ``num_refs`` is shorter than ``references`` the remaining ids are the
        bare prefix ``'B'`` (behaviour kept from the original code).
    """
    num_refs = num_refs or []
    arr_references = []

    for i, ref_json in enumerate(references or []):
        suffix = num_refs[i] if i < len(num_refs) else ''
        arr_references.append(_build_ref_paragraph(ref_json, 'B' + str(suffix)))

    return arr_references
# Connectors that join two surnames inside one citation ("Silva & Peixoto").
# Order matters: when several occur in the same text the last one listed wins.
_AUTHOR_CONNECTORS = (' y ', ' and ', ' & ', ' e ')

# Common particles inside surnames.
_PREPOSICIONES = r'(?:de|del|la|los|las|da|do|dos|das|van|von)'
# Compound surname, optionally with particles (includes Portuguese letters).
# NOTE: the capital letter of follow-up words is optional, so lower-case words
# right after a surname are absorbed into it as well.
_APELLIDO = (
    r'[A-ZÁÉÍÓÚÑÇÃÕÂÊÎÔÛ][a-záéíóúñçãõâêîôû]+'
    rf'(?:[-‐\s]+(?:{_PREPOSICIONES})?\s*[A-ZÁÉÍÓÚÑÇÃÕÂÊÎÔÛ]?[a-záéíóúñçãõâêîôû]+)*'
)
_ANIO = r'\d{4}[a-z]?'
_AUTORES_CON_AMPERSAND = (
    rf'{_APELLIDO}(?:\s*,\s*{_APELLIDO})*\s*,?\s*&\s*{_APELLIDO}'
)

# Patterns for one citation inside parentheses, tried in this order; the
# first one that matches wins.
_PAREN_PATTERNS = tuple(
    re.compile(source)
    for source in (
        # "Porta, Lopez-De-Silanes, & Shleifer, 1999"
        rf'(?P<autor>{_AUTORES_CON_AMPERSAND})\s*,\s*(?P<anio>{_ANIO})',
        # "Silva, Peixoto, & Tizziotti 2021"
        rf'(?P<autor>{_AUTORES_CON_AMPERSAND})\s+(?P<anio>{_ANIO})',
        # "Crisóstomo & Brandão, 2019"
        rf'(?P<autor>{_APELLIDO})\s*&\s*{_APELLIDO}\s*,\s*(?P<anio>{_ANIO})',
        # "Brandão et al., 2019"
        rf'(?P<autor>{_APELLIDO})\s+et\s+al\s*\.?\s*,\s*(?P<anio>{_ANIO})',
        # "Brandão et al. 2019"
        rf'(?P<autor>{_APELLIDO})\s+et\s+al\s*\.?\s+(?P<anio>{_ANIO})',
        # "Adam, Tene, Mucci, Beck, 2020"
        rf'(?P<autor>{_APELLIDO}(?:\s*,\s*{_APELLIDO}){{2,}})\s*,\s*(?P<anio>{_ANIO})',
        # "Smith, 2020"
        rf'(?P<autor>{_APELLIDO})\s*,\s*(?P<anio>{_ANIO})',
    )
)

# Narrative citations: "Name (2000)", "Name et al. (2000)", "Name (2018, 2019)".
_OUTSIDE_AUTHOR = (
    rf'(?P<autor>{_APELLIDO})(?:\s*[-‐]\s*{_APELLIDO})*'
    rf'(?:\s+et\s+al\s*\.?|\s+(?:y|and|&)\s+{_APELLIDO})?'
)
_MULTI_YEAR_CITATION = re.compile(
    rf'{_OUTSIDE_AUTHOR}\s*\(\s*(?P<anios>{_ANIO}(?:\s*,\s*{_ANIO})+)\s*\)'
)
_SINGLE_YEAR_CITATION = re.compile(
    rf'{_OUTSIDE_AUTHOR}\s*\(\s*(?P<anio>{_ANIO})\s*\)'
)
_PREPOSICIONES_EVITAR = frozenset(
    ['de', 'del', 'la', 'los', 'las', 'da', 'do', 'dos', 'das', 'van', 'von']
)

_TAG_PATTERN = re.compile(r'<[^>]+>')


def _author_search_text(author_block):
    """Return ``'surname given_names'`` in lower case for an Author block."""
    author_data = author_block['value']
    return (
        (author_data.get('surname') or '').lower()
        + ' '
        + (author_data.get('given_names') or '').lower()
    )


def buscar_refid_por_surname_y_date(data_back, surname_buscado, date_buscado):
    """Find the ``refid`` of the reference matching a surname and a year.

    Args:
        data_back: Iterable of blocks; only ``ref_paragraph`` blocks are
            inspected.
        surname_buscado: One surname, several separated by commas, or two
            joined by ``y`` / ``and`` / ``&`` / ``e``.
        date_buscado: Year text; surrounding whitespace is ignored and only
            the first four characters are compared ("2014a" -> "2014").

    Returns:
        The ``refid`` of the first block whose date equals the year and where
        one of the surnames appears in an author (surname + given names) or in
        the reference paragraph; ``None`` when nothing matches.
    """
    year = str(date_buscado).strip()[:4]
    # Split once, outside the loop: the original reused the parameter name as
    # loop variable, so later blocks only searched the last comma part.
    # Empty parts are dropped because '' is a substring of every string.
    candidates = [part for part in surname_buscado.split(',') if part.strip()]

    for bloque in data_back:
        if bloque['type'] != 'ref_paragraph':
            continue
        data = bloque['value']

        if str(data.get('date')) != year:
            continue

        author_texts = [
            _author_search_text(author)
            for author in data.get('authors') or []
            if author['type'] == 'Author'
        ]
        paragraph = (data.get('paragraph') or '').lower()

        for candidate in candidates:
            pair = None
            for connector in _AUTHOR_CONNECTORS:
                if connector in candidate:
                    pieces = candidate.split(connector)
                    pair = (pieces[0].strip().lower(), pieces[1].strip().lower())

            # Two joined surnames: both must appear among the authors.
            if pair is not None:
                first, second = pair
                if (any(first in text for text in author_texts)
                        and any(second in text for text in author_texts)):
                    return data.get('refid')

            needle = candidate.strip().lower()
            if any(needle in text for text in author_texts) or needle in paragraph:
                return data.get('refid')

    return None


def _preceded_by_preposition(texto, start):
    """Tell whether the word right before ``texto[start]`` is a surname particle."""
    previous_words = texto[:start].split()
    return bool(previous_words) and previous_words[-1].lower() in _PREPOSICIONES_EVITAR


def extract_citation_apa(texto, data_back):
    """Extract APA-style citations from ``texto``.

    Citations inside parentheses (optionally several separated by ``;``) and
    narrative citations such as ``Name (2000)`` or ``Name et al. (2018, 2019)``
    are recognised.

    Args:
        texto: Plain text to scan.
        data_back: Reference blocks used to resolve each citation's ``refid``.

    Returns:
        A list of dicts with the keys ``cita`` (citation text), ``autor``
        (first author as matched), ``anio`` (year) and ``refid`` (resolved
        reference id or ``None``).
    """
    resultados = []

    def registrar(cita, autor, anio):
        resultados.append({
            "cita": cita,
            "autor": autor,
            "anio": anio,
            "refid": buscar_refid_por_surname_y_date(data_back, autor, anio),
        })

    # 1. Citations inside parentheses.
    for paren in re.finditer(r'\(([^)]+)\)', texto):
        # Authors already seen inside this same parenthesis.
        autores_en_parentesis = []

        for parte in paren.group(1).split(';'):
            # Strip so the year lookup and the returned text carry no padding
            # (the original announced this strip but never applied it).
            parte = parte.strip()
            if not parte:
                continue

            # Year only ("2017" after "Smith, 2014;"): reuse the last author
            # of this parenthesis; ignore it when there is none.
            if re.fullmatch(_ANIO, parte):
                if autores_en_parentesis:
                    registrar(parte, autores_en_parentesis[-1], parte)
                continue

            for pattern in _PAREN_PATTERNS:
                match = pattern.search(parte)
                if match is None:
                    continue
                # For author lists keep only the first one; a single surname
                # contains no comma, so the split leaves it untouched.
                primer_autor = re.split(r'\s*,\s*', match.group('autor'))[0]
                autores_en_parentesis.append(primer_autor)
                registrar(parte, primer_autor, match.group('anio'))
                break

    # 2. Narrative citations with several years: Author (2018, 2019).
    for match in _MULTI_YEAR_CITATION.finditer(texto):
        if _preceded_by_preposition(texto, match.start()):
            continue

        autor = match.group('autor')
        con_et_al = "et al" in match.group(0)
        for anio in match.group('anios').split(','):
            anio = anio.strip()
            cita = f"{autor} et al. ({anio})" if con_et_al else f"{autor} ({anio})"
            registrar(cita, autor, anio)

    # 3. Narrative citations with one year: Name (2000) / Name et al. (2000).
    for match in _SINGLE_YEAR_CITATION.finditer(texto):
        if _preceded_by_preposition(texto, match.start()):
            continue

        autor = match.group('autor')
        anio = match.group('anio')
        cita_completa = match.group(0)

        # Guard against re-adding a multi-year citation. A single-year match
        # never contains a comma, so this is purely defensive.
        es_multiple = "," in cita_completa and any(
            resultado["autor"] == autor and resultado["anio"] == anio
            for resultado in resultados
        )
        if not es_multiple:
            registrar(cita_completa, autor, anio)

    return resultados


def clean_labels(texto):
    """Remove every XML tag (opening, closing or self-closing) from ``texto``.

    Whitespace is left exactly as it was, so character offsets in the result
    can be mapped back to the tagged text.
    """
    return _TAG_PATTERN.sub('', texto)
# One token per match: either a whole tag (same definition of "tag" that
# clean_labels removes) or a single character of visible text.
_TAG_OR_CHAR = re.compile(r'<[^>]+>|.', re.DOTALL)


def map_text(texto):
    """Map every tagged fragment of ``texto`` to its tagged form.

    Returns:
        A dict whose keys are the fragments without tags and whose values are
        the same fragments with tags. Fragments that are empty once the tags
        are removed are skipped; repeated keys keep the last occurrence.
    """
    patron = r'<[^>]+>.*?</[^>]+>|<[^/>]+/>'
    mapa = {}

    for etiquetado in re.findall(patron, texto, re.DOTALL):
        contenido_limpio = clean_labels(etiquetado)
        if contenido_limpio:  # only fragments with real content
            mapa[contenido_limpio] = etiquetado

    return mapa


def search_position(texto, substring):
    """Return ``(start, end)`` for every occurrence of ``substring`` in ``texto``.

    Overlapping occurrences are reported too, because the search resumes one
    character after each hit.
    """
    posiciones = []
    inicio = 0
    while True:
        pos = texto.find(substring, inicio)
        if pos == -1:
            return posiciones
        posiciones.append((pos, pos + len(substring)))
        inicio = pos + 1


def extract_labels(texto_original, texto_limpio, pos_inicio, pos_fin):
    """Return the tagged slice matching ``texto_limpio[pos_inicio:pos_fin]``.

    Args:
        texto_original: Text with XML tags.
        texto_limpio: The same text without tags (kept for interface
            compatibility; the offsets are recomputed from ``texto_original``).
        pos_inicio: Start offset in the clean text (inclusive).
        pos_fin: End offset in the clean text (exclusive).

    Returns:
        The characters of the range plus every tag found after the first
        character of the range and before the first character following it.
        Tags that open right before the range are not included. An empty or
        inverted range yields ``""``.
    """
    if pos_fin <= pos_inicio:
        return ""

    pieces = []
    clean_index = 0  # offset of the next visible character in the clean text
    inside = False

    for token in _TAG_OR_CHAR.finditer(texto_original):
        chunk = token.group(0)

        # Tags are at least three characters long and add nothing to the
        # clean offset.
        if len(chunk) > 1:
            if inside:
                pieces.append(chunk)
            continue

        if clean_index == pos_inicio:
            inside = True
        elif clean_index == pos_fin:
            break

        if inside:
            pieces.append(chunk)
        clean_index += 1

    return "".join(pieces)


def restore_labels_ref(ref, mapa_etiquetado, texto_original, texto_limpio):
    """Give back to ``ref`` the tags it had in the original text.

    Every occurrence of ``ref`` in the clean text is mapped to the tagged
    text; the first occurrence that actually carries tags is returned.

    Args:
        ref: Citation text taken from the clean text.
        mapa_etiquetado: Map produced by ``map_text`` (currently not used;
            the decision is made by position only).
        texto_original: Text with XML tags.
        texto_limpio: Text without tags.

    Returns:
        The tagged fragment, or ``ref`` unchanged when it does not occur or
        no occurrence had tags.
    """
    for pos_inicio, pos_fin in search_position(texto_limpio, ref):
        fragmento_original = extract_labels(
            texto_original, texto_limpio, pos_inicio, pos_fin
        )
        # A fragment different from ref means it had tags at that position.
        if fragmento_original != ref:
            return fragmento_original

    return ref


def proccess_labeled_text(texto, data_back):
    """Extract APA citations from tagged text, keeping their original tags.

    Args:
        texto (str): Original text with XML tags.
        data_back: Reference blocks used to resolve each citation's refid.

    Returns:
        list: The citation dicts produced by ``extract_citation_apa`` with
        their ``cita`` value replaced by the tagged fragment when one exists.
    """
    mapa_transformaciones = map_text(texto)
    texto_limpio = clean_labels(texto)

    refs_con_etiquetas = []
    for ref in extract_citation_apa(texto_limpio, data_back):
        # Each dict is updated in place and collected in order.
        ref['cita'] = restore_labels_ref(
            ref['cita'], mapa_transformaciones, texto, texto_limpio
        )
        refs_con_etiquetas.append(ref)

    return refs_con_etiquetas
Para cada ref, usar posición para restaurar solo lo que realmente estaba etiquetado + refs_con_etiquetas = [] + for ref in refs: + ref_restaurada = ref + ref_restaurada['cita'] = restore_labels_ref(ref['cita'], mapa_transformaciones, texto, texto_limpio) + refs_con_etiquetas.append(ref_restaurada) + + return refs_con_etiquetas + + +def match_by_regex(text, order_labels): + return next( + (key_obj for key_obj in order_labels.items() + if "regex" in key_obj[1] and re.search(key_obj[1]["regex"], text)), + None + ) + + +def match_by_style_and_size(item, order_labels, style='bold'): + return next( + (key_obj for key_obj in order_labels.items() + if "size" in key_obj[1] and style in key_obj[1] and + key_obj[1]["size"] == item.get('font_size') and + key_obj[1][style] == item.get(style)), + None + ) + + +def match_next_label(item, label_next, order_labels): + return next( + (key_obj for key_obj in order_labels.items() + if "size" in key_obj[1] and key_obj[1]["size"] == item.get('font_size') + and key_obj[0] == label_next), + None + ) + + +def match_paragraph(item, order_labels): + return next( + (key_obj for key_obj in order_labels.items() + if "size" in key_obj[1] and + "next" in key_obj[1] and + key_obj[1]["size"] == item.get('font_size') and + key_obj[1]["next"] == "<p>"), + None + ) + + +def match_section(item, sections): + return {'label': '<sec>', 'body': True} if ( + item.get('font_size') == sections[0].get('size') and + item.get('bold') == sections[0].get('bold') and + item.get('text', '').isupper() == sections[0].get('isupper') + ) else None + + +def match_subsection(item, sections): + return {'label': '<sub-sec>', 'body': True} if ( + item.get('font_size') == sections[1].get('size') and + item.get('bold') == sections[1].get('bold') and + item.get('text', '').isupper() == sections[1].get('isupper') + ) else None + + +def create_labeled_object2(i, item, state, sections): + obj = {} + result = None + + if match_section(item, sections): + result = match_section(item, 
sections) + state['label'] = result.get('label') + state['body'] = result.get('body') + + if match_subsection(item, sections): + result = match_subsection(item, sections) + state['label'] = result.get('label') + state['body'] = result.get('body') + + if state.get('body') and re.search(r"^(refer)", item.get('text').lower()) and match_section(item, sections): + state['label'] = '<sec>' + state['body'] = False + state['back'] = True + obj['type'] = 'paragraph' + obj['value'] = { + 'label': state['label'], + 'paragraph': item.get('text') + } + + if not result: + result = {'label': '<p>', 'body': state['body'], 'back': state['back']} + state['label'] = result.get('label') + state['body'] = result.get('body') + state['back'] = result.get('back') + + if result: + pass + else: + if state.get('label_next'): + if state.get('repeat'): + result = match_by_regex(item.get('text'), order_labels) + if result: + state['label'] = result[0] + else: + result = match_by_style_and_size(item, order_labels, style='bold') + if result: + state['label'] = result[0] + state['repeat'] = None + state['reset'] = None + state['label_next'] = result[1].get("next") + state['body'] = result[1].get("size") == 16 + if state['body'] and re.search(r"^(refer)", item.get('text').lower()): + state['body'] = False + state['back'] = True + if not result: + result = match_next_label(item, state['label_next'], order_labels) + if result: + state['label'] = result[0] + state['label_next_reset'] = result[1].get("next") + state['reset'] = result[1].get("reset", False) + state['repeat'] = result[1].get("repeat", False) + else: + result = match_by_style_and_size(item, order_labels, style='bold') + if result: + state['label'] = result[0] + state['label_next'] = result[1].get("next") + if state.get('body') and re.search(r"^(refer)", item.get('text').lower()): + state['body'] = False + state['back'] = True + else: + result = match_by_style_and_size(item, order_labels, style='italic') + if result: + state['label'] = 
def extract_keywords(text):
    """Split a keywords paragraph into its heading and a normalized keyword list.

    A single trailing period is dropped. When the text contains a colon, the
    part before the first colon is treated as the heading (for example
    ``"Keywords"``) and the rest as the keyword list; otherwise the whole text
    is the keyword list. Keywords are split on commas and semicolons, and
    empty entries (from doubled or trailing separators) are discarded.

    Args:
        text: Raw paragraph text. ``None`` is treated as an empty string.

    Returns:
        dict: ``{"title": <heading or None>, "keywords": <str>}`` where
        ``keywords`` is the cleaned keywords joined with ``", "``.
    """
    text = (text or "").strip()

    # Drop a single trailing period, if present.
    if text.endswith('.'):
        text = text[:-1].strip()

    # Check for a "<heading>: <content>" layout.
    match = re.match(r'(?i)\s*(.+?)\s*:\s*(.+)', text)

    if match:
        label = match.group(1).strip()
        content = match.group(2).strip()
    else:
        label = None
        content = text

    # Split on semicolons or commas and keep only the non-empty entries.
    # The join must use the cleaned list, not the raw split result, otherwise
    # blank entries leak into the output as ", , ".
    pieces = re.split(r'\s*[;,]\s*', content)
    clean_keywords = ", ".join(p.strip() for p in pieces if p.strip())

    return {"title": label, "keywords": clean_keywords}
def search_special_id(data_body, label):
    """Resolve a textual reference such as "Figura 2" to an element id.

    Images and tables are searched first, then compound paragraphs
    (formulas). An image/table matches when its stored caption label
    (``figlabel`` / ``tablabel``) equals the capitalized reference, or when
    its id (``f<N>`` / ``t<N>``) starts with the same letter as the reference
    and carries the same number. A compound paragraph matches on its ``eid``
    using the same letter-plus-number rule.

    Args:
        data_body: Iterable of dict-like items exposing ``'type'`` and
            ``'value'`` entries.
        label: Reference text found in a paragraph. Leading whitespace and
            punctuation are ignored.

    Returns:
        The matching id string, or ``None`` when nothing matches or the
        label is empty.
    """
    # Normalize once; the label does not change while iterating.
    stripped = re.sub(r'^[\s\.,;:–—-]+', '', label or '')
    if not stripped:
        # Nothing left to compare (also avoids indexing an empty string).
        return None

    capitalized = stripped.capitalize()
    lowered = stripped.lower()
    number_match = re.search(r'\d+', lowered)
    number = number_match.group(0) if number_match else None

    def _matches_by_id(element_id):
        # Compare the whole number so that "Figura 12" does not resolve to "f1".
        return (
            bool(element_id)
            and number is not None
            and element_id[0] == lowered[0]
            and element_id[1:] == number
        )

    # (caption-label key, id key) for each element type that carries a caption.
    keys_by_type = {
        'image': ('figlabel', 'figid'),
        'table': ('tablabel', 'tabid'),
    }

    for d in data_body:
        keys = keys_by_type.get(d['type'])
        if keys is None:
            continue
        label_key, id_key = keys
        data = d['value']
        element_id = data.get(id_key)
        # The caption keys are optional: elements created without a preceding
        # caption paragraph do not have them, so use .get() instead of [].
        if data.get(label_key) == capitalized:
            return element_id
        if _matches_by_id(element_id):
            return element_id

    for d in data_body:
        if d['type'] == 'compound_paragraph':
            element_id = d['value'].get('eid')
            if _matches_by_id(element_id):
                return element_id

    return None
def remove_unpaired_tags(text):
    """Strip tags that have no matching partner from a markup fragment.

    The fragment is scanned left to right. An opening tag is kept only if a
    closing tag with the same name later closes it while it is the innermost
    open tag; a closing tag is kept only if it closes the innermost open tag.
    Self-closing tags (``<tag ... />``) are complete on their own and are
    always kept. Text between tags is never altered.

    Args:
        text: Markup fragment. An empty or ``None`` value yields ``''``.

    Returns:
        str: The fragment with orphan closing tags and unclosed opening tags
        removed.
    """
    if not text:
        return ''

    # Match opening/closing tags, capturing only the tag name
    # (everything before the first space or '>').
    pattern = re.compile(r'<(/?)([a-zA-Z0-9]+)(?:\s[^>]*)?>')

    pieces = []
    open_tags = []  # (tag_name, index_in_pieces) for every still-open tag

    cursor = 0
    for match in pattern.finditer(text):
        is_closing = bool(match.group(1))
        tag_name = match.group(2)
        tag_text = match.group(0)

        # Text between the previous tag and this one.
        if match.start() > cursor:
            pieces.append(text[cursor:match.start()])

        if is_closing:
            if open_tags and open_tags[-1][0] == tag_name:
                open_tags.pop()
                pieces.append(tag_text)
            # Otherwise: orphan closing tag, drop it.
        elif tag_text.endswith('/>'):
            # Self-closing tag: it needs no partner, so it must not be
            # tracked as "unclosed" (which would delete it below).
            pieces.append(tag_text)
        else:
            open_tags.append((tag_name, len(pieces)))
            pieces.append(tag_text)

        cursor = match.end()

    # Remaining text after the last tag.
    if cursor < len(text):
        pieces.append(text[cursor:])

    # Drop every opening tag that was never closed.
    unclosed = {position for _, position in open_tags}
    return ''.join(
        piece for index, piece in enumerate(pieces) if index not in unclosed
    )
def extract_label_and_title(text):
    """Split a caption into its label and its title.

    The label is the first occurrence of a figure/table/image word (Spanish,
    English or Portuguese, matched case-insensitively) followed by a number;
    it is returned with the word capitalized, e.g. ``"Figura 1"``. The title
    is whatever follows the number, with leading whitespace, punctuation and
    dashes removed.

    Returns:
        dict: ``{"label": <str or None>, "title": <str>}``. When no label is
        found, ``label`` is ``None`` and ``title`` is the stripped input.
    """
    found = re.search(
        r'\b(Imagen|Imágen|Image|Imagem|Figura|Figure|Tabla|Table|Tabela)\s+(\d+)\b',
        text,
        re.IGNORECASE,
    )

    if found is None:
        return {"label": None, "title": text.strip()}

    kind, number = found.groups()

    # Everything after the number, minus leading separators (.,;: dashes, spaces).
    remainder = re.sub(r'^[\s\.,;:–—-]+', '', text[found.end():])

    return {
        "label": f"{kind.capitalize()} {number}",
        "title": remainder.strip(),
    }
def mark_article(text, metadata):
    """Run the Llama model on ``text`` and return the JSON-looking part of its reply.

    The prompt configuration is chosen from ``metadata``. For ``'doi'`` the
    first ``{...}`` span of the reply is returned; for every other kind the
    first ``[...]`` span is returned.

    Args:
        text: Text to send to the model.
        metadata: One of ``'author'``, ``'affiliation'``, ``'doi'`` or
            ``'title'``.

    Returns:
        The matched substring of the model reply, or ``None`` when the reply
        contains no such span.

    Raises:
        ValueError: If ``metadata`` is not one of the supported kinds.
    """
    supported = ('author', 'affiliation', 'doi', 'title')
    if metadata not in supported:
        # Fail with a clear message instead of an UnboundLocalError further down.
        raise ValueError(
            f"Unsupported metadata {metadata!r}; expected one of {supported}"
        )

    config_getters = {
        'author': LlamaInputSettings.get_author_config,
        'affiliation': LlamaInputSettings.get_affiliations,
        'doi': LlamaInputSettings.get_doi_and_section,
        'title': LlamaInputSettings.get_titles,
    }
    messages, response_format = config_getters[metadata]()

    gll = LlamaService(messages, response_format)
    output = gll.run(text)
    content = output['choices'][0]['message']['content']

    # 'doi' replies are searched for an object span, all others for an array span.
    span_pattern = r'\{.*\}' if metadata == 'doi' else r'\[.*\]'
    found = re.search(span_pattern, content, re.DOTALL)
    return found.group(0) if found else None
ref_row.strip() + if ref_row: + choices = mark_reference(ref_row) + yield { + "reference": ref_row, + "choices": list(choices) + } diff --git a/markup_doc/migrations/0001_initial.py b/markup_doc/migrations/0001_initial.py new file mode 100644 index 0000000..74340ef --- /dev/null +++ b/markup_doc/migrations/0001_initial.py @@ -0,0 +1,2291 @@ +# Generated by Django 5.0.3 on 2025-09-07 17:04 + +import django.db.models.deletion +import markup_doc.models +import wagtail.blocks +import wagtail.fields +import wagtail.images.blocks +from django.conf import settings +from django.db import migrations, models + + +class Migration(migrations.Migration): + + initial = True + + dependencies = [ + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ] + + operations = [ + migrations.CreateModel( + name="CollectionValuesModel", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("acron", models.CharField(max_length=10, unique=True)), + ("name", models.CharField(max_length=255)), + ], + ), + migrations.CreateModel( + name="ArticleDocx", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "created", + models.DateTimeField( + auto_now_add=True, verbose_name="Creation date" + ), + ), + ( + "updated", + models.DateTimeField( + auto_now=True, verbose_name="Last update date" + ), + ), + ( + "title", + models.TextField( + blank=True, null=True, verbose_name="Document Title" + ), + ), + ( + "file", + models.FileField( + blank=True, + null=True, + upload_to="uploads_docx/", + verbose_name="Document", + ), + ), + ("estatus", models.IntegerField(default=0)), + ( + "creator", + models.ForeignKey( + editable=False, + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name="%(class)s_creator", + to=settings.AUTH_USER_MODEL, + verbose_name="Creator", + ), + ), + ( + "updated_by", + 
models.ForeignKey( + blank=True, + editable=False, + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name="%(class)s_last_mod_user", + to=settings.AUTH_USER_MODEL, + verbose_name="Updater", + ), + ), + ], + options={ + "abstract": False, + }, + ), + migrations.CreateModel( + name="ArticleDocxMarkup", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "created", + models.DateTimeField( + auto_now_add=True, verbose_name="Creation date" + ), + ), + ( + "updated", + models.DateTimeField( + auto_now=True, verbose_name="Last update date" + ), + ), + ( + "title", + models.TextField( + blank=True, null=True, verbose_name="Document Title" + ), + ), + ( + "file", + models.FileField( + blank=True, + null=True, + upload_to="uploads_docx/", + verbose_name="Document", + ), + ), + ("estatus", models.IntegerField(default=0)), + ( + "collection", + models.CharField( + default=markup_doc.models.get_default_collection_acron, + max_length=10, + ), + ), + ( + "journal_title", + models.TextField( + blank=True, null=True, verbose_name="Journal Title" + ), + ), + ( + "acronym", + models.TextField(blank=True, null=True, verbose_name="Acronym"), + ), + ( + "short_title", + models.TextField(blank=True, null=True, verbose_name="Short Title"), + ), + ( + "title_nlm", + models.TextField(blank=True, null=True, verbose_name="NLM Title"), + ), + ( + "issn", + models.TextField( + blank=True, null=True, verbose_name="ISSN (id SciELO)" + ), + ), + ( + "pissn", + models.TextField(blank=True, null=True, verbose_name="Print ISSN"), + ), + ( + "eissn", + models.TextField( + blank=True, null=True, verbose_name="Electronic ISSN" + ), + ), + ( + "nimtitle", + models.TextField(blank=True, null=True, verbose_name="Nimtitle"), + ), + ( + "pubname", + models.TextField( + blank=True, null=True, verbose_name="Publisher Name" + ), + ), + ( + "license", + models.URLField( + blank=True, + max_length=500, 
+ null=True, + verbose_name="License (URL)", + ), + ), + ( + "vol", + models.IntegerField(blank=True, null=True, verbose_name="Volume"), + ), + ( + "supplvol", + models.IntegerField( + blank=True, null=True, verbose_name="Suppl Volume" + ), + ), + ( + "issue", + models.IntegerField(blank=True, null=True, verbose_name="Issue"), + ), + ( + "supplno", + models.IntegerField( + blank=True, null=True, verbose_name="Suppl Num" + ), + ), + ( + "issid_part", + models.TextField(blank=True, null=True, verbose_name="Isid Part"), + ), + ( + "dateiso", + models.TextField(blank=True, null=True, verbose_name="Dateiso"), + ), + ( + "month", + models.TextField( + blank=True, null=True, verbose_name="Month/Season" + ), + ), + ( + "fpage", + models.TextField(blank=True, null=True, verbose_name="First Page"), + ), + ("seq", models.TextField(blank=True, null=True, verbose_name="@Seq")), + ( + "lpage", + models.TextField(blank=True, null=True, verbose_name="Last Page"), + ), + ( + "elocatid", + models.TextField( + blank=True, null=True, verbose_name="Elocation ID" + ), + ), + ( + "order", + models.TextField( + blank=True, null=True, verbose_name="Order (In TOC)" + ), + ), + ( + "pagcount", + models.TextField(blank=True, null=True, verbose_name="Pag count"), + ), + ( + "doctopic", + models.TextField(blank=True, null=True, verbose_name="Doc Topic"), + ), + ( + "language", + models.CharField( + blank=True, + choices=[ + ("aa", "Afar"), + ("af", "Afrikaans"), + ("ak", "Akan"), + ("sq", "Albanian"), + ("am", "Amharic"), + ("ar", "Arabic"), + ("an", "Aragonese"), + ("hy", "Armenian"), + ("as", "Assamese"), + ("av", "Avaric"), + ("ae", "Avestan"), + ("ay", "Aymara"), + ("az", "Azerbaijani"), + ("bm", "Bambara"), + ("ba", "Bashkir"), + ("eu", "Basque"), + ("be", "Belarusian"), + ("bn", "Bengali"), + ("bi", "Bislama"), + ("bs", "Bosnian"), + ("br", "Breton"), + ("bg", "Bulgarian"), + ("my", "Burmese"), + ("ca", "Catalan, Valencian"), + ("ch", "Chamorro"), + ("ce", "Chechen"), + ("ny", "Chichewa, 
Chewa, Nyanja"), + ("zh", "Chinese"), + ( + "cu", + "Church Slavic, Old Slavonic, Church Slavonic, Old Bulgarian, Old Church Slavonic", + ), + ("cv", "Chuvash"), + ("kw", "Cornish"), + ("co", "Corsican"), + ("cr", "Cree"), + ("hr", "Croatian"), + ("cs", "Czech"), + ("da", "Danish"), + ("dv", "Divehi, Dhivehi, Maldivian"), + ("nl", "Dutch, Flemish"), + ("dz", "Dzongkha"), + ("en", "English"), + ("eo", "Esperanto"), + ("et", "Estonian"), + ("ee", "Ewe"), + ("fo", "Faroese"), + ("fj", "Fijian"), + ("fi", "Finnish"), + ("fr", "French"), + ("fy", "Western Frisian"), + ("ff", "Fulah"), + ("gd", "Gaelic, Scottish Gaelic"), + ("gl", "Galician"), + ("lg", "Ganda"), + ("ka", "Georgian"), + ("de", "German"), + ("el", "Greek, Modern (1453–)"), + ("kl", "Kalaallisut, Greenlandic"), + ("gn", "Guarani"), + ("gu", "Gujarati"), + ("ht", "Haitian, Haitian Creole"), + ("ha", "Hausa"), + ("he", "Hebrew"), + ("hz", "Herero"), + ("hi", "Hindi"), + ("ho", "Hiri Motu"), + ("hu", "Hungarian"), + ("is", "Icelandic"), + ("io", "Ido"), + ("ig", "Igbo"), + ("id", "Indonesian"), + ( + "ia", + "Interlingua (International Auxiliary Language Association)", + ), + ("ie", "Interlingue, Occidental"), + ("iu", "Inuktitut"), + ("ik", "Inupiaq"), + ("ga", "Irish"), + ("it", "Italian"), + ("ja", "Japanese"), + ("jv", "Javanese"), + ("kn", "Kannada"), + ("kr", "Kanuri"), + ("ks", "Kashmiri"), + ("kk", "Kazakh"), + ("km", "Central Khmer"), + ("ki", "Kikuyu, Gikuyu"), + ("rw", "Kinyarwanda"), + ("ky", "Kirghiz, Kyrgyz"), + ("kv", "Komi"), + ("kg", "Kongo"), + ("ko", "Korean"), + ("kj", "Kuanyama, Kwanyama"), + ("ku", "Kurdish"), + ("lo", "Lao"), + ("la", "Latin"), + ("lv", "Latvian"), + ("li", "Limburgan, Limburger, Limburgish"), + ("ln", "Lingala"), + ("lt", "Lithuanian"), + ("lu", "Luba-Katanga"), + ("lb", "Luxembourgish, Letzeburgesch"), + ("mk", "Macedonian"), + ("mg", "Malagasy"), + ("ms", "Malay"), + ("ml", "Malayalam"), + ("mt", "Maltese"), + ("gv", "Manx"), + ("mi", "Maori"), + ("mr", "Marathi"), + 
("mh", "Marshallese"), + ("mn", "Mongolian"), + ("na", "Nauru"), + ("nv", "Navajo, Navaho"), + ("nd", "North Ndebele"), + ("nr", "South Ndebele"), + ("ng", "Ndonga"), + ("ne", "Nepali"), + ("no", "Norwegian"), + ("nb", "Norwegian Bokmål"), + ("nn", "Norwegian Nynorsk"), + ("ii", "Sichuan Yi, Nuosu"), + ("oc", "Occitan"), + ("oj", "Ojibwa"), + ("or", "Oriya"), + ("om", "Oromo"), + ("os", "Ossetian, Ossetic"), + ("pi", "Pali"), + ("ps", "Pashto, Pushto"), + ("fa", "Persian"), + ("pl", "Polish"), + ("pt", "Português"), + ("pa", "Punjabi, Panjabi"), + ("qu", "Quechua"), + ("ro", "Romanian, Moldavian, Moldovan"), + ("rm", "Romansh"), + ("rn", "Rundi"), + ("ru", "Russian"), + ("se", "Northern Sami"), + ("sm", "Samoan"), + ("sg", "Sango"), + ("sa", "Sanskrit"), + ("sc", "Sardinian"), + ("sr", "Serbian"), + ("sn", "Shona"), + ("sd", "Sindhi"), + ("si", "Sinhala, Sinhalese"), + ("sk", "Slovak"), + ("sl", "Slovenian"), + ("so", "Somali"), + ("st", "Southern Sotho"), + ("es", "Español"), + ("su", "Sundanese"), + ("sw", "Swahili"), + ("ss", "Swati"), + ("sv", "Swedish"), + ("tl", "Tagalog"), + ("ty", "Tahitian"), + ("tg", "Tajik"), + ("ta", "Tamil"), + ("tt", "Tatar"), + ("te", "Telugu"), + ("th", "Thai"), + ("bo", "Tibetan"), + ("ti", "Tigrinya"), + ("to", "Tonga (Tonga Islands)"), + ("ts", "Tsonga"), + ("tn", "Tswana"), + ("tr", "Turkish"), + ("tk", "Turkmen"), + ("tw", "Twi"), + ("ug", "Uighur, Uyghur"), + ("uk", "Ukrainian"), + ("ur", "Urdu"), + ("uz", "Uzbek"), + ("ve", "Venda"), + ("vi", "Vietnamese"), + ("vo", "Volapük"), + ("wa", "Walloon"), + ("cy", "Welsh"), + ("wo", "Wolof"), + ("xh", "Xhosa"), + ("yi", "Yiddish"), + ("yo", "Yoruba"), + ("za", "Zhuang, Chuang"), + ("zu", "Zulu"), + ], + max_length=10, + null=True, + verbose_name="Language", + ), + ), + ( + "spsversion", + models.TextField(blank=True, null=True, verbose_name="Sps version"), + ), + ( + "artdate", + models.DateField(blank=True, null=True, verbose_name="Artdate"), + ), + ( + "ahpdate", + 
models.DateField(blank=True, null=True, verbose_name="Ahpdate"), + ), + ( + "file_xml", + models.FileField( + blank=True, + null=True, + upload_to="generate_xml/", + verbose_name="Document xml", + ), + ), + ( + "text_xml", + models.TextField(blank=True, null=True, verbose_name="Text XML"), + ), + ( + "content", + wagtail.fields.StreamField( + [ + ( + "paragraph_with_language", + wagtail.blocks.StructBlock( + [ + ( + "label", + wagtail.blocks.ChoiceBlock( + choices=[ + ("<abstract>", "<abstract>"), + ( + "<abstract-title>", + "<abstract-title>", + ), + ("<aff>", "<aff>"), + ("<article-id>", "<article-id>"), + ( + "<article-title>", + "<article-title>", + ), + ( + "<author-notes>", + "<author-notes>", + ), + ("<contrib>", "<contrib>"), + ( + "<date-accepted>", + "<date-accepted>", + ), + ( + "<date-received>", + "<date-received>", + ), + ("<fig>", "<fig>"), + ("<fig-attrib>", "<fig-attrib>"), + ("<history>", "<history>"), + ("<kwd-title>", "<kwd-title>"), + ("<kwd-group>", "<kwd-group>"), + ("<list>", "<list>"), + ("<p>", "<p>"), + ("<sec>", "<sec>"), + ("<sub-sec>", "<sub-sec>"), + ("<subject>", "<subject>"), + ("<table>", "<table>"), + ("<table-foot>", "<table-foot>"), + ("<title>", "<title>"), + ( + "<trans-abstract>", + "<trans-abstract>", + ), + ("<trans-title>", "<trans-title>"), + ( + "<translate-front>", + "<translate-front>", + ), + ( + "<translate-body>", + "<translate-body>", + ), + ( + "<disp-formula>", + "<disp-formula>", + ), + ( + "<inline-formula>", + "<inline-formula>", + ), + ("<formula>", "<formula>"), + ], + label="Label", + required=False, + ), + ), + ( + "language", + wagtail.blocks.ChoiceBlock( + choices=[ + ("aa", "Afar"), + ("af", "Afrikaans"), + ("ak", "Akan"), + ("sq", "Albanian"), + ("am", "Amharic"), + ("ar", "Arabic"), + ("an", "Aragonese"), + ("hy", "Armenian"), + ("as", "Assamese"), + ("av", "Avaric"), + ("ae", "Avestan"), + ("ay", "Aymara"), + ("az", "Azerbaijani"), + ("bm", "Bambara"), + ("ba", "Bashkir"), + ("eu", "Basque"), + 
("be", "Belarusian"), + ("bn", "Bengali"), + ("bi", "Bislama"), + ("bs", "Bosnian"), + ("br", "Breton"), + ("bg", "Bulgarian"), + ("my", "Burmese"), + ("ca", "Catalan, Valencian"), + ("ch", "Chamorro"), + ("ce", "Chechen"), + ("ny", "Chichewa, Chewa, Nyanja"), + ("zh", "Chinese"), + ( + "cu", + "Church Slavic, Old Slavonic, Church Slavonic, Old Bulgarian, Old Church Slavonic", + ), + ("cv", "Chuvash"), + ("kw", "Cornish"), + ("co", "Corsican"), + ("cr", "Cree"), + ("hr", "Croatian"), + ("cs", "Czech"), + ("da", "Danish"), + ( + "dv", + "Divehi, Dhivehi, Maldivian", + ), + ("nl", "Dutch, Flemish"), + ("dz", "Dzongkha"), + ("en", "English"), + ("eo", "Esperanto"), + ("et", "Estonian"), + ("ee", "Ewe"), + ("fo", "Faroese"), + ("fj", "Fijian"), + ("fi", "Finnish"), + ("fr", "French"), + ("fy", "Western Frisian"), + ("ff", "Fulah"), + ("gd", "Gaelic, Scottish Gaelic"), + ("gl", "Galician"), + ("lg", "Ganda"), + ("ka", "Georgian"), + ("de", "German"), + ("el", "Greek, Modern (1453–)"), + ("kl", "Kalaallisut, Greenlandic"), + ("gn", "Guarani"), + ("gu", "Gujarati"), + ("ht", "Haitian, Haitian Creole"), + ("ha", "Hausa"), + ("he", "Hebrew"), + ("hz", "Herero"), + ("hi", "Hindi"), + ("ho", "Hiri Motu"), + ("hu", "Hungarian"), + ("is", "Icelandic"), + ("io", "Ido"), + ("ig", "Igbo"), + ("id", "Indonesian"), + ( + "ia", + "Interlingua (International Auxiliary Language Association)", + ), + ("ie", "Interlingue, Occidental"), + ("iu", "Inuktitut"), + ("ik", "Inupiaq"), + ("ga", "Irish"), + ("it", "Italian"), + ("ja", "Japanese"), + ("jv", "Javanese"), + ("kn", "Kannada"), + ("kr", "Kanuri"), + ("ks", "Kashmiri"), + ("kk", "Kazakh"), + ("km", "Central Khmer"), + ("ki", "Kikuyu, Gikuyu"), + ("rw", "Kinyarwanda"), + ("ky", "Kirghiz, Kyrgyz"), + ("kv", "Komi"), + ("kg", "Kongo"), + ("ko", "Korean"), + ("kj", "Kuanyama, Kwanyama"), + ("ku", "Kurdish"), + ("lo", "Lao"), + ("la", "Latin"), + ("lv", "Latvian"), + ( + "li", + "Limburgan, Limburger, Limburgish", + ), + ("ln", "Lingala"), 
+ ("lt", "Lithuanian"), + ("lu", "Luba-Katanga"), + ( + "lb", + "Luxembourgish, Letzeburgesch", + ), + ("mk", "Macedonian"), + ("mg", "Malagasy"), + ("ms", "Malay"), + ("ml", "Malayalam"), + ("mt", "Maltese"), + ("gv", "Manx"), + ("mi", "Maori"), + ("mr", "Marathi"), + ("mh", "Marshallese"), + ("mn", "Mongolian"), + ("na", "Nauru"), + ("nv", "Navajo, Navaho"), + ("nd", "North Ndebele"), + ("nr", "South Ndebele"), + ("ng", "Ndonga"), + ("ne", "Nepali"), + ("no", "Norwegian"), + ("nb", "Norwegian Bokmål"), + ("nn", "Norwegian Nynorsk"), + ("ii", "Sichuan Yi, Nuosu"), + ("oc", "Occitan"), + ("oj", "Ojibwa"), + ("or", "Oriya"), + ("om", "Oromo"), + ("os", "Ossetian, Ossetic"), + ("pi", "Pali"), + ("ps", "Pashto, Pushto"), + ("fa", "Persian"), + ("pl", "Polish"), + ("pt", "Português"), + ("pa", "Punjabi, Panjabi"), + ("qu", "Quechua"), + ( + "ro", + "Romanian, Moldavian, Moldovan", + ), + ("rm", "Romansh"), + ("rn", "Rundi"), + ("ru", "Russian"), + ("se", "Northern Sami"), + ("sm", "Samoan"), + ("sg", "Sango"), + ("sa", "Sanskrit"), + ("sc", "Sardinian"), + ("sr", "Serbian"), + ("sn", "Shona"), + ("sd", "Sindhi"), + ("si", "Sinhala, Sinhalese"), + ("sk", "Slovak"), + ("sl", "Slovenian"), + ("so", "Somali"), + ("st", "Southern Sotho"), + ("es", "Español"), + ("su", "Sundanese"), + ("sw", "Swahili"), + ("ss", "Swati"), + ("sv", "Swedish"), + ("tl", "Tagalog"), + ("ty", "Tahitian"), + ("tg", "Tajik"), + ("ta", "Tamil"), + ("tt", "Tatar"), + ("te", "Telugu"), + ("th", "Thai"), + ("bo", "Tibetan"), + ("ti", "Tigrinya"), + ("to", "Tonga (Tonga Islands)"), + ("ts", "Tsonga"), + ("tn", "Tswana"), + ("tr", "Turkish"), + ("tk", "Turkmen"), + ("tw", "Twi"), + ("ug", "Uighur, Uyghur"), + ("uk", "Ukrainian"), + ("ur", "Urdu"), + ("uz", "Uzbek"), + ("ve", "Venda"), + ("vi", "Vietnamese"), + ("vo", "Volapük"), + ("wa", "Walloon"), + ("cy", "Welsh"), + ("wo", "Wolof"), + ("xh", "Xhosa"), + ("yi", "Yiddish"), + ("yo", "Yoruba"), + ("za", "Zhuang, Chuang"), + ("zu", "Zulu"), + ], + 
label="Language", + required=False, + ), + ), + ( + "paragraph", + wagtail.blocks.TextBlock( + label="Title", required=False + ), + ), + ] + ), + ), + ( + "paragraph", + wagtail.blocks.StructBlock( + [ + ( + "label", + wagtail.blocks.ChoiceBlock( + choices=[ + ("<abstract>", "<abstract>"), + ( + "<abstract-title>", + "<abstract-title>", + ), + ("<aff>", "<aff>"), + ("<article-id>", "<article-id>"), + ( + "<article-title>", + "<article-title>", + ), + ( + "<author-notes>", + "<author-notes>", + ), + ("<contrib>", "<contrib>"), + ( + "<date-accepted>", + "<date-accepted>", + ), + ( + "<date-received>", + "<date-received>", + ), + ("<fig>", "<fig>"), + ("<fig-attrib>", "<fig-attrib>"), + ("<history>", "<history>"), + ("<kwd-title>", "<kwd-title>"), + ("<kwd-group>", "<kwd-group>"), + ("<list>", "<list>"), + ("<p>", "<p>"), + ("<sec>", "<sec>"), + ("<sub-sec>", "<sub-sec>"), + ("<subject>", "<subject>"), + ("<table>", "<table>"), + ("<table-foot>", "<table-foot>"), + ("<title>", "<title>"), + ( + "<trans-abstract>", + "<trans-abstract>", + ), + ("<trans-title>", "<trans-title>"), + ( + "<translate-front>", + "<translate-front>", + ), + ( + "<translate-body>", + "<translate-body>", + ), + ( + "<disp-formula>", + "<disp-formula>", + ), + ( + "<inline-formula>", + "<inline-formula>", + ), + ("<formula>", "<formula>"), + ], + label="Label", + required=False, + ), + ), + ( + "paragraph", + wagtail.blocks.TextBlock( + label="Paragraph", required=False + ), + ), + ] + ), + ), + ( + "author_paragraph", + wagtail.blocks.StructBlock( + [ + ( + "label", + wagtail.blocks.ChoiceBlock( + choices=[ + ("<abstract>", "<abstract>"), + ( + "<abstract-title>", + "<abstract-title>", + ), + ("<aff>", "<aff>"), + ("<article-id>", "<article-id>"), + ( + "<article-title>", + "<article-title>", + ), + ( + "<author-notes>", + "<author-notes>", + ), + ("<contrib>", "<contrib>"), + ( + "<date-accepted>", + "<date-accepted>", + ), + ( + "<date-received>", + "<date-received>", + ), + ("<fig>", 
"<fig>"), + ("<fig-attrib>", "<fig-attrib>"), + ("<history>", "<history>"), + ("<kwd-title>", "<kwd-title>"), + ("<kwd-group>", "<kwd-group>"), + ("<list>", "<list>"), + ("<p>", "<p>"), + ("<sec>", "<sec>"), + ("<sub-sec>", "<sub-sec>"), + ("<subject>", "<subject>"), + ("<table>", "<table>"), + ("<table-foot>", "<table-foot>"), + ("<title>", "<title>"), + ( + "<trans-abstract>", + "<trans-abstract>", + ), + ("<trans-title>", "<trans-title>"), + ( + "<translate-front>", + "<translate-front>", + ), + ( + "<translate-body>", + "<translate-body>", + ), + ( + "<disp-formula>", + "<disp-formula>", + ), + ( + "<inline-formula>", + "<inline-formula>", + ), + ("<formula>", "<formula>"), + ], + label="Label", + required=False, + ), + ), + ( + "paragraph", + wagtail.blocks.TextBlock( + label="Paragraph", required=False + ), + ), + ( + "surname", + wagtail.blocks.TextBlock( + label="Surname", required=False + ), + ), + ( + "given_names", + wagtail.blocks.TextBlock( + label="Given names", required=False + ), + ), + ( + "orcid", + wagtail.blocks.TextBlock( + label="Orcid", required=False + ), + ), + ( + "affid", + wagtail.blocks.TextBlock( + label="Aff id", required=False + ), + ), + ( + "char", + wagtail.blocks.TextBlock( + label="Char link", required=False + ), + ), + ] + ), + ), + ( + "aff_paragraph", + wagtail.blocks.StructBlock( + [ + ( + "label", + wagtail.blocks.ChoiceBlock( + choices=[ + ("<abstract>", "<abstract>"), + ( + "<abstract-title>", + "<abstract-title>", + ), + ("<aff>", "<aff>"), + ("<article-id>", "<article-id>"), + ( + "<article-title>", + "<article-title>", + ), + ( + "<author-notes>", + "<author-notes>", + ), + ("<contrib>", "<contrib>"), + ( + "<date-accepted>", + "<date-accepted>", + ), + ( + "<date-received>", + "<date-received>", + ), + ("<fig>", "<fig>"), + ("<fig-attrib>", "<fig-attrib>"), + ("<history>", "<history>"), + ("<kwd-title>", "<kwd-title>"), + ("<kwd-group>", "<kwd-group>"), + ("<list>", "<list>"), + ("<p>", "<p>"), + ("<sec>", "<sec>"), + 
("<sub-sec>", "<sub-sec>"), + ("<subject>", "<subject>"), + ("<table>", "<table>"), + ("<table-foot>", "<table-foot>"), + ("<title>", "<title>"), + ( + "<trans-abstract>", + "<trans-abstract>", + ), + ("<trans-title>", "<trans-title>"), + ( + "<translate-front>", + "<translate-front>", + ), + ( + "<translate-body>", + "<translate-body>", + ), + ( + "<disp-formula>", + "<disp-formula>", + ), + ( + "<inline-formula>", + "<inline-formula>", + ), + ("<formula>", "<formula>"), + ], + label="Label", + required=False, + ), + ), + ( + "paragraph", + wagtail.blocks.TextBlock( + label="Paragraph", required=False + ), + ), + ( + "affid", + wagtail.blocks.TextBlock( + label="Aff id", required=False + ), + ), + ( + "text_aff", + wagtail.blocks.TextBlock( + label="Full text Aff", required=False + ), + ), + ( + "char", + wagtail.blocks.TextBlock( + label="Char link", required=False + ), + ), + ( + "orgname", + wagtail.blocks.TextBlock( + label="Orgname", required=False + ), + ), + ( + "orgdiv2", + wagtail.blocks.TextBlock( + label="Orgdiv2", required=False + ), + ), + ( + "orgdiv1", + wagtail.blocks.TextBlock( + label="Orgdiv1", required=False + ), + ), + ( + "zipcode", + wagtail.blocks.TextBlock( + label="Zipcode", required=False + ), + ), + ( + "city", + wagtail.blocks.TextBlock( + label="City", required=False + ), + ), + ( + "state", + wagtail.blocks.TextBlock( + label="State", required=False + ), + ), + ( + "country", + wagtail.blocks.TextBlock( + label="Country", required=False + ), + ), + ( + "code_country", + wagtail.blocks.TextBlock( + label="Code country", required=False + ), + ), + ( + "original", + wagtail.blocks.TextBlock( + label="Original", required=False + ), + ), + ] + ), + ), + ], + blank=True, + use_json_field=True, + ), + ), + ( + "content_body", + wagtail.fields.StreamField( + [ + ( + "paragraph", + wagtail.blocks.StructBlock( + [ + ( + "label", + wagtail.blocks.ChoiceBlock( + choices=[ + ("<abstract>", "<abstract>"), + ( + "<abstract-title>", + 
"<abstract-title>", + ), + ("<aff>", "<aff>"), + ("<article-id>", "<article-id>"), + ( + "<article-title>", + "<article-title>", + ), + ( + "<author-notes>", + "<author-notes>", + ), + ("<contrib>", "<contrib>"), + ( + "<date-accepted>", + "<date-accepted>", + ), + ( + "<date-received>", + "<date-received>", + ), + ("<fig>", "<fig>"), + ("<fig-attrib>", "<fig-attrib>"), + ("<history>", "<history>"), + ("<kwd-title>", "<kwd-title>"), + ("<kwd-group>", "<kwd-group>"), + ("<list>", "<list>"), + ("<p>", "<p>"), + ("<sec>", "<sec>"), + ("<sub-sec>", "<sub-sec>"), + ("<subject>", "<subject>"), + ("<table>", "<table>"), + ("<table-foot>", "<table-foot>"), + ("<title>", "<title>"), + ( + "<trans-abstract>", + "<trans-abstract>", + ), + ("<trans-title>", "<trans-title>"), + ( + "<translate-front>", + "<translate-front>", + ), + ( + "<translate-body>", + "<translate-body>", + ), + ( + "<disp-formula>", + "<disp-formula>", + ), + ( + "<inline-formula>", + "<inline-formula>", + ), + ("<formula>", "<formula>"), + ], + label="Label", + required=False, + ), + ), + ( + "paragraph", + wagtail.blocks.TextBlock( + label="Paragraph", required=False + ), + ), + ] + ), + ), + ( + "paragraph_with_language", + wagtail.blocks.StructBlock( + [ + ( + "label", + wagtail.blocks.ChoiceBlock( + choices=[ + ("<abstract>", "<abstract>"), + ( + "<abstract-title>", + "<abstract-title>", + ), + ("<aff>", "<aff>"), + ("<article-id>", "<article-id>"), + ( + "<article-title>", + "<article-title>", + ), + ( + "<author-notes>", + "<author-notes>", + ), + ("<contrib>", "<contrib>"), + ( + "<date-accepted>", + "<date-accepted>", + ), + ( + "<date-received>", + "<date-received>", + ), + ("<fig>", "<fig>"), + ("<fig-attrib>", "<fig-attrib>"), + ("<history>", "<history>"), + ("<kwd-title>", "<kwd-title>"), + ("<kwd-group>", "<kwd-group>"), + ("<list>", "<list>"), + ("<p>", "<p>"), + ("<sec>", "<sec>"), + ("<sub-sec>", "<sub-sec>"), + ("<subject>", "<subject>"), + ("<table>", "<table>"), + ("<table-foot>", 
"<table-foot>"), + ("<title>", "<title>"), + ( + "<trans-abstract>", + "<trans-abstract>", + ), + ("<trans-title>", "<trans-title>"), + ( + "<translate-front>", + "<translate-front>", + ), + ( + "<translate-body>", + "<translate-body>", + ), + ( + "<disp-formula>", + "<disp-formula>", + ), + ( + "<inline-formula>", + "<inline-formula>", + ), + ("<formula>", "<formula>"), + ], + label="Label", + required=False, + ), + ), + ( + "language", + wagtail.blocks.ChoiceBlock( + choices=[ + ("aa", "Afar"), + ("af", "Afrikaans"), + ("ak", "Akan"), + ("sq", "Albanian"), + ("am", "Amharic"), + ("ar", "Arabic"), + ("an", "Aragonese"), + ("hy", "Armenian"), + ("as", "Assamese"), + ("av", "Avaric"), + ("ae", "Avestan"), + ("ay", "Aymara"), + ("az", "Azerbaijani"), + ("bm", "Bambara"), + ("ba", "Bashkir"), + ("eu", "Basque"), + ("be", "Belarusian"), + ("bn", "Bengali"), + ("bi", "Bislama"), + ("bs", "Bosnian"), + ("br", "Breton"), + ("bg", "Bulgarian"), + ("my", "Burmese"), + ("ca", "Catalan, Valencian"), + ("ch", "Chamorro"), + ("ce", "Chechen"), + ("ny", "Chichewa, Chewa, Nyanja"), + ("zh", "Chinese"), + ( + "cu", + "Church Slavic, Old Slavonic, Church Slavonic, Old Bulgarian, Old Church Slavonic", + ), + ("cv", "Chuvash"), + ("kw", "Cornish"), + ("co", "Corsican"), + ("cr", "Cree"), + ("hr", "Croatian"), + ("cs", "Czech"), + ("da", "Danish"), + ( + "dv", + "Divehi, Dhivehi, Maldivian", + ), + ("nl", "Dutch, Flemish"), + ("dz", "Dzongkha"), + ("en", "English"), + ("eo", "Esperanto"), + ("et", "Estonian"), + ("ee", "Ewe"), + ("fo", "Faroese"), + ("fj", "Fijian"), + ("fi", "Finnish"), + ("fr", "French"), + ("fy", "Western Frisian"), + ("ff", "Fulah"), + ("gd", "Gaelic, Scottish Gaelic"), + ("gl", "Galician"), + ("lg", "Ganda"), + ("ka", "Georgian"), + ("de", "German"), + ("el", "Greek, Modern (1453–)"), + ("kl", "Kalaallisut, Greenlandic"), + ("gn", "Guarani"), + ("gu", "Gujarati"), + ("ht", "Haitian, Haitian Creole"), + ("ha", "Hausa"), + ("he", "Hebrew"), + ("hz", "Herero"), + 
("hi", "Hindi"), + ("ho", "Hiri Motu"), + ("hu", "Hungarian"), + ("is", "Icelandic"), + ("io", "Ido"), + ("ig", "Igbo"), + ("id", "Indonesian"), + ( + "ia", + "Interlingua (International Auxiliary Language Association)", + ), + ("ie", "Interlingue, Occidental"), + ("iu", "Inuktitut"), + ("ik", "Inupiaq"), + ("ga", "Irish"), + ("it", "Italian"), + ("ja", "Japanese"), + ("jv", "Javanese"), + ("kn", "Kannada"), + ("kr", "Kanuri"), + ("ks", "Kashmiri"), + ("kk", "Kazakh"), + ("km", "Central Khmer"), + ("ki", "Kikuyu, Gikuyu"), + ("rw", "Kinyarwanda"), + ("ky", "Kirghiz, Kyrgyz"), + ("kv", "Komi"), + ("kg", "Kongo"), + ("ko", "Korean"), + ("kj", "Kuanyama, Kwanyama"), + ("ku", "Kurdish"), + ("lo", "Lao"), + ("la", "Latin"), + ("lv", "Latvian"), + ( + "li", + "Limburgan, Limburger, Limburgish", + ), + ("ln", "Lingala"), + ("lt", "Lithuanian"), + ("lu", "Luba-Katanga"), + ( + "lb", + "Luxembourgish, Letzeburgesch", + ), + ("mk", "Macedonian"), + ("mg", "Malagasy"), + ("ms", "Malay"), + ("ml", "Malayalam"), + ("mt", "Maltese"), + ("gv", "Manx"), + ("mi", "Maori"), + ("mr", "Marathi"), + ("mh", "Marshallese"), + ("mn", "Mongolian"), + ("na", "Nauru"), + ("nv", "Navajo, Navaho"), + ("nd", "North Ndebele"), + ("nr", "South Ndebele"), + ("ng", "Ndonga"), + ("ne", "Nepali"), + ("no", "Norwegian"), + ("nb", "Norwegian Bokmål"), + ("nn", "Norwegian Nynorsk"), + ("ii", "Sichuan Yi, Nuosu"), + ("oc", "Occitan"), + ("oj", "Ojibwa"), + ("or", "Oriya"), + ("om", "Oromo"), + ("os", "Ossetian, Ossetic"), + ("pi", "Pali"), + ("ps", "Pashto, Pushto"), + ("fa", "Persian"), + ("pl", "Polish"), + ("pt", "Português"), + ("pa", "Punjabi, Panjabi"), + ("qu", "Quechua"), + ( + "ro", + "Romanian, Moldavian, Moldovan", + ), + ("rm", "Romansh"), + ("rn", "Rundi"), + ("ru", "Russian"), + ("se", "Northern Sami"), + ("sm", "Samoan"), + ("sg", "Sango"), + ("sa", "Sanskrit"), + ("sc", "Sardinian"), + ("sr", "Serbian"), + ("sn", "Shona"), + ("sd", "Sindhi"), + ("si", "Sinhala, Sinhalese"), + ("sk", 
"Slovak"), + ("sl", "Slovenian"), + ("so", "Somali"), + ("st", "Southern Sotho"), + ("es", "Español"), + ("su", "Sundanese"), + ("sw", "Swahili"), + ("ss", "Swati"), + ("sv", "Swedish"), + ("tl", "Tagalog"), + ("ty", "Tahitian"), + ("tg", "Tajik"), + ("ta", "Tamil"), + ("tt", "Tatar"), + ("te", "Telugu"), + ("th", "Thai"), + ("bo", "Tibetan"), + ("ti", "Tigrinya"), + ("to", "Tonga (Tonga Islands)"), + ("ts", "Tsonga"), + ("tn", "Tswana"), + ("tr", "Turkish"), + ("tk", "Turkmen"), + ("tw", "Twi"), + ("ug", "Uighur, Uyghur"), + ("uk", "Ukrainian"), + ("ur", "Urdu"), + ("uz", "Uzbek"), + ("ve", "Venda"), + ("vi", "Vietnamese"), + ("vo", "Volapük"), + ("wa", "Walloon"), + ("cy", "Welsh"), + ("wo", "Wolof"), + ("xh", "Xhosa"), + ("yi", "Yiddish"), + ("yo", "Yoruba"), + ("za", "Zhuang, Chuang"), + ("zu", "Zulu"), + ], + label="Language", + required=False, + ), + ), + ( + "paragraph", + wagtail.blocks.TextBlock( + label="Title", required=False + ), + ), + ] + ), + ), + ( + "compound_paragraph", + wagtail.blocks.StructBlock( + [ + ( + "label", + wagtail.blocks.ChoiceBlock( + choices=[ + ("<abstract>", "<abstract>"), + ( + "<abstract-title>", + "<abstract-title>", + ), + ("<aff>", "<aff>"), + ("<article-id>", "<article-id>"), + ( + "<article-title>", + "<article-title>", + ), + ( + "<author-notes>", + "<author-notes>", + ), + ("<contrib>", "<contrib>"), + ( + "<date-accepted>", + "<date-accepted>", + ), + ( + "<date-received>", + "<date-received>", + ), + ("<fig>", "<fig>"), + ("<fig-attrib>", "<fig-attrib>"), + ("<history>", "<history>"), + ("<kwd-title>", "<kwd-title>"), + ("<kwd-group>", "<kwd-group>"), + ("<list>", "<list>"), + ("<p>", "<p>"), + ("<sec>", "<sec>"), + ("<sub-sec>", "<sub-sec>"), + ("<subject>", "<subject>"), + ("<table>", "<table>"), + ("<table-foot>", "<table-foot>"), + ("<title>", "<title>"), + ( + "<trans-abstract>", + "<trans-abstract>", + ), + ("<trans-title>", "<trans-title>"), + ( + "<translate-front>", + "<translate-front>", + ), + ( + 
"<translate-body>", + "<translate-body>", + ), + ( + "<disp-formula>", + "<disp-formula>", + ), + ( + "<inline-formula>", + "<inline-formula>", + ), + ("<formula>", "<formula>"), + ], + label="Label", + required=False, + ), + ), + ( + "eid", + wagtail.blocks.TextBlock( + label="Equation id", required=False + ), + ), + ( + "content", + wagtail.blocks.StreamBlock( + [ + ( + "text", + wagtail.blocks.TextBlock( + label="Text" + ), + ), + ( + "formula", + wagtail.blocks.TextBlock( + label="Formula" + ), + ), + ], + label="Content", + required=True, + ), + ), + ] + ), + ), + ( + "image", + wagtail.blocks.StructBlock( + [ + ( + "label", + wagtail.blocks.ChoiceBlock( + choices=[ + ("<abstract>", "<abstract>"), + ( + "<abstract-title>", + "<abstract-title>", + ), + ("<aff>", "<aff>"), + ("<article-id>", "<article-id>"), + ( + "<article-title>", + "<article-title>", + ), + ( + "<author-notes>", + "<author-notes>", + ), + ("<contrib>", "<contrib>"), + ( + "<date-accepted>", + "<date-accepted>", + ), + ( + "<date-received>", + "<date-received>", + ), + ("<fig>", "<fig>"), + ("<fig-attrib>", "<fig-attrib>"), + ("<history>", "<history>"), + ("<kwd-title>", "<kwd-title>"), + ("<kwd-group>", "<kwd-group>"), + ("<list>", "<list>"), + ("<p>", "<p>"), + ("<sec>", "<sec>"), + ("<sub-sec>", "<sub-sec>"), + ("<subject>", "<subject>"), + ("<table>", "<table>"), + ("<table-foot>", "<table-foot>"), + ("<title>", "<title>"), + ( + "<trans-abstract>", + "<trans-abstract>", + ), + ("<trans-title>", "<trans-title>"), + ( + "<translate-front>", + "<translate-front>", + ), + ( + "<translate-body>", + "<translate-body>", + ), + ( + "<disp-formula>", + "<disp-formula>", + ), + ( + "<inline-formula>", + "<inline-formula>", + ), + ("<formula>", "<formula>"), + ], + label="Label", + required=False, + ), + ), + ( + "figid", + wagtail.blocks.TextBlock( + label="Fig id", required=False + ), + ), + ( + "figlabel", + wagtail.blocks.TextBlock( + label="Fig label", required=False + ), + ), + ( + "title", + 
wagtail.blocks.TextBlock( + label="Title", required=False + ), + ), + ( + "alttext", + wagtail.blocks.TextBlock( + label="Alt text", required=False + ), + ), + ( + "image", + wagtail.images.blocks.ImageChooserBlock( + required=True + ), + ), + ] + ), + ), + ( + "table", + wagtail.blocks.StructBlock( + [ + ( + "label", + wagtail.blocks.ChoiceBlock( + choices=[ + ("<abstract>", "<abstract>"), + ( + "<abstract-title>", + "<abstract-title>", + ), + ("<aff>", "<aff>"), + ("<article-id>", "<article-id>"), + ( + "<article-title>", + "<article-title>", + ), + ( + "<author-notes>", + "<author-notes>", + ), + ("<contrib>", "<contrib>"), + ( + "<date-accepted>", + "<date-accepted>", + ), + ( + "<date-received>", + "<date-received>", + ), + ("<fig>", "<fig>"), + ("<fig-attrib>", "<fig-attrib>"), + ("<history>", "<history>"), + ("<kwd-title>", "<kwd-title>"), + ("<kwd-group>", "<kwd-group>"), + ("<list>", "<list>"), + ("<p>", "<p>"), + ("<sec>", "<sec>"), + ("<sub-sec>", "<sub-sec>"), + ("<subject>", "<subject>"), + ("<table>", "<table>"), + ("<table-foot>", "<table-foot>"), + ("<title>", "<title>"), + ( + "<trans-abstract>", + "<trans-abstract>", + ), + ("<trans-title>", "<trans-title>"), + ( + "<translate-front>", + "<translate-front>", + ), + ( + "<translate-body>", + "<translate-body>", + ), + ( + "<disp-formula>", + "<disp-formula>", + ), + ( + "<inline-formula>", + "<inline-formula>", + ), + ("<formula>", "<formula>"), + ], + label="Label", + required=False, + ), + ), + ( + "tabid", + wagtail.blocks.TextBlock( + label="Table id", required=False + ), + ), + ( + "tablabel", + wagtail.blocks.TextBlock( + label="Table label", required=False + ), + ), + ( + "title", + wagtail.blocks.TextBlock( + label="Title", required=False + ), + ), + ( + "content", + wagtail.blocks.TextBlock( + label="Content", required=False + ), + ), + ] + ), + ), + ], + blank=True, + use_json_field=True, + ), + ), + ( + "content_back", + wagtail.fields.StreamField( + [ + ( + "paragraph", + 
wagtail.blocks.StructBlock( + [ + ( + "label", + wagtail.blocks.ChoiceBlock( + choices=[ + ("<abstract>", "<abstract>"), + ( + "<abstract-title>", + "<abstract-title>", + ), + ("<aff>", "<aff>"), + ("<article-id>", "<article-id>"), + ( + "<article-title>", + "<article-title>", + ), + ( + "<author-notes>", + "<author-notes>", + ), + ("<contrib>", "<contrib>"), + ( + "<date-accepted>", + "<date-accepted>", + ), + ( + "<date-received>", + "<date-received>", + ), + ("<fig>", "<fig>"), + ("<fig-attrib>", "<fig-attrib>"), + ("<history>", "<history>"), + ("<kwd-title>", "<kwd-title>"), + ("<kwd-group>", "<kwd-group>"), + ("<list>", "<list>"), + ("<p>", "<p>"), + ("<sec>", "<sec>"), + ("<sub-sec>", "<sub-sec>"), + ("<subject>", "<subject>"), + ("<table>", "<table>"), + ("<table-foot>", "<table-foot>"), + ("<title>", "<title>"), + ( + "<trans-abstract>", + "<trans-abstract>", + ), + ("<trans-title>", "<trans-title>"), + ( + "<translate-front>", + "<translate-front>", + ), + ( + "<translate-body>", + "<translate-body>", + ), + ( + "<disp-formula>", + "<disp-formula>", + ), + ( + "<inline-formula>", + "<inline-formula>", + ), + ("<formula>", "<formula>"), + ], + label="Label", + required=False, + ), + ), + ( + "paragraph", + wagtail.blocks.TextBlock( + label="Paragraph", required=False + ), + ), + ] + ), + ), + ( + "ref_paragraph", + wagtail.blocks.StructBlock( + [ + ( + "label", + wagtail.blocks.ChoiceBlock( + choices=[ + ("<abstract>", "<abstract>"), + ( + "<abstract-title>", + "<abstract-title>", + ), + ("<aff>", "<aff>"), + ("<article-id>", "<article-id>"), + ( + "<article-title>", + "<article-title>", + ), + ( + "<author-notes>", + "<author-notes>", + ), + ("<contrib>", "<contrib>"), + ( + "<date-accepted>", + "<date-accepted>", + ), + ( + "<date-received>", + "<date-received>", + ), + ("<fig>", "<fig>"), + ("<fig-attrib>", "<fig-attrib>"), + ("<history>", "<history>"), + ("<kwd-title>", "<kwd-title>"), + ("<kwd-group>", "<kwd-group>"), + ("<list>", "<list>"), + ("<p>", 
"<p>"), + ("<sec>", "<sec>"), + ("<sub-sec>", "<sub-sec>"), + ("<subject>", "<subject>"), + ("<table>", "<table>"), + ("<table-foot>", "<table-foot>"), + ("<title>", "<title>"), + ( + "<trans-abstract>", + "<trans-abstract>", + ), + ("<trans-title>", "<trans-title>"), + ( + "<translate-front>", + "<translate-front>", + ), + ( + "<translate-body>", + "<translate-body>", + ), + ( + "<disp-formula>", + "<disp-formula>", + ), + ( + "<inline-formula>", + "<inline-formula>", + ), + ("<formula>", "<formula>"), + ], + label="Label", + required=False, + ), + ), + ( + "paragraph", + wagtail.blocks.TextBlock( + label="Paragraph", required=False + ), + ), + ( + "reftype", + wagtail.blocks.TextBlock( + label="Ref type", required=False + ), + ), + ( + "refid", + wagtail.blocks.TextBlock( + label="Ref id", required=False + ), + ), + ( + "authors", + wagtail.blocks.StreamBlock( + [ + ( + "Author", + wagtail.blocks.StructBlock( + [ + ( + "surname", + wagtail.blocks.TextBlock( + label="Surname", + required=False, + ), + ), + ( + "given_names", + wagtail.blocks.TextBlock( + label="Given names", + required=False, + ), + ), + ] + ), + ) + ], + label="Authors", + required=False, + ), + ), + ( + "date", + wagtail.blocks.TextBlock( + label="Date", required=False + ), + ), + ( + "title", + wagtail.blocks.TextBlock( + label="Title", required=False + ), + ), + ( + "chapter", + wagtail.blocks.TextBlock( + label="Chapter", required=False + ), + ), + ( + "edition", + wagtail.blocks.TextBlock( + label="Edition", required=False + ), + ), + ( + "source", + wagtail.blocks.TextBlock( + label="Source", required=False + ), + ), + ( + "vol", + wagtail.blocks.TextBlock( + label="Vol", required=False + ), + ), + ( + "issue", + wagtail.blocks.TextBlock( + label="Issue", required=False + ), + ), + ( + "pages", + wagtail.blocks.TextBlock( + label="Pages", required=False + ), + ), + ( + "fpage", + wagtail.blocks.TextBlock( + label="First page", required=False + ), + ), + ( + "lpage", + 
wagtail.blocks.TextBlock( + label="Last page", required=False + ), + ), + ( + "doi", + wagtail.blocks.TextBlock( + label="DOI", required=False + ), + ), + ( + "access_id", + wagtail.blocks.TextBlock( + label="Access id", required=False + ), + ), + ( + "degree", + wagtail.blocks.TextBlock( + label="Degree", required=False + ), + ), + ( + "organization", + wagtail.blocks.TextBlock( + label="Organization", required=False + ), + ), + ( + "location", + wagtail.blocks.TextBlock( + label="Location", required=False + ), + ), + ( + "org_location", + wagtail.blocks.TextBlock( + label="Org location", required=False + ), + ), + ( + "num_pages", + wagtail.blocks.TextBlock( + label="Num pages", required=False + ), + ), + ( + "uri", + wagtail.blocks.TextBlock( + label="Uri", required=False + ), + ), + ( + "version", + wagtail.blocks.TextBlock( + label="Version", required=False + ), + ), + ( + "access_date", + wagtail.blocks.TextBlock( + label="Access date", required=False + ), + ), + ] + ), + ), + ], + blank=True, + use_json_field=True, + ), + ), + ( + "creator", + models.ForeignKey( + editable=False, + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name="%(class)s_creator", + to=settings.AUTH_USER_MODEL, + verbose_name="Creator", + ), + ), + ( + "updated_by", + models.ForeignKey( + blank=True, + editable=False, + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name="%(class)s_last_mod_user", + to=settings.AUTH_USER_MODEL, + verbose_name="Updater", + ), + ), + ], + options={ + "abstract": False, + }, + ), + migrations.CreateModel( + name="MarkupXML", + fields=[], + options={ + "proxy": True, + "indexes": [], + "constraints": [], + }, + bases=("markup_doc.articledocxmarkup",), + ), + migrations.CreateModel( + name="UploadDocx", + fields=[], + options={ + "proxy": True, + "indexes": [], + "constraints": [], + }, + bases=("markup_doc.articledocxmarkup",), + ), + migrations.CreateModel( + name="CollectionModel", + fields=[ + ( + "id", + 
models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "collection", + models.ForeignKey( + blank=True, + null=True, + on_delete=django.db.models.deletion.SET_NULL, + to="markup_doc.collectionvaluesmodel", + ), + ), + ], + ), + migrations.CreateModel( + name="JournalModel", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "title", + models.TextField(blank=True, null=True, verbose_name="Title"), + ), + ( + "short_title", + models.TextField(blank=True, null=True, verbose_name="Short Title"), + ), + ( + "title_nlm", + models.TextField(blank=True, null=True, verbose_name="NLM Title"), + ), + ( + "acronym", + models.TextField(blank=True, null=True, verbose_name="Acronym"), + ), + ( + "issn", + models.TextField( + blank=True, null=True, verbose_name="ISSN (id SciELO)" + ), + ), + ( + "pissn", + models.TextField(blank=True, null=True, verbose_name="Print ISSN"), + ), + ( + "eissn", + models.TextField( + blank=True, null=True, verbose_name="Electronic ISSN" + ), + ), + ( + "pubname", + models.TextField( + blank=True, null=True, verbose_name="Publisher Name" + ), + ), + ], + options={ + "unique_together": {("title",)}, + }, + ), + migrations.AddField( + model_name="articledocxmarkup", + name="journal", + field=models.ForeignKey( + blank=True, + null=True, + on_delete=django.db.models.deletion.SET_NULL, + to="markup_doc.journalmodel", + ), + ), + ] diff --git a/markup_doc/migrations/0002_alter_articledocx_estatus_and_more.py b/markup_doc/migrations/0002_alter_articledocx_estatus_and_more.py new file mode 100644 index 0000000..600fba0 --- /dev/null +++ b/markup_doc/migrations/0002_alter_articledocx_estatus_and_more.py @@ -0,0 +1,23 @@ +# Generated by Django 5.0.8 on 2025-09-21 23:13 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('markup_doc', 
class ProcessStatus(models.IntegerChoices):
    """Integer-backed choices used by the ``estatus`` field of the document models."""

    # Each member is (stored integer value, translatable display label).
    PROCESSING = (1, _("Processing"))
    PROCESSED = (2, _("Processed"))
class ArticleDocx(CommonControlField):
    """Uploaded DOCX document together with its processing status."""

    # Free-text title; also used as the lookup key by ``get``/``update``.
    title = models.TextField(_("Document Title"), null=True, blank=True)
    # The uploaded document, stored under ``uploads_docx/``.
    file = models.FileField(
        null=True,
        blank=True,
        verbose_name=_("Document"),
        upload_to='uploads_docx/',
    )
    # One of ``ProcessStatus``; new rows start as PROCESSING.
    estatus = models.IntegerField(
        _("Process estatus"),
        choices=ProcessStatus.choices,
        blank=True,
        default=ProcessStatus.PROCESSING
    )

    panels = [
        FieldPanel("title"),
        FieldPanel("file"),
    ]

    base_form_class = CoreAdminModelForm

    def __str__(self):
        """Return ``"<title> | <estatus>"``."""
        return f"{self.title} | {self.estatus}"

    def __unicode__(self):
        """Python 2 style alias kept for backward compatibility."""
        return self.__str__()

    @classmethod
    def get(cls, title):
        """Return the single instance whose title equals ``title``.

        Raises whatever ``cls.objects.get`` raises when there is no match or
        more than one match.
        """
        return cls.objects.get(title=title)

    @classmethod
    def update(cls, title, estatus):
        """Set ``estatus`` on the instance identified by ``title`` and save it.

        Returns the updated instance, or ``None`` when no instance could be
        looked up. Previously the lookup failure was swallowed and the method
        then crashed with ``UnboundLocalError`` on the unassigned ``obj``.
        """
        try:
            obj = cls.get(title=title)
        except (cls.DoesNotExist, ValueError):
            # Nothing to update; report that explicitly instead of crashing.
            return None

        obj.estatus = estatus
        obj.save()
        return obj
class AuthorParagraphBlock(ParagraphBlock):
    """Paragraph block extended with optional author-related text fields."""

    # Declaration order is kept exactly as before: it defines the field order.
    surname = TextBlock(label=_("Surname"), required=False)
    given_names = TextBlock(label=_("Given names"), required=False)
    orcid = TextBlock(label=_("Orcid"), required=False)
    affid = TextBlock(label=_("Aff id"), required=False)
    char = TextBlock(label=_("Char link"), required=False)

    class Meta:
        label = _("Author Paragraph")
class RefParagraphBlock(ParagraphBlock):
    """Paragraph block extended with optional bibliographic-reference fields."""

    # Declaration order is kept exactly as before: it defines the field order.
    reftype = TextBlock(label=_("Ref type"), required=False)
    refid = TextBlock(label=_("Ref id"), required=False)
    # Authors are a stream of ``RefNameBlock`` entries (surname / given names).
    authors = StreamBlock(
        [("Author", RefNameBlock())],
        required=False,
        label=_("Authors"),
    )
    date = TextBlock(label=_("Date"), required=False)
    title = TextBlock(label=_("Title"), required=False)
    chapter = TextBlock(label=_("Chapter"), required=False)
    edition = TextBlock(label=_("Edition"), required=False)
    source = TextBlock(label=_("Source"), required=False)
    vol = TextBlock(label=_("Vol"), required=False)
    issue = TextBlock(label=_("Issue"), required=False)
    pages = TextBlock(label=_("Pages"), required=False)
    fpage = TextBlock(label=_("First page"), required=False)
    lpage = TextBlock(label=_("Last page"), required=False)
    doi = TextBlock(label=_("DOI"), required=False)
    access_id = TextBlock(label=_("Access id"), required=False)
    degree = TextBlock(label=_("Degree"), required=False)
    organization = TextBlock(label=_("Organization"), required=False)
    location = TextBlock(label=_("Location"), required=False)
    org_location = TextBlock(label=_("Org location"), required=False)
    num_pages = TextBlock(label=_("Num pages"), required=False)
    uri = TextBlock(label=_("Uri"), required=False)
    version = TextBlock(label=_("Version"), required=False)
    access_date = TextBlock(label=_("Access date"), required=False)

    class Meta:
        label = _("Ref Paragraph")
class ArticleDocxMarkup(CommonControlField, ClusterableModel):
    """Uploaded DOCX document plus the metadata and labeled content used for markup.

    Holds the source file, its processing status, journal/issue metadata, the
    generated XML (file and text), and three StreamFields with the labeled
    front, body and back content.
    """

    # --- Source document and processing state -------------------------------
    title = models.TextField(_("Document Title"), null=True, blank=True)
    file = models.FileField(
        null=True,
        blank=True,
        verbose_name=_("Document"),
        upload_to='uploads_docx/',
    )
    estatus = models.IntegerField(
        _("Process estatus"),
        choices=ProcessStatus.choices,
        blank=True,
        default=ProcessStatus.PROCESSING
    )

    # --- Collection / journal -----------------------------------------------
    # Default is computed at instance creation by get_default_collection_acron.
    collection = models.CharField(max_length=10, default=get_default_collection_acron)
    journal = models.ForeignKey(JournalModel, null=True, blank=True, on_delete=models.SET_NULL)

    journal_title = models.TextField(_("Journal Title"), null=True, blank=True)
    acronym = models.TextField(_("Acronym"), null=True, blank=True)
    short_title = models.TextField(_("Short Title"), null=True, blank=True)
    title_nlm = models.TextField(_("NLM Title"), null=True, blank=True)
    issn = models.TextField(_("ISSN (id SciELO)"), null=True, blank=True)
    pissn = models.TextField(_("Print ISSN"), null=True, blank=True)
    eissn = models.TextField(_("Electronic ISSN"), null=True, blank=True)
    nimtitle = models.TextField(_("Nimtitle"), null=True, blank=True)
    pubname = models.TextField(_("Publisher Name"), null=True, blank=True)
    license = models.URLField(
        max_length=500,
        blank=True,
        null=True,
        verbose_name=_("License (URL)")
    )

    # --- Issue / pagination metadata ----------------------------------------
    vol = models.IntegerField(
        verbose_name=_("Volume"),
        null=True,
        blank=True
    )
    supplvol = models.IntegerField(
        verbose_name=_("Suppl Volume"),
        null=True,
        blank=True
    )
    issue = models.IntegerField(
        verbose_name=_("Issue"),
        null=True,
        blank=True
    )
    supplno = models.IntegerField(
        verbose_name=_("Suppl Num"),
        null=True,
        blank=True
    )
    issid_part = models.TextField(_("Isid Part"), null=True, blank=True)
    dateiso = models.TextField(_("Dateiso"), null=True, blank=True)
    month = models.TextField(_("Month/Season"), null=True, blank=True)
    fpage = models.TextField(_("First Page"), null=True, blank=True)
    seq = models.TextField(_("@Seq"), null=True, blank=True)
    lpage = models.TextField(_("Last Page"), null=True, blank=True)
    elocatid = models.TextField(_("Elocation ID"), null=True, blank=True)
    order = models.TextField(_("Order (In TOC)"), null=True, blank=True)
    pagcount = models.TextField(_("Pag count"), null=True, blank=True)
    doctopic = models.TextField(_("Doc Topic"), null=True, blank=True)
    language = models.CharField(
        _("Language"),
        max_length=10,
        choices=LANGUAGE,
        null=True,
        blank=True
    )
    spsversion = models.TextField(_("Sps version"), null=True, blank=True)
    artdate = models.DateField(_("Artdate"), null=True, blank=True)
    ahpdate = models.DateField(_("Ahpdate"), null=True, blank=True)

    # --- Generated XML --------------------------------------------------------
    file_xml = models.FileField(
        null=True,
        blank=True,
        verbose_name=_("Document xml"),
        upload_to='generate_xml/',
    )

    text_xml = models.TextField(_("Text XML"), null=True, blank=True)

    # --- Labeled content (front / body / back) -------------------------------
    content = StreamField([
        ('paragraph_with_language', ParagraphWithLanguageBlock()),
        ('paragraph', ParagraphBlock()),
        ('author_paragraph', AuthorParagraphBlock()),
        ('aff_paragraph', AffParagraphBlock()),
    ], blank=True, use_json_field=True)

    content_body = StreamField([
        ('paragraph', ParagraphBlock()),
        ('paragraph_with_language', ParagraphWithLanguageBlock()),
        ('compound_paragraph', CompoundParagraphBlock()),
        ('image', ImageBlock()),
        ('table', TableBlock()),
    ], blank=True, use_json_field=True)

    content_back = StreamField([
        ('paragraph', ParagraphBlock()),
        ('ref_paragraph', RefParagraphBlock()),
    ], blank=True, use_json_field=True)

    panels = [
        FieldPanel("title"),
        FieldPanel("file"),
        FieldPanel("collection"),
        AutocompletePanel("journal")
    ]

    base_form_class = CoreAdminModelForm

    def __str__(self):
        """Return ``"<title> | <estatus>"``."""
        return f"{self.title} | {self.estatus}"

    def __unicode__(self):
        """Python 2 style alias kept for backward compatibility."""
        return self.__str__()

    @property
    def url_download(self):
        """URL of the generated XML file, or ``None`` when no file is set."""
        return self.file_xml.url if self.file_xml else None

    @classmethod
    def create(cls, title, doi):
        """Create, save and return a new instance with the given title.

        NOTE(review): no ``doi`` model field is declared in this class, so the
        value below is only set as a plain attribute on the returned object and
        is presumably not persisted -- confirm against ``CommonControlField``.
        The assignment is kept so callers reading ``obj.doi`` keep working.
        """
        obj = cls()
        obj.title = title
        obj.doi = doi
        obj.save()
        return obj

    @classmethod
    def get(cls, title):
        """Return the single instance whose title equals ``title``.

        Raises whatever ``cls.objects.get`` raises when there is no match or
        more than one match.
        """
        return cls.objects.get(title=title)

    @classmethod
    def update(cls, title, estatus):
        """Set ``estatus`` on the instance identified by ``title`` and save it.

        Returns the updated instance, or ``None`` when no instance could be
        looked up. Previously the lookup failure was swallowed and the method
        then crashed with ``UnboundLocalError`` on the unassigned ``obj``.
        """
        try:
            obj = cls.get(title=title)
        except (cls.DoesNotExist, ValueError):
            # Nothing to update; report that explicitly instead of crashing.
            return None

        obj.estatus = estatus
        obj.save()
        return obj
class MarkupXML(ArticleDocxMarkup):
    """Proxy of ``ArticleDocxMarkup`` exposing a five-tab editing interface.

    Tabs, in display order: XML, Details, Front, Body, Back.
    """

    # XML tab: the generated file (rendered through ReadOnlyFileWidget) and its text.
    panels_xml = [
        FieldPanel('file_xml', widget=ReadOnlyFileWidget()),
        FieldPanel('text_xml'),
    ]

    # Details tab: collection, journal chooser, then the plain metadata fields.
    panels_details = [
        FieldPanel('collection'),
        AutocompletePanel('journal'),
        *[
            FieldPanel(field_name)
            for field_name in (
                'journal_title',
                'short_title',
                'title_nlm',
                'acronym',
                'issn',
                'pissn',
                'eissn',
                'nimtitle',
                'pubname',
                'license',
                'vol',
                'supplvol',
                'issue',
                'supplno',
                'issid_part',
                'dateiso',
                'month',
                'fpage',
                'seq',
                'lpage',
                'elocatid',
                'order',
                'pagcount',
                'doctopic',
                'language',
                'spsversion',
                'artdate',
                'ahpdate',
            )
        ],
    ]

    # Front / Body / Back tabs: one StreamField each.
    panels_front = [FieldPanel('content')]
    panels_body = [FieldPanel('content_body')]
    panels_back = [FieldPanel('content_back')]

    edit_handler = TabbedInterface(
        [
            ObjectList(panels_xml, heading="XML"),
            ObjectList(panels_details, heading=_("Details")),
            ObjectList(panels_front, heading="Front"),
            ObjectList(panels_body, heading="Body"),
            ObjectList(panels_back, heading="Back"),
        ]
    )

    class Meta:
        proxy = True
def sync_journals_from_api():
    """Rebuild the local journal table from the SciELO core API.

    All existing ``JournalModel`` rows are deleted first. If a collection is
    selected (first ``CollectionModel`` row), the paginated journal endpoint
    is walked and every journal whose first ``scielo_journal`` entry belongs
    to that collection is stored with a single ``bulk_create``.

    A page that fails with ``ChunkedEncodingError`` is retried up to three
    times; any other error stops pagination. Journals collected up to that
    point are still saved.
    """
    journals = JournalModel.objects.all()
    if journals.exists():
        journals.delete()

    selected = CollectionModel.objects.select_related('collection').first()
    acron_selected = selected.collection.acron if selected and selected.collection else None
    if not acron_selected:
        return

    new_journals = []
    url = "https://core.scielo.org/api/v2/pid/journal/"
    retries = 3

    while url:
        try:
            response = requests.get(url, headers={"Accept": "application/json"}, timeout=(10, 60))
            response.raise_for_status()
            data = response.json()
            retries = 3  # a successful page resets the retry budget

            for item in data["results"]:
                title = item.get("title")

                # ``or`` guards against keys that are present but null; a
                # single such record used to abort the whole sync through
                # the catch-all handler below.
                scielo_journals = item.get("scielo_journal") or []
                first_entry = scielo_journals[0] if scielo_journals else {}
                collection_acron = first_entry.get("collection_acron")
                issn_scielo = first_entry.get("issn_scielo")

                # Skip records without a title or from another collection.
                if not title or acron_selected != collection_acron:
                    continue

                official = item.get("official") or {}
                publishers = item.get("publisher") or []
                pubname = publishers[0].get("name") if publishers else None

                # The NLM title is the MEDLINE entry (last one wins).
                title_nlm = None
                for entry in item.get("title_in_database") or []:
                    if entry.get("name") == 'MEDLINE':
                        title_nlm = entry.get("title")

                print(title)
                new_journals.append(
                    JournalModel(
                        title=title,
                        short_title=item.get("short_title") or None,
                        title_nlm=title_nlm or None,
                        acronym=item.get("acronym") or None,
                        issn=issn_scielo or None,
                        pissn=official.get("issn_print") or None,
                        eissn=official.get("issn_electronic") or None,
                        pubname=pubname or None,
                    )
                )

            url = data.get("next")
        except requests.exceptions.ChunkedEncodingError as e:
            # Truncated response: retry the same page a limited number of times.
            print("ERROR:", e)
            retries -= 1
            if retries == 0:
                break
            continue
        except Exception as e:
            # Any other failure ends pagination; what was collected is kept.
            print("**ERROR url")
            print("URL:", url)
            print("TIPO:", type(e).__name__)
            print("ERROR:", str(e))
            url = None

    # Persist everything in one statement.
    if new_journals:
        with transaction.atomic():
            JournalModel.objects.bulk_create(new_journals, ignore_conflicts=True)
def clean_labels(text):
    """Strip bracketed markup tags from ``text`` and normalise whitespace.

    Removes tags such as ``[kwd]``, ``[/ doctitle ]`` or ``[list-item]``
    (hyphenated names included, matching the helper in ``function_docx``),
    collapses runs of spaces/tabs, removes whitespace before ``; : , .``,
    collapses repeated newlines and trims both ends.
    """
    # ``[\w-]+`` rather than ``\w+`` so hyphenated tags are removed too.
    text = re.sub(r'\[\s*/?\s*[\w-]+(?:\s+[^\]]+)?\s*\]', '', text)

    # Collapse repeated spaces/tabs into one space.
    text = re.sub(r'[ \t]+', ' ', text)

    # Drop whitespace that precedes punctuation.
    text = re.sub(r'\s+([;:,.])', r'\1', text)

    # Collapse repeated line breaks.
    text = re.sub(r'\n+', '\n', text)

    return text.strip()


@celery_app.task()
def task_sync_journals_from_api():
    """Celery task wrapper around ``sync_journals_from_api``."""
    sync_journals_from_api()


@celery_app.task()
def get_labels(title, user_id):
    """Label the content of an uploaded DOCX and store it on the record.

    The document identified by ``title`` is parsed with ``functionsDocx``;
    each extracted item is routed to the front (``content``), body
    (``content_body``) or back (``content_back``) stream. The first block is
    sent to the configured LLM to obtain DOI, section, titles, authors and
    affiliations, and collected references are labelled at the end. The
    record is finally marked ``ProcessStatus.PROCESSED``.

    Args:
        title: Title of the ``UploadDocx`` record to process.
        user_id: Id of the requesting user, forwarded to the LLM helpers.

    Raises:
        ValueError: If the configured model is neither Gemini nor Llama when
            the first block has to be analysed.
    """
    article_docx = UploadDocx.objects.get(title=title)
    doc = functionsDocx.openDocx(article_docx.file.path)
    sections, content = functionsDocx().extractContent(doc, article_docx.file.path)

    stream_data = []
    stream_data_body = []
    stream_data_back = []
    num_ref = 0
    state = {
        'label': None,
        'label_next': None,
        'label_next_reset': None,
        'reset': False,
        'repeat': None,
        'body_trans': False,
        'body': False,
        'back': False,
        'references': False
    }
    counts = {
        'numref': 0,
        'numtab': 0,
        'numfig': 0,
        'numeq': 0
    }

    obj_reference = []
    skip_next = False
    # Bound up front: the reference step below needs a service even when
    # the document produced no ``first_block`` item.
    llm_first_block = None

    for i, item in enumerate(content):
        # The item after an abstract heading was already consumed.
        if skip_next:
            skip_next = False
            continue

        item_type = item.get('type')

        if item_type in ('<abstract>', '<date-accepted>', '<date-received>', '<kwd-group>'):
            if item_type == '<abstract>':
                if i + 1 < len(content):
                    stream_data.append({
                        'type': 'paragraph',
                        'value': {
                            'label': '<abstract-title>',
                            'paragraph': item.get('text')
                        }
                    })

                    # The abstract body is the item right after the heading.
                    next_item = content[i + 1]
                    skip_next = True
                    stream_data.append({
                        'type': 'paragraph_with_language',
                        'value': {
                            'label': '<abstract>',
                            'paragraph': next_item.get('text'),
                            'language': langid.classify(next_item.get('text'))[0] or None
                        }
                    })

            elif item_type == '<kwd-group>':
                keywords = extract_keywords(item.get('text'))
                stream_data.append({
                    'type': 'paragraph',
                    'value': {
                        'label': '<kwd-title>',
                        'paragraph': keywords['title']
                    }
                })
                stream_data.append({
                    'type': 'paragraph_with_language',
                    'value': {
                        'label': '<kwd-group>',
                        'paragraph': keywords['keywords'],
                        'language': langid.classify(keywords['title'].replace('<italic>', '').replace('</italic>', ''))[0] or None
                    }
                })

            else:
                stream_data.append({
                    'type': 'paragraph',
                    'value': {
                        'label': item_type,
                        'paragraph': item.get('text')
                    }
                })
            continue

        if item_type == 'first_block':
            llm_first_block = LlamaService(mode='prompt', temperature=0.1)
            model_name = get_llm_model_name()
            first_text = clean_labels(item.get('text'))

            if model_name == MODEL_NAME_GEMINI:
                raw_output = llm_first_block.run(LlamaInputSettings.get_first_metadata(first_text))
                # Keep only the JSON object if the model wrapped it in prose.
                match = re.search(r'\{.*\}', raw_output, re.DOTALL)
                output = json.loads(match.group(0) if match else raw_output)

            elif model_name == MODEL_NAME_LLAMA:
                output_author = get_data_first_block(first_text, 'author', user_id)
                output_affiliation = get_data_first_block(first_text, 'affiliation', user_id)
                output_doi = get_data_first_block(first_text, 'doi', user_id)
                output_title = get_data_first_block(first_text, 'title', user_id)

                # Merge the four partial answers into one structure.
                output = {
                    "doi": output_doi.get("doi", ""),
                    "section": output_doi.get("section", ""),
                    "titles": output_title,
                    "authors": output_author,
                    "affiliations": output_affiliation
                }

            else:
                # Previously surfaced as an unbound ``output`` NameError.
                raise ValueError(f"Unsupported LLM model: {model_name!r}")

            stream_data.append({
                'type': 'paragraph',
                'value': {
                    'label': '<article-id>',
                    'paragraph': output['doi']
                }
            })
            stream_data.append({
                'type': 'paragraph',
                'value': {
                    'label': '<subject>',
                    'paragraph': output['section']
                }
            })

            # Dedicated index names: reusing ``i`` here used to clobber the
            # outer loop index passed to ``create_labeled_object2`` below.
            for title_pos, tit in enumerate(output['titles']):
                stream_data.append({
                    'type': 'paragraph_with_language',
                    'value': {
                        'label': '<article-title>' if title_pos == 0 else '<trans-title>',
                        'paragraph': tit['title'],
                        'language': tit['language']
                    }
                })

            for auth in output['authors']:
                stream_data.append({
                    'type': 'author_paragraph',
                    'value': {
                        'label': '<contrib>',
                        'surname': auth['surname'],
                        'given_names': auth['name'],
                        'orcid': auth['orcid'],
                        'affid': auth['aff'],
                        'char': auth['char']
                    }
                })

            for aff in output['affiliations']:
                stream_data.append({
                    'type': 'aff_paragraph',
                    'value': {
                        'label': '<aff>',
                        'affid': aff['aff'],
                        'char': aff['char'],
                        'orgname': aff['orgname'],
                        'orgdiv2': aff['orgdiv2'],
                        'orgdiv1': aff['orgdiv1'],
                        'zipcode': aff['postal'],
                        'city': aff['city'],
                        'country': aff['name_country'],
                        'code_country': aff['code_country'],
                        'state': aff['state'],
                        'text_aff': aff['text_aff'],
                    }
                })

        if item_type in ('image', 'table', 'list', 'compound'):
            obj, counts = create_special_content_object(item, stream_data_body, counts)
            stream_data_body.append(obj)
            continue

        if item.get('text') is None or item.get('text') == '':
            # An empty paragraph may reset the pending label and closes the
            # back section, after which references are considered started.
            state['label_next'] = state['label_next_reset'] if state['reset'] else state['label_next']
            if state['back']:
                state['back'] = False
                state['body'] = False
                state['references'] = True
        else:
            obj, result, state = create_labeled_object2(i, item, state, sections)

            if result:
                # An "Introduction" heading after the references marks the
                # start of a translated body.
                if item.get('text').lower() in ['introducción', 'introduction', 'introdução'] and state['references']:
                    state['body_trans'] = True
                    stream_data_body.append({
                        'type': 'paragraph_with_language',
                        'value': {
                            'label': '<translate-body>',
                            'paragraph': 'Translate'
                        }
                    })

                if state['body']:
                    if state['references']:
                        if state['body_trans']:
                            stream_data_body.append(obj)
                        else:
                            stream_data.append(obj)
                    else:
                        stream_data_body.append(obj)
                elif state['back']:
                    if state['label'] == '<sec>':
                        stream_data_back.append(obj)
                    if state['label'] == '<p>':
                        # Reference paragraphs are labelled in bulk later.
                        num_ref = num_ref + 1
                        obj_reference.append({"num_ref": num_ref, "obj": obj, "text": obj['value']['paragraph'],})
                else:
                    stream_data.append(obj)

    num_refs = [ref["num_ref"] for ref in obj_reference]

    # NOTE(review): this compares against the literal 'LLAMA' while the
    # first-block step uses MODEL_NAME_LLAMA — confirm both are the same.
    if get_llm_model_name() == 'LLAMA':
        for obj_ref in obj_reference:
            stream_data_back.append(
                process_reference(obj_ref['num_ref'], obj_ref['obj'], user_id)
            )
    else:
        if llm_first_block is None:
            llm_first_block = LlamaService(mode='prompt', temperature=0.1)

        output = []
        for chunk in split_in_three(obj_reference):
            if len(chunk) > 0:
                text_references = "\n".join([ref["text"] for ref in chunk]).replace('<italic>', '').replace('</italic>', '')
                prompt_reference = create_prompt_reference(text_references)

                result = llm_first_block.run(prompt_reference)

                # Keep only the JSON array if the model wrapped it in prose.
                match = re.search(r'\[.*\]', result, re.DOTALL)
                if match:
                    output.extend(json.loads(match.group(0)))

        stream_data_back.extend(process_references(num_refs, output))

    article_docx.content = stream_data
    article_docx.content_body = stream_data_body
    article_docx.content_back = stream_data_back
    article_docx.save()

    article_docx.estatus = ProcessStatus.PROCESSED
    article_docx.save()
new file mode 100644 index 0000000..7ce503c --- /dev/null +++ b/markup_doc/tests.py @@ -0,0 +1,3 @@ +from django.test import TestCase + +# Create your tests here. diff --git a/markup_doc/wagtail_hooks.py b/markup_doc/wagtail_hooks.py new file mode 100644 index 0000000..044022a --- /dev/null +++ b/markup_doc/wagtail_hooks.py @@ -0,0 +1,230 @@ +from django.http import HttpResponseRedirect +from django.utils.translation import gettext_lazy as _ +from django.contrib import messages +from django.template.response import TemplateResponse +from wagtail_modeladmin.options import ModelAdmin + +from wagtail.snippets.views.snippets import ( + CreateView, + EditView, + SnippetViewSet, + SnippetViewSetGroup +) + +from markup_doc.models import ( + ArticleDocx, + ArticleDocxMarkup, + UploadDocx, + MarkupXML, + CollectionModel, + JournalModel, + ProcessStatus +) + +from config.menu import get_menu_order +from markup_doc.tasks import get_labels, task_sync_journals_from_api +from django.urls import path, reverse +from django.utils.html import format_html +from wagtail.admin import messages +from wagtail.admin.views import generic + +from django.shortcuts import redirect, get_object_or_404 +from django.views import View + +from wagtail.snippets.models import register_snippet +from django.db.models.signals import post_save +from django.dispatch import receiver +from django.db import transaction + +from wagtail import hooks +from django.templatetags.static import static +from markup_doc.sync_api import sync_collection_from_api, sync_journals_from_api + + +class ArticleDocxCreateView(CreateView): + #def get_form_class(self): + def dispatch(self, request, *args, **kwargs): + if not CollectionModel.objects.exists(): + messages.warning(request, "Debes seleccionar primero una colección.") + return HttpResponseRedirect(self.get_success_url()) + if not JournalModel.objects.exists(): + messages.warning(request, "Espera un momento, aún no existen elementos en Journal.") + return 
HttpResponseRedirect(self.get_success_url()) + return super().dispatch(request, *args, **kwargs) + + def form_valid(self, form): + self.object = form.save_all(self.request.user) + self.object.estatus = ProcessStatus.PROCESSING + self.object.save() + transaction.on_commit(lambda: get_labels.delay(self.object.title, self.request.user.id)) + return HttpResponseRedirect(self.get_success_url()) + + +class ArticleDocxEditView(EditView): + def form_valid(self, form): + form.instance.updated_by = self.request.user + form.instance.save() + return HttpResponseRedirect(self.get_success_url()) + + +class ArticleDocxAdmin(ModelAdmin): + model = ArticleDocx + create_view_class = ArticleDocxCreateView + menu_label = _("Documents") + menu_icon = "folder" + menu_order = 1 + add_to_settings_menu = False # or True to add your model to the Settings sub-menu + exclude_from_explorer = ( + False # or True to exclude pages of this type from Wagtail's explorer view + ) + list_per_page = 20 + list_display = ( + "title", + "get_estatus_display" + ) + + +class ArticleDocxMarkupCreateView(CreateView): + def form_valid(self, form): + self.object = form.save_all(self.request.user) + return HttpResponseRedirect(self.get_success_url()) + + +class ArticleDocxMarkupAdmin(ModelAdmin): + model = ArticleDocxMarkup + create_view_class = ArticleDocxMarkupCreateView + menu_label = _("Documents Markup") + menu_icon = "folder" + menu_order = 1 + add_to_settings_menu = False # or True to add your model to the Settings sub-menu + exclude_from_explorer = ( + False # or True to exclude pages of this type from Wagtail's explorer view + ) + list_per_page = 20 + + +class UploadDocxViewSet(SnippetViewSet): + model = UploadDocx + add_view_class = ArticleDocxCreateView + menu_label = _("Carregar DOCX") + menu_icon = "folder" + menu_order = 1 + add_to_settings_menu = False + exclude_from_explorer = False + list_per_page = 20 + list_display = ( + "title", + "get_estatus_display" # Usar estatus, não status + ) + 
search_fields = ("title",) + list_filter = ("estatus",) # Usar estatus, não status + + +class MarkupXMLViewSet(SnippetViewSet): + model = MarkupXML + add_view_class = ArticleDocxMarkupCreateView + edit_view_class = ArticleDocxEditView + menu_label = _("XML marcado") # Alterado de "MarkupXML" + menu_icon = "folder" + menu_order = 1 + add_to_settings_menu = False + exclude_from_explorer = False + list_display=("title", ) + list_per_page = 20 + search_fields = ("title",) + +""" +class MarkupAdminGroup(ModelAdminGroup): + menu_label = _("Markup") + menu_icon = "folder-open-inverse" + menu_order = 1 + items = (UploadDocxAdmin, MarkupXMLAdmin) + +modeladmin_register(MarkupAdminGroup) +""" + +class CollectionModelCreateView(CreateView): + def get_context_data(self, **kwargs): + context = super().get_context_data(**kwargs) + sync_collection_from_api() + return context + + def form_valid(self, form): + form.instance.save() + task_sync_journals_from_api.delay() + return HttpResponseRedirect(self.get_success_url()) + + """ + def get_initial(self): + initial = super().get_initial() + initial["campo"] = "valor inicial dinámico" + return initial + """ + + +class CollectionModelViewSet(SnippetViewSet): + model = CollectionModel + add_view_class = CollectionModelCreateView + menu_label = _("Modelo de Coleções") # Alterado de "CollectionModel" + menu_icon = "folder" + menu_order = 1 + add_to_settings_menu = False + exclude_from_explorer = False + list_per_page = 20 + list_display = ( + "collection", + ) + + +class JournalModelCreateView(CreateView): + def get_context_data(self, **kwargs): + context = super().get_context_data(**kwargs) + task_sync_journals_from_api + return context + + +class JournalModelViewSet(SnippetViewSet): + model = JournalModel + menu_label = _("Modelo de Revistas") # Alterado de "JournalModel" + menu_icon = "folder" + menu_order = 1 + add_to_settings_menu = False + exclude_from_explorer = False + list_per_page = 20 + list_display = ( + "title", + ) + + def 
index_view(self, request): + response = super().index_view(request) + + if isinstance(response, TemplateResponse): + if not CollectionModel.objects.exists(): + messages.warning(request, "Debes seleccionar primero una colección.") + response.context_data["can_add"] = False + response.context_data["can_add_snippet"] = False + return response + + if not JournalModel.objects.exists(): + messages.warning(request, "Sincronizando journals desde la API, espera unos momentos…") + response.context_data["can_add"] = False + response.context_data["can_add_snippet"] = False + return response + + return response + + +class MarkupSnippetViewSetGroup(SnippetViewSetGroup): + menu_name = 'docx_files' # Renomeado de 'docx_processor' + menu_label = _('DOCX Files') + menu_icon = "folder-open-inverse" + menu_order = 0 # Mudado de 1 para 0 para ficar na primeira posição + items = ( + UploadDocxViewSet, + MarkupXMLViewSet, + CollectionModelViewSet, + JournalModelViewSet + ) + + +register_snippet(MarkupSnippetViewSetGroup) \ No newline at end of file diff --git a/markuplib/__init__.py b/markuplib/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/markuplib/function_docx.py b/markuplib/function_docx.py new file mode 100644 index 0000000..255c13e --- /dev/null +++ b/markuplib/function_docx.py @@ -0,0 +1,587 @@ +import docx +from docx.oxml.table import CT_Tbl +from docx.oxml.text.paragraph import CT_P +from docx.oxml.ns import qn +from lxml import etree, objectify +from django.core.files.base import ContentFile +import re, zipfile +import os +from wagtail.images import get_image_model + +ImageModel = get_image_model() + + +class functionsDocx: + + def openDocx(filename): + doc = docx.Document(filename) + return doc + + # Función: solo reemplaza mfenced que NO tengan atributos open/close y que usen | + def replace_mfenced_pipe_only(self, mathml_root): + mml_ns = "http://www.w3.org/1998/Math/MathML" + for mfenced in mathml_root.xpath(".//mml:mfenced", namespaces={"mml": 
def extract_numbering_info(self, docx_path):
    """Map each abstract numbering definition of a DOCX to its level formats.

    Reads ``word/numbering.xml`` from the DOCX archive.

    Args:
        docx_path: Path (or file-like object) of the DOCX archive.

    Returns:
        ``None`` when the archive has no ``word/numbering.xml``. Otherwise a
        dict keyed by ``abstractNumId``; each value maps a level index
        (``ilvl``, as a string) to its ``numFmt`` value and carries a
        ``'numId'`` key holding the ``w:num`` id that references the
        definition (``None`` when nothing references it).
    """
    w_ns = 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'
    attr = '{' + w_ns + '}'
    # Explicit prefix map: the lookup no longer depends on the document
    # happening to declare the ``w`` prefix on the inspected element.
    ns = {'w': w_ns}

    # ``archive`` rather than ``docx`` so the imported module is not shadowed.
    with zipfile.ZipFile(docx_path, 'r') as archive:
        if 'word/numbering.xml' not in archive.namelist():
            return None
        numbering_tree = etree.fromstring(archive.read('word/numbering.xml'))

    numbering_map = {}

    # Collect the number format of every level of each abstract definition.
    for abstract_num in numbering_tree.findall('.//w:abstractNum', namespaces=ns):
        abstract_num_id = abstract_num.get(attr + 'abstractNumId')
        for lvl in abstract_num.findall('.//w:lvl', namespaces=ns):
            num_fmt = lvl.find('.//w:numFmt', namespaces=ns)
            if num_fmt is None:
                # A level without a format used to raise AttributeError.
                continue
            # 'numId' is pre-seeded so readers can index it unconditionally.
            levels = numbering_map.setdefault(abstract_num_id, {'numId': None})
            levels[lvl.get(attr + 'ilvl')] = num_fmt.get(attr + 'val')

    # Attach the concrete numId to the abstract definition it points to.
    # NOTE(review): when several w:num share one abstractNum only the last
    # numId is kept — confirm whether callers need all of them.
    for num in numbering_tree.findall('.//w:num', namespaces=ns):
        abstract_ref = num.find('.//w:abstractNumId', namespaces=ns)
        if abstract_ref is None:
            continue
        abstract_num_id = abstract_ref.get(attr + 'val')
        if abstract_num_id in numbering_map:
            numbering_map[abstract_num_id]['numId'] = num.get(attr + 'numId')

    return numbering_map
Buscar hipervínculos en imágenes (recursivo con .//) + for hlink in element.findall('.//a:hlinkClick', namespaces=namespaces): + r_id = hlink.attrib.get('{http://schemas.openxmlformats.org/officeDocument/2006/relationships}id') + if r_id and r_id in rels_map: + links.append(rels_map[r_id]) + + return ' '.join(links) if links else None + + + def extractContent(self, doc, doc_path): + + list_types = self.extract_numbering_info(doc_path) + + hiperlinks_info = self.extract_hiperlinks_info(doc_path) + + found_hiperlinks = True + + # Obtener el directorio actual del archivo .py + BASE_DIR = os.path.dirname(os.path.abspath(__file__)) + + # Construir la ruta completa al archivo XSLT + xslt_path = os.path.join(BASE_DIR, "omml2mml.xsl") + + # Cargar XSLT y prepararlo + xslt = etree.parse(xslt_path) + transform = etree.XSLT(xslt) + + def match_paragraph(text): + keywords = r'(?im)^\s*(?:<italic>)?\s*(palabra(?:s)?\s*clave|palavras?\s*-?\s*chave|keywords?)\s*(?:</italic>)?\s*(?::|<italic>\s*:\s*</italic>)\s*(.+)$' + #history = r'\d{2}/\d{2}/\d{4}' + #corresp = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}' + abstract = r'(?i)^resumen|^resumo|^abstract' + accepted = r'(?i)aceptado|accepted|aceited|aprovado' + received = r'(?i)recibido|received|recebido' + + if re.search(keywords, text): + return '<kwd-group>' + #if re.search(history, text): + #return '<history>' + #if re.search(corresp, text): + #return '<corresp>' + if re.search(abstract, text): + return '<abstract>' + if re.search(accepted, text): + return '<date-accepted>' + if re.search(received, text): + return '<date-received>' + return False + + def matches_section(a, b): + try: + return ( + a.get('size') == b.get('size') and + a.get('bold') == b.get('bold') and + a.get('isupper') == b.get('isupper') + ) + except Exception as e: + print(f"Error comparando secciones: {e}") + return False + + def section_priority(sections): + return (-sections['size'], not sections['bold'], not sections['isupper']) + + def 
identify_section(sections, size, bold, text): + if size == 0: + return sections + + isupper = text.isupper() + s_id = {'size': size, 'bold': bold, 'isupper': isupper, 'count': 0} + + if len(sections) == 0: + sections.append(s_id) + return sections + + for section in sections: + if matches_section(s_id, section): + section['count'] += 1 + return sections + + sections.append(s_id) + return sections + + def clean_labels(text): + # Eliminar etiquetas cuadradas tipo [ ... ] con espacios opcionales + extract_label = re.sub(r'\[\s*/?\s*[\w-]+(?:\s+[^\]]+)?\s*\]', '', text) + + # Reemplazar múltiples espacios por uno solo + clean_text = re.sub(r'\s+', ' ', extract_label) + + # Eliminar espacio antes de signos de puntuación + clean_text = re.sub(r'\s+([;:,.])', r'\1', clean_text) + + return clean_text.strip() + + def extrae_Tabla(element, rels_map, namespaces): + # Inicializa la estructura HTML de la tabla + html = "<table border='1'>\n" + + # Almacena las combinaciones para las celdas + rowspan_dict = {} # {(row, col): rowspan_count} + colspan_dict = {} # {(row, col): colspan_count} + + # Itera sobre las filas de la tabla + for i, row in enumerate(element.xpath('.//w:tr')): + hiperlinks = self.extract_hiperlink(row, rels_map, namespaces) if found_hiperlinks else None + + html += " <tr>\n" + # Itera sobre las celdas de cada fila + j = 0 # índice de columna + for cell in row.xpath('.//w:tc'): + # Revisa si la celda está en una posición afectada por rowspan + while (i, j) in rowspan_dict and rowspan_dict[(i, j)] > 0: + # Reduce el contador de rowspan + rowspan_dict[(i, j)] -= 1 + j += 1 # Mueve a la siguiente columna + + # Revisa las propiedades de la celda para rowspan y colspan + cell_props = cell.xpath('.//w:tcPr') + rowspan = 1 + colspan = 1 + + # Procesa rowspan (vMerge) + v_merge_fin = False + v_merge = cell.xpath('.//w:vMerge') + if v_merge: + v_merge_val = v_merge[0].get(qn('w:val')) + if v_merge_val == "restart": + # Es el inicio de una combinación vertical + rowspan 
= 1 + # Busca el total de filas combinadas contando hacia abajo + k = i + 1 + while k < len(element.xpath('.//w:tr')): + try: + next_cell = element.xpath('.//w:tr')[k].xpath('.//w:tc')[j] + next_merge = next_cell.xpath('.//w:tcPr//w:vMerge') + except: + next_cell = None + next_merge = None + + if next_merge and next_merge[0].get(qn('w:val')) is None: + rowspan += 1 + else: + break + k += 1 + + for k in range(rowspan): + rowspan_dict[(i + k, j)] = rowspan - k - 1 + else: + v_merge_fin = True + + # Procesa colspan (gridSpan) + grid_span = cell.xpath('.//w:gridSpan') + if grid_span: + colspan = int(grid_span[0].get(qn('w:val'))) + for k in range(colspan): + colspan_dict[(i, j + k)] = colspan - k - 1 + + if not v_merge_fin: + # Obtén el contenido del texto de la celda + cell_text = "<br>".join([t.text for t in cell.xpath('.//w:t')]) + cell_text = clean_labels(cell_text) + (f" {hiperlinks}" if hiperlinks else "") + + # Determina el tag a usar (th para el encabezado, td para celdas normales) + tag = "th" if i == 0 else "td" + + # Construye la celda en HTML + cell_html = f" <{tag}" + if rowspan > 1: + cell_html += f' rowspan="{rowspan}"' + if colspan > 1: + cell_html += f' colspan="{colspan}"' + cell_html += f">{cell_text}</{tag}>\n" + + html += cell_html + j += 1 + (colspan - 1) # Avanza las columnas tomando en cuenta el colspan + + html += " </tr>\n" + + html += "</table>" + return html + + content = [] + sections = [] + images = [] + found_fb = False + review_fb = True + #Palabras a buscar como indicador del primer bloque + start_text = ['introducción', 'introduction', 'introdução'] + + current_list = [] + current_num_id = None + numId = None + namespaces_p = '{http://schemas.openxmlformats.org/wordprocessingml/2006/main}' + + for element in doc.element.body: + if isinstance(element, CT_P): + obj = {} + + namespaces = { + 'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main', + 'a': 'http://schemas.openxmlformats.org/drawingml/2006/main', + 'r': 
'http://schemas.openxmlformats.org/officeDocument/2006/relationships' + } + + hiperlinks = self.extract_hiperlink(element, hiperlinks_info, namespaces) if found_hiperlinks else None + + obj_image = False + obj_formula = False + + for drawing in element.findall('.//w:drawing', namespaces=namespaces): + if drawing.find('.//a:blip', namespaces=namespaces) is not None: + blip = drawing.find('.//a:blip', namespaces=namespaces) + if blip is not None: + obj_image = True + + rId = blip.get('{http://schemas.openxmlformats.org/officeDocument/2006/relationships}embed') + image_part = doc.part.related_parts[rId] + image_data = image_part.blob + image_name = image_part.partname.split('/')[-1] + + if image_name not in images: + images.append(image_name) + + # Guardar la imagen en Wagtail + wagtail_image = ImageModel.objects.create( + title=image_name, + file=ContentFile(image_data, name=image_name) + ) + + # Referenciar la imagen guardada en el objeto + obj['type'] = 'image' + obj['image'] = wagtail_image.id + + ns_math = { + 'm': 'http://schemas.openxmlformats.org/officeDocument/2006/math', + 'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main' + } + + for formula in element.findall('.//m:oMathPara', namespaces=ns_math): + obj_formula = True + mathml_result = transform(formula) + mathml_root = etree.fromstring(str(mathml_result)) + mathml_root = self.replace_mfenced_pipe_only(mathml_root) + obj['type'] = 'formula' + obj['formula'] = etree.tostring(mathml_root, pretty_print=True, encoding='unicode') + + + if not obj_image: + paragraph = element + text_paragraph = [] + + # Determina si es parte de una lista + is_numPr = paragraph.find('.//w:numPr', namespaces=paragraph.nsmap) is not None + + # obtiene id y nivel + if is_numPr: + numPr = paragraph.find('.//w:numPr', namespaces=paragraph.nsmap) + numId = numPr.find('.//w:numId', namespaces=paragraph.nsmap).get(namespaces_p + 'val') + type = [(key, objt) for key, objt in list_types.items() if objt['numId'] == numId] + 
+ #Es una lista diferente + if numId != current_num_id: + current_num_id = numId + if len(current_list) > 0: + current_list.append('[/list]') + objl = {} + objl['type'] = 'list' + objl['list'] = '\n'.join(current_list) + current_list = [] + content.append(objl) + list_type = 'bullet' + if type[0][1][str(0)] == 'decimal': + list_type = 'order' + + current_list.append(f'[list list-type="{list_type}"]') + else: + #Se terminaron de agregar elementos a la lista + if len(current_list) > 0: + current_list.append('[/list]') + objl = {} + objl['type'] = 'list' + objl['list'] = '\n'.join(current_list) + current_list = [] + content.append(objl) + + for child in paragraph: + if child.tag == '{http://schemas.openxmlformats.org/wordprocessingml/2006/main}hyperlink': + for r in child.findall('w:r', namespaces=child.nsmap): + t_elem = r.find('w:t', namespaces=child.nsmap) + if t_elem is not None and t_elem.text: + text_paragraph.append(t_elem.text) + + elif child.tag == '{http://schemas.openxmlformats.org/wordprocessingml/2006/main}r': + namespaces = '{http://schemas.openxmlformats.org/wordprocessingml/2006/main}' + sz_element = child.find('.//w:sz', namespaces=child.nsmap) + obj['font_size'] = 0 + + if sz_element is None: + p_pr = paragraph.find('.//w:rPr/w:sz', namespaces=child.nsmap) + if p_pr is not None: + sz_element = p_pr.find('.//w:pPr', namespaces=child.nsmap) + + if sz_element is not None: + xml_string = etree.tostring(sz_element, pretty_print=True, encoding='unicode') + size_element = objectify.fromstring(xml_string) + font_size_value = size_element.get(namespaces+'val') + obj['font_size'] = int(font_size_value)/2 + + color_element = child.find('.//w:color', namespaces=child.nsmap) + + if color_element is None: + p_pr = paragraph.find('.//w:pPr', namespaces=child.nsmap) + if p_pr is not None: + color_element = p_pr.find('.//w:rPr/w:color', namespaces=child.nsmap) + + if color_element is not None: + xml_string_color = etree.tostring(color_element, pretty_print=True, 
encoding='unicode') + object_element = objectify.fromstring(xml_string_color) + color_value = object_element.get(namespaces + 'val') + obj['color'] = color_value + + b_tag = child.find('.//w:b', namespaces=child.nsmap) + + if b_tag is None: + p_pr = paragraph.find('.//w:rPr/w:b', namespaces=child.nsmap) + if p_pr is not None: + b_tag = p_pr.find('.//w:pPr', namespaces=child.nsmap) + + if b_tag is not None: + val = b_tag.get('{http://schemas.openxmlformats.org/wordprocessingml/2006/main}val') + obj['bold'] = (val is None or val in ['1', 'true', 'True']) + else: + obj['bold'] = False + + i_tag = child.find('.//w:i', namespaces=child.nsmap) + + if i_tag is None: + p_pr = paragraph.find('.//w:rPr/w:i', namespaces=child.nsmap) + if p_pr is not None: + i_tag = p_pr.find('.//w:pPr', namespaces=child.nsmap) + + if i_tag is not None: + val = i_tag.get('{http://schemas.openxmlformats.org/wordprocessingml/2006/main}val') + obj['italic'] = (val is None or val in ['1', 'true', 'True']) + else: + obj['italic'] = False + + s_tag = child.find('.//w:spacing', namespaces=child.nsmap) + + if s_tag is None: + p_pr = paragraph.find('.//w:rPr/w:spacing', namespaces=child.nsmap) + if p_pr is not None: + s_tag = p_pr.find('.//w:pPr', namespaces=child.nsmap) + + if s_tag is not None: + val = s_tag.get('{http://schemas.openxmlformats.org/wordprocessingml/2006/main}before') + obj['spacing'] = not (val is None) + else: + obj['spacing'] = False + + clean_text = clean_labels(child.text) + + #identifica sección + sections = identify_section(sections, obj['font_size'], obj['bold'] , clean_text) + + if obj['italic']: + text_paragraph.append('<italic>' + clean_text + '</italic>' + (f" {hiperlinks}" if hiperlinks else "")) + else: + text_paragraph.append(clean_text + (f" {hiperlinks}" if hiperlinks else "")) + + paraph = match_paragraph(clean_text) + if paraph: + obj['paraph'] = paraph + obj['type'] = paraph + + if review_fb: + found_fb = any(word in clean_text.lower() for word in start_text) + + 
#Si se encontró alguna palabra, incluye todo lo anterior en un sólo bloque + if found_fb: + found_fb = False + review_fb = False + found_hiperlinks = False + sections = [sections[-1]] + first_block = '' + tmp_content = [] + abstract_mode = False + + for c in content: + if abstract_mode: + if c['text'] == '' or c['spacing'] is True: + abstract_mode = False + else: + tmp_content.append(c) + continue + + if 'paraph' in c: + tmp_content.append(c) + abstract_mode = False + if c['paraph'] == '<abstract>': + abstract_mode = True + continue + else: + if 'text' in c: + first_block = first_block + "\n" + c["text"] + if 'table' in c: + first_block = first_block + "\n" + c["table"] + + obj_b = {} + obj_b['type'] = 'first_block' + obj_b['text'] = first_block + tmp_content.append(obj_b) + content = tmp_content + start_text = [] + + if child.tag == f"{{{ns_math['m']}}}oMath": + if 'text' not in obj or not isinstance(obj['text'], list): + obj['type'] = 'compound' + obj['text'] = [] + if len(text_paragraph) > 0: + obj2 = {} + obj2['type'] = 'text' + obj2['value'] = ' '.join(text_paragraph) + obj['text'].append(obj2) + text_paragraph = [] + + mathml_result = transform(child) + mathml_root = etree.fromstring(str(mathml_result)) + self.replace_mfenced_pipe_only(mathml_root) + obj2 = {} + obj2['type'] = 'formula' + obj2['value'] = etree.tostring(mathml_root, pretty_print=True, encoding='unicode') + obj['text'].append(obj2) + + if 'text' not in obj: + obj['text'] = (' '.join(text_paragraph)).strip() + clean_text = clean_labels(obj['text']) + obj['text'] = clean_text + + paraph = match_paragraph(obj['text']) + if paraph: + obj['paraph'] = paraph + obj['type'] = paraph + + if is_numPr: + if 'font_size' in obj: + del obj['font_size'] + current_list.append(f'[list-item]{obj["text"]}[/list-item]') + if isinstance(obj['text'], list) and len(text_paragraph) > 0: + obj2 = {} + obj2['type'] = 'text' + obj2['value'] = ' '.join(text_paragraph) + obj['text'].append(obj2) + text_paragraph = [] + + 
elif isinstance(element, CT_Tbl): + namespaces = { + 'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main', + 'a': 'http://schemas.openxmlformats.org/drawingml/2006/main', + 'r': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' + } + + table = element + table_data = extrae_Tabla(element, hiperlinks_info, namespaces) + obj = {} + obj['type'] = 'table' + obj['table'] = table_data + + if not is_numPr: + content.append(obj) + sections.sort(key=section_priority) + return sections, content diff --git a/markuplib/omml2mml.xsl b/markuplib/omml2mml.xsl new file mode 100644 index 0000000..dcd23ee --- /dev/null +++ b/markuplib/omml2mml.xsl @@ -0,0 +1,2068 @@ +<?xml version="1.0" encoding="UTF-8" ?> +<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:mml="http://www.w3.org/1998/Math/MathML" + xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math"> + <xsl:output method="xml" encoding="UTF-16" /> + + <!-- %% Global Definitions --> + + <!-- Every single unicode character that is recognized by OMML as an operator --> + <xsl:variable name="sOperators" + select="concat( + '¨!"#&()+,-./:', + ';<=>?@[\]^_`{', + '|}~¡¦¬¯°±²³´·¹¿', + '×~÷ˇ˘˙˜˝̀́̂̃̄̅̆̇̈̉', + '̊̋̌̍̎̏̐̑̒̓̔̕', + '̡̛̖̗̘̙̜̝̞̟̠̚', + '̢̧̨̣̤̥̦̩̪̫̬̭', + '̴̵̶̷̸̮̯̰̱̲̳̿', + '         ‐‒–', + '—‖†‡•․‥…′″‴‼', + '⁀⁄⁎⁏⁐⁗⁡⁢⁣⁰⁴⁵', + '⁶⁷⁸⁹⁺⁻⁼⁽⁾₀₁₂', + '₃₄₅₆₇₈₉₊₋₌₍₎', + '⃒⃓⃘⃙⃚⃐⃑⃔⃕⃖⃗⃛', + '⃜⃝⃞⃟⃠⃡⃤⃥⃦⃨⃧⃩', + '⃪⅀ⅆ←↑→↓↔↕↖↗↘↙', + '↚↛↜↝↞↟↠↡↢↣↤↥', + '↦↧↨↩↪↫↬↭↮↯↰↱', + '↲↳↶↷↺↻↼↽↾↿⇀⇁', + '⇂⇃⇄⇅⇆⇇⇈⇉⇊⇋⇌⇍', + '⇎⇏⇐⇑⇒⇓⇔⇕⇖⇗⇘⇙', + '⇚⇛⇜⇝⇞⇟⇠⇡⇢⇣⇤⇥', + '⇦⇧⇨⇩⇳⇴⇵⇶⇷⇸⇹⇺', + '⇻⇼⇽⇾⇿∀∁∂∃∄∆∇', + '∈∉∊∋∌∍∏∐∑−∓∔', + '∕∖∗∘∙√∛∜∝∣∤∥', + '∦∧∨∩∪∫∬∭∮∯∰∱', + '∲∳∴∵∶∷∸∹∺∻∼∽', + '∾≀≁≂≃≄≅≆≇≈≉≊', + '≋≌≍≎≏≐≑≒≓≔≕≖', + '≗≘≙≚≛≜≝≞≟≠≡≢', + '≣≤≥≦≧≨≩≪≫≬≭≮', + '≯≰≱≲≳≴≵≶≷≸≹≺', + '≻≼≽≾≿⊀⊁⊂⊃⊄⊅⊆', + '⊇⊈⊉⊊⊋⊌⊍⊎⊏⊐⊑⊒', + '⊓⊔⊕⊖⊗⊘⊙⊚⊛⊜⊝⊞', + '⊟⊠⊡⊢⊣⊥⊦⊧⊨⊩⊪⊫', + '⊬⊭⊮⊯⊰⊱⊲⊳⊴⊵⊶⊷', + '⊸⊹⊺⊻⊼⊽⋀⋁⋂⋃⋄⋅', + '⋆⋇⋈⋉⋊⋋⋌⋍⋎⋏⋐⋑', + '⋒⋓⋔⋕⋖⋗⋘⋙⋚⋛⋜⋝', + '⋞⋟⋠⋡⋢⋣⋤⋥⋦⋧⋨⋩', + '⋪⋫⋬⋭⋮⋯⋰⋱⋲⋳⋴⋵', + '⋶⋷⋸⋹⋺⋻⋼⋽⋾⋿⌅⌆', + '⌈⌉⌊⌋⌜⌝⌞⌟⌢⌣〈〉', + '⌽⌿⎰⎱⏜⏝⏞⏟⏠│├┤', + 
'┬┴▁█▒■□▭▲△▴▵', + '▶▷▸▹▼▽▾▿◀◁◂◃', + '◄◅◊○◦◫◬◸◹◺◻◼', + '◽◾◿★☆❲❳⟑⟒⟓⟔⟕', + '⟖⟗⟘⟙⟚⟛⟜⟝⟞⟟⟠⟡', + '⟢⟣⟤⟥⟦⟧⟨⟩⟪⟫⟰⟱', + '⟲⟳⟴⟵⟶⟷⟸⟹⟺⟻⟼⟽', + '⟾⟿⤀⤁⤂⤃⤄⤅⤆⤇⤈⤉', + '⤊⤋⤌⤍⤎⤏⤐⤑⤒⤓⤔⤕', + '⤖⤗⤘⤙⤚⤛⤜⤝⤞⤟⤠⤡', + '⤢⤣⤤⤥⤦⤧⤨⤩⤪⤫⤬⤭', + '⤮⤯⤰⤱⤲⤳⤴⤵⤶⤷⤸⤹', + '⤺⤻⤼⤽⤾⤿⥀⥁⥂⥃⥄⥅', + '⥆⥇⥈⥉⥊⥋⥌⥍⥎⥏⥐⥑', + '⥒⥓⥔⥕⥖⥗⥘⥙⥚⥛⥜⥝', + '⥞⥟⥠⥡⥢⥣⥤⥥⥦⥧⥨⥩', + '⥪⥫⥬⥭⥮⥯⥰⥱⥲⥳⥴⥵', + '⥶⥷⥸⥹⥺⥻⥼⥽⥾⥿⦀⦂', + '⦃⦄⦅⦆⦇⦈⦉⦊⦋⦌⦍⦎', + '⦏⦐⦑⦒⦓⦔⦕⦖⦗⦘⦙⦚', + '⦶⦷⦸⦹⧀⧁⧄⧅⧆⧇⧈⧎', + '⧏⧐⧑⧒⧓⧔⧕⧖⧗⧘⧙⧚', + '⧛⧟⧡⧢⧣⧤⧥⧦⧫⧴⧵⧶', + '⧷⧸⧹⧺⧻⧼⧽⧾⧿⨀⨁⨂', + '⨃⨄⨅⨆⨇⨈⨉⨊⨋⨌⨍⨎', + '⨏⨐⨑⨒⨓⨔⨕⨖⨗⨘⨙⨚', + '⨛⨜⨝⨞⨟⨠⨡⨢⨣⨤⨥⨦', + '⨧⨨⨩⨪⨫⨬⨭⨮⨯⨰⨱⨲', + '⨳⨴⨵⨶⨷⨸⨹⨺⨻⨼⨽⨾', + '⨿⩀⩁⩂⩃⩄⩅⩆⩇⩈⩉⩊', + '⩋⩌⩍⩎⩏⩐⩑⩒⩓⩔⩕⩖', + '⩗⩘⩙⩚⩛⩜⩝⩞⩟⩠⩡⩢', + '⩣⩤⩥⩦⩧⩨⩩⩪⩫⩬⩭⩮', + '⩯⩰⩱⩲⩳⩴⩵⩶⩷⩸⩹⩺', + '⩻⩼⩽⩾⩿⪀⪁⪂⪃⪄⪅⪆', + '⪇⪈⪉⪊⪋⪌⪍⪎⪏⪐⪑⪒', + '⪓⪔⪕⪖⪗⪘⪙⪚⪛⪜⪝⪞', + '⪟⪠⪡⪢⪣⪤⪥⪦⪧⪨⪩⪪', + '⪫⪬⪭⪮⪯⪰⪱⪲⪳⪴⪵⪶', + '⪷⪸⪹⪺⪻⪼⪽⪾⪿⫀⫁⫂', + '⫃⫄⫅⫆⫇⫈⫉⫊⫋⫌⫍⫎', + '⫏⫐⫑⫒⫓⫔⫕⫖⫗⫘⫙⫚', + '⫛⫝̸⫝⫞⫟⫠⫢⫣⫤⫥⫦⫧', + '⫨⫩⫪⫫⫬⫭⫮⫯⫰⫲⫳⫴', + '⫵⫶⫷⫸⫹⫺⫻⫼⫽⫾⫿⬄', + '⬆⬇⬌⬍〔〕〖〗〘〙!&', + '()+,-./:;<=>', + '?@[\]^_{|}')" /> + + <!-- A string of '-'s repeated exactly as many times as the operators above --> + <xsl:variable name="sMinuses"> + <xsl:call-template name="SRepeatChar"> + <xsl:with-param name="cchRequired" select="string-length($sOperators)" /> + <xsl:with-param name="ch" select="'-'" /> + </xsl:call-template> + </xsl:variable> + + <!-- Every single unicode character that is recognized by OMML as a number --> + <xsl:variable name="sNumbers" select="'0123456789'" /> + + <!-- A string of '0's repeated exactly as many times as the list of numbers above --> + <xsl:variable name="sZeros"> + <xsl:call-template name="SRepeatChar"> + <xsl:with-param name="cchRequired" select="string-length($sNumbers)" /> + <xsl:with-param name="ch" select="'0'" /> + </xsl:call-template> + </xsl:variable> + + <!-- %%Template: SReplace + + Replace all occurences of sOrig in sInput with sReplacement + and return the resulting string. 
<!-- Template: SReplace

     Replace every occurrence of sOrig in sInput with sReplacement and
     return the resulting string.

     Params:
       sInput        the string to search
       sOrig         the substring to look for
       sReplacement  the text written in place of each match

     The template recurses on the text that follows each match, so the
     replacement text itself is never rescanned.
-->
<xsl:template name="SReplace">
  <xsl:param name="sInput" />
  <xsl:param name="sOrig" />
  <xsl:param name="sReplacement" />

  <xsl:choose>
    <!-- Nothing to replace. The empty-search guard matters: in XPath 1.0
         contains(x, '') is true and substring-after(x, '') returns x
         unchanged, so without it the recursion below would never end. -->
    <xsl:when test="string-length($sOrig) = 0 or not(contains($sInput, $sOrig))">
      <xsl:value-of select="$sInput" />
    </xsl:when>
    <xsl:otherwise>
      <xsl:variable name="sBefore" select="substring-before($sInput, $sOrig)" />
      <xsl:variable name="sAfter" select="substring-after($sInput, $sOrig)" />
      <!-- Handle the remaining matches in the tail of the string. -->
      <xsl:variable name="sAfterProcessed">
        <xsl:call-template name="SReplace">
          <xsl:with-param name="sInput" select="$sAfter" />
          <xsl:with-param name="sOrig" select="$sOrig" />
          <xsl:with-param name="sReplacement" select="$sReplacement" />
        </xsl:call-template>
      </xsl:variable>

      <xsl:value-of select="concat($sBefore, $sReplacement, $sAfterProcessed)" />
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
select="translate(m:borderBoxPr[last()]/m:strikeV[last()]/@m:val, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', + 'abcdefghijklmnopqrstuvwxyz')" /> + <xsl:variable name="sLowerCaseStrikeBLTR" select="translate(m:borderBoxPr[last()]/m:strikeBLTR[last()]/@m:val, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', + 'abcdefghijklmnopqrstuvwxyz')" /> + <xsl:variable name="sLowerCaseStrikeTLBR" select="translate(m:borderBoxPr[last()]/m:strikeTLBR[last()]/@m:val, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', + 'abcdefghijklmnopqrstuvwxyz')" /> + <xsl:variable name="fHideTop"> + <xsl:call-template name="ForceTrueStrVal"> + <xsl:with-param name="str" select="$sLowerCaseHideTop" /> + </xsl:call-template> + </xsl:variable> + <xsl:variable name="fHideBot"> + <xsl:call-template name="ForceTrueStrVal"> + <xsl:with-param name="str" select="$sLowerCaseHideBot" /> + </xsl:call-template> + </xsl:variable> + <xsl:variable name="fHideLeft"> + <xsl:call-template name="ForceTrueStrVal"> + <xsl:with-param name="str" select="$sLowerCaseHideLeft" /> + </xsl:call-template> + </xsl:variable> + <xsl:variable name="fHideRight"> + <xsl:call-template name="ForceTrueStrVal"> + <xsl:with-param name="str" select="$sLowerCaseHideRight" /> + </xsl:call-template> + </xsl:variable> + <xsl:variable name="fStrikeH"> + <xsl:call-template name="ForceTrueStrVal"> + <xsl:with-param name="str" select="$sLowerCaseStrikeH" /> + </xsl:call-template> + </xsl:variable> + <xsl:variable name="fStrikeV"> + <xsl:call-template name="ForceTrueStrVal"> + <xsl:with-param name="str" select="$sLowerCaseStrikeV" /> + </xsl:call-template> + </xsl:variable> + <xsl:variable name="fStrikeBLTR"> + <xsl:call-template name="ForceTrueStrVal"> + <xsl:with-param name="str" select="$sLowerCaseStrikeBLTR" /> + </xsl:call-template> + </xsl:variable> + <xsl:variable name="fStrikeTLBR"> + <xsl:call-template name="ForceTrueStrVal"> + <xsl:with-param name="str" select="$sLowerCaseStrikeTLBR" /> + </xsl:call-template> + </xsl:variable> + + <xsl:choose> + <xsl:when test="$fHideTop=1 + and 
<!-- Template: ToNonCombining

     Map a combining accent character (upper or lower form) to a spacing,
     non-combining character; any other character is returned unchanged.

     Param:
       ch  a single character, normally the accent taken from m:chr

     The characters are written as numeric character references so that
     editors cannot silently attach a combining mark to the neighbouring
     quote or normalise it away.

     xsl:choose takes the first matching branch. Several accents share the
     same combining forms (hyphen-minus and minus sign, tilde and small
     tilde, overline and macron, low line, the two rightwards arrows), so
     only the first mapping of each group can ever be selected. The
     duplicate branches that could never fire have been dropped; the
     resulting mapping is unchanged.
-->
<xsl:template name="ToNonCombining">
  <xsl:param name="ch" />
  <xsl:choose>
    <!-- BREVE: U+0306, U+032E to U+02D8 -->
    <xsl:when test="$ch='&#x0306;' or $ch='&#x032E;'">&#x02D8;</xsl:when>
    <!-- CEDILLA: U+0312, U+0327 to U+00B8 -->
    <xsl:when test="$ch='&#x0312;' or $ch='&#x0327;'">&#x00B8;</xsl:when>
    <!-- GRAVE ACCENT: U+0300, U+0316 to U+0060 -->
    <xsl:when test="$ch='&#x0300;' or $ch='&#x0316;'">`</xsl:when>
    <!-- OVERLINE / LOW LINE: U+0305, U+0332 to HYPHEN-MINUS U+002D.
         This branch also absorbs what the minus sign, macron and low line
         entries would have matched. -->
    <xsl:when test="$ch='&#x0305;' or $ch='&#x0332;'">-</xsl:when>
    <!-- DOT BELOW: U+0323 to FULL STOP U+002E -->
    <xsl:when test="$ch='&#x0323;'">.</xsl:when>
    <!-- DOT ABOVE: U+0307 to U+02D9 -->
    <xsl:when test="$ch='&#x0307;'">&#x02D9;</xsl:when>
    <!-- DOUBLE ACUTE ACCENT: U+030B, U+02DD to U+02DD -->
    <xsl:when test="$ch='&#x030B;' or $ch='&#x02DD;'">&#x02DD;</xsl:when>
    <!-- ACUTE ACCENT: U+0301, U+0317 to U+00B4 -->
    <xsl:when test="$ch='&#x0301;' or $ch='&#x0317;'">&#x00B4;</xsl:when>
    <!-- TILDE: U+0303, U+0330 to U+007E (small tilde shares these forms) -->
    <xsl:when test="$ch='&#x0303;' or $ch='&#x0330;'">~</xsl:when>
    <!-- DIAERESIS: U+0308, U+0324 to U+00A8 -->
    <xsl:when test="$ch='&#x0308;' or $ch='&#x0324;'">&#x00A8;</xsl:when>
    <!-- CARON: U+030C, U+032C to U+02C7 -->
    <xsl:when test="$ch='&#x030C;' or $ch='&#x032C;'">&#x02C7;</xsl:when>
    <!-- CIRCUMFLEX ACCENT: U+0302, U+032D to U+005E -->
    <xsl:when test="$ch='&#x0302;' or $ch='&#x032D;'">^</xsl:when>
    <!-- RIGHTWARDS ARROW: U+20D7, U+20EF to U+2192
         (the long rightwards arrow shares these forms) -->
    <xsl:when test="$ch='&#x20D7;' or $ch='&#x20EF;'">&#x2192;</xsl:when>
    <!-- LEFT ARROW: U+20D6, U+20EE to U+2190 -->
    <xsl:when test="$ch='&#x20D6;' or $ch='&#x20EE;'">&#x2190;</xsl:when>
    <!-- Not a known combining accent: pass it through untouched. -->
    <xsl:otherwise>
      <xsl:value-of select="$ch" />
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
<!-- Template: OutputScript

     Emit one script slot of an mmultiscripts element.

     Param:
       ndCur  the script node to render (defaults to the context node)

     A node with no element children yields the empty mml:none placeholder;
     otherwise the node is transformed inside an mml:mrow.
-->
<xsl:template name="OutputScript">
  <xsl:param name="ndCur" select="." />
  <xsl:choose>
    <!-- Missing node, or a node without child elements: empty slot -->
    <xsl:when test="not($ndCur/*)">
      <mml:none />
    </xsl:when>
    <xsl:otherwise>
      <mml:mrow>
        <xsl:apply-templates select="$ndCur" />
      </mml:mrow>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
<!-- Template: CreateMathMLMatrixAttr

     Write the columnalign attribute of an mml:mtable.

     Param:
       mcJc  the column justification value taken from the matrix properties

     The value is compared case-insensitively. Only 'left' and 'right'
     produce an attribute; any other value writes nothing.
-->
<xsl:template name="CreateMathMLMatrixAttr">
  <xsl:param name="mcJc" />
  <xsl:variable name="sAlign" select="translate($mcJc,
                                      'ABCDEFGHIJKLMNOPQRSTUVWXYZ',
                                      'abcdefghijklmnopqrstuvwxyz')" />
  <xsl:if test="$sAlign='left' or $sAlign='right'">
    <xsl:attribute name="columnalign">
      <xsl:value-of select="$sAlign" />
    </xsl:attribute>
  </xsl:if>
</xsl:template>
select="$sLowerCaseZeroAscVal" /> + </xsl:call-template> + </xsl:otherwise> + </xsl:choose> + </xsl:variable> + <xsl:variable name="fZeroDesc"> + <xsl:choose> + <xsl:when test="count(m:phantPr[last()]/m:zeroDesc[last()]) = 0">0</xsl:when> + <xsl:otherwise> + <xsl:call-template name="ForceFalseStrVal"> + <xsl:with-param name="str" select="$sLowerCaseZeroDescVal" /> + </xsl:call-template> + </xsl:otherwise> + </xsl:choose> + </xsl:variable> + + <!-- The show property defaults to 'on' unless there exists a show property and its value is 'off' --> + + <xsl:variable name="fShow"> + <xsl:call-template name="ForceFalseStrVal"> + <xsl:with-param name="str" select="$sLowerCaseShowVal" /> + </xsl:call-template> + </xsl:variable> + + <xsl:choose> + <!-- Show the phantom contents, therefore, just use mpadded. --> + <xsl:when test="$fShow = 1"> + <xsl:element name="mml:mpadded"> + <xsl:call-template name="CreateMpaddedAttributes"> + <xsl:with-param name="fZeroWid" select="$fZeroWid" /> + <xsl:with-param name="fZeroAsc" select="$fZeroAsc" /> + <xsl:with-param name="fZeroDesc" select="$fZeroDesc" /> + </xsl:call-template> + <mml:mrow> + <xsl:apply-templates select="m:e" /> + </mml:mrow> + </xsl:element> + </xsl:when> + <!-- Don't show phantom contents, but don't smash anything, therefore, just + use mphantom --> + <xsl:when test="$fZeroWid=0 and $fZeroAsc=0 and $fZeroDesc=0"> + <xsl:element name="mml:mphantom"> + <mml:mrow> + <xsl:apply-templates select="m:e" /> + </mml:mrow> + </xsl:element> + </xsl:when> + <!-- Combination --> + <xsl:otherwise> + <xsl:element name="mml:mphantom"> + <xsl:element name="mml:mpadded"> + <xsl:call-template name="CreateMpaddedAttributes"> + <xsl:with-param name="fZeroWid" select="$fZeroWid" /> + <xsl:with-param name="fZeroAsc" select="$fZeroAsc" /> + <xsl:with-param name="fZeroDesc" select="$fZeroDesc" /> + </xsl:call-template> + <mml:mrow> + <xsl:apply-templates select="m:e" /> + </mml:mrow> + </xsl:element> + </xsl:element> + </xsl:otherwise> + 
<!-- Template: match m:rad

     Convert an OMML radical. When the degree is flagged as hidden the
     result is an mml:msqrt; otherwise it is an mml:mroot holding the
     radicand followed by the degree.
-->
<xsl:template match="m:rad">
  <xsl:variable name="ndDegHide" select="m:radPr[last()]/m:degHide" />

  <!-- 0 when no degHide element exists; otherwise whatever ForceFalseStrVal
       (defined elsewhere in this stylesheet) makes of its lowercased value -->
  <xsl:variable name="fDegHide">
    <xsl:choose>
      <xsl:when test="$ndDegHide">
        <xsl:call-template name="ForceFalseStrVal">
          <xsl:with-param name="str" select="translate($ndDegHide/@m:val,
                                             'ABCDEFGHIJKLMNOPQRSTUVWXYZ',
                                             'abcdefghijklmnopqrstuvwxyz')" />
        </xsl:call-template>
      </xsl:when>
      <xsl:otherwise>0</xsl:otherwise>
    </xsl:choose>
  </xsl:variable>

  <xsl:choose>
    <!-- Degree shown: radicand first, then the degree -->
    <xsl:when test="not($fDegHide=1)">
      <mml:mroot>
        <mml:mrow>
          <xsl:apply-templates select="m:e[1]" />
        </mml:mrow>
        <mml:mrow>
          <xsl:apply-templates select="m:deg[1]" />
        </mml:mrow>
      </mml:mroot>
    </xsl:when>
    <!-- Degree hidden: plain square root -->
    <xsl:otherwise>
      <mml:msqrt>
        <xsl:apply-templates select="m:e[1]" />
      </mml:msqrt>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
<!-- Template: match m:nary

     Process an n-ary operator (integral, sum, product, ...).

     Reads the last m:naryPr child for four properties: subHide, supHide,
     limLoc and grow. Based on which limits are hidden and on where the
     limits are placed, the operator is written as a bare mo, or wrapped in
     msup / msub / msubsup (limits beside the operator) or in
     mover / munder / munderover (limits above and below). The operand
     m:e[1] always follows in its own mrow.

     The operator itself is produced by the OutputNaryMo template, and the
     string-to-flag conversion by ForceFalseStrVal; the latter is defined
     elsewhere in this stylesheet (presumably yields 1 unless the string is
     a recognised "false" spelling; confirm against its definition).
-->
<xsl:template match="m:nary">
  <!-- Lowercased subHide value; 'off' when the property is absent -->
  <xsl:variable name="sLowerCaseSubHide">
    <xsl:choose>
      <xsl:when test="count(m:naryPr[last()]/m:subHide) = 0">
        <xsl:text>off</xsl:text>
      </xsl:when>
      <xsl:otherwise>
        <xsl:value-of select="translate(m:naryPr[last()]/m:subHide/@m:val,
                              'ABCDEFGHIJKLMNOPQRSTUVWXYZ',
                              'abcdefghijklmnopqrstuvwxyz')" />
      </xsl:otherwise>
    </xsl:choose>
  </xsl:variable>

  <!-- Lowercased supHide value; 'off' when the property is absent -->
  <xsl:variable name="sLowerCaseSupHide">
    <xsl:choose>
      <xsl:when test="count(m:naryPr[last()]/m:supHide) = 0">
        <xsl:text>off</xsl:text>
      </xsl:when>
      <xsl:otherwise>
        <xsl:value-of select="translate(m:naryPr[last()]/m:supHide/@m:val,
                              'ABCDEFGHIJKLMNOPQRSTUVWXYZ',
                              'abcdefghijklmnopqrstuvwxyz')" />
      </xsl:otherwise>
    </xsl:choose>
  </xsl:variable>

  <!-- Lowercased limit location; empty when the property is absent -->
  <xsl:variable name="sLowerCaseLimLoc">
    <xsl:value-of select="translate(m:naryPr[last()]/m:limLoc/@m:val,
                          'ABCDEFGHIJKLMNOPQRSTUVWXYZ',
                          'abcdefghijklmnopqrstuvwxyz')" />
  </xsl:variable>

  <!-- Lowercased grow value; 'off' when the property is absent -->
  <xsl:variable name="sLowerGrow">
    <xsl:choose>
      <xsl:when test="count(m:naryPr[last()]/m:grow)=0">off</xsl:when>
      <xsl:otherwise>
        <xsl:value-of select="translate(m:naryPr[last()]/m:grow/@m:val,
                              'ABCDEFGHIJKLMNOPQRSTUVWXYZ',
                              'abcdefghijklmnopqrstuvwxyz')" />
      </xsl:otherwise>
    </xsl:choose>
  </xsl:variable>

  <!-- 1 when limits sit beside the operator (no limLoc given, or limLoc is
       'subsup'); 0 when they go above and below it -->
  <xsl:variable name="fLimLocSubSup">
    <xsl:choose>
      <xsl:when test="count(m:naryPr[last()]/m:limLoc)=0 or $sLowerCaseLimLoc='subsup'">1</xsl:when>
      <xsl:otherwise>0</xsl:otherwise>
    </xsl:choose>
  </xsl:variable>

  <!-- Numeric flags derived from the strings above -->
  <xsl:variable name="fGrow">
    <xsl:call-template name="ForceFalseStrVal">
      <xsl:with-param name="str" select="$sLowerGrow" />
    </xsl:call-template>
  </xsl:variable>

  <xsl:variable name="fSupHide">
    <xsl:call-template name="ForceFalseStrVal">
      <xsl:with-param name="str" select="$sLowerCaseSupHide" />
    </xsl:call-template>
  </xsl:variable>

  <xsl:variable name="fSubHide">
    <xsl:call-template name="ForceFalseStrVal">
      <xsl:with-param name="str" select="$sLowerCaseSubHide" />
    </xsl:call-template>
  </xsl:variable>

  <mml:mrow>
    <xsl:choose>
      <!-- Both limits hidden: the operator alone -->
      <xsl:when test="$fSupHide=1 and $fSubHide=1">
        <xsl:call-template name="OutputNaryMo">
          <xsl:with-param name="ndCur" select="." />
          <xsl:with-param name="fGrow" select="$fGrow" />
        </xsl:call-template>
      </xsl:when>
      <!-- Only the lower limit hidden: operator plus upper limit -->
      <xsl:when test="$fSubHide=1">
        <xsl:choose>
          <xsl:when test="$fLimLocSubSup=1">
            <mml:msup>
              <xsl:call-template name="OutputNaryMo">
                <xsl:with-param name="ndCur" select="." />
                <xsl:with-param name="fGrow" select="$fGrow" />
              </xsl:call-template>
              <mml:mrow>
                <xsl:apply-templates select="m:sup[1]" />
              </mml:mrow>
            </mml:msup>
          </xsl:when>
          <xsl:otherwise>
            <mml:mover>
              <xsl:call-template name="OutputNaryMo">
                <xsl:with-param name="ndCur" select="." />
                <xsl:with-param name="fGrow" select="$fGrow" />
              </xsl:call-template>
              <mml:mrow>
                <xsl:apply-templates select="m:sup[1]" />
              </mml:mrow>
            </mml:mover>
          </xsl:otherwise>
        </xsl:choose>
      </xsl:when>
      <!-- Only the upper limit hidden: operator plus lower limit -->
      <xsl:when test="$fSupHide=1">
        <xsl:choose>
          <xsl:when test="$fLimLocSubSup=1">
            <mml:msub>
              <xsl:call-template name="OutputNaryMo">
                <xsl:with-param name="ndCur" select="." />
                <xsl:with-param name="fGrow" select="$fGrow" />
              </xsl:call-template>
              <mml:mrow>
                <xsl:apply-templates select="m:sub[1]" />
              </mml:mrow>
            </mml:msub>
          </xsl:when>
          <xsl:otherwise>
            <mml:munder>
              <xsl:call-template name="OutputNaryMo">
                <xsl:with-param name="ndCur" select="." />
                <xsl:with-param name="fGrow" select="$fGrow" />
              </xsl:call-template>
              <mml:mrow>
                <xsl:apply-templates select="m:sub[1]" />
              </mml:mrow>
            </mml:munder>
          </xsl:otherwise>
        </xsl:choose>
      </xsl:when>
      <!-- Both limits shown: lower limit first, then upper limit -->
      <xsl:otherwise>
        <xsl:choose>
          <xsl:when test="$fLimLocSubSup=1">
            <mml:msubsup>
              <xsl:call-template name="OutputNaryMo">
                <xsl:with-param name="ndCur" select="." />
                <xsl:with-param name="fGrow" select="$fGrow" />
              </xsl:call-template>
              <mml:mrow>
                <xsl:apply-templates select="m:sub[1]" />
              </mml:mrow>
              <mml:mrow>
                <xsl:apply-templates select="m:sup[1]" />
              </mml:mrow>
            </mml:msubsup>
          </xsl:when>
          <xsl:otherwise>
            <mml:munderover>
              <xsl:call-template name="OutputNaryMo">
                <xsl:with-param name="ndCur" select="." />
                <xsl:with-param name="fGrow" select="$fGrow" />
              </xsl:call-template>
              <mml:mrow>
                <xsl:apply-templates select="m:sub[1]" />
              </mml:mrow>
              <mml:mrow>
                <xsl:apply-templates select="m:sup[1]" />
              </mml:mrow>
            </mml:munderover>
          </xsl:otherwise>
        </xsl:choose>
      </xsl:otherwise>
    </xsl:choose>
    <!-- The operand the operator applies to -->
    <mml:mrow>
      <xsl:apply-templates select="m:e[1]" />
    </mml:mrow>
  </mml:mrow>
</xsl:template>
'abcdefghijklmnopqrstuvwxyz')" /> + <xsl:variable name="ndLastChr" select="$ndLastGroupChrPr/m:chr" /> + + <xsl:variable name="chr"> + <xsl:choose> + <xsl:when test="$ndLastChr and (not($ndLastChr/@m:val) or string-length($ndLastChr/@m:val) = 0)"></xsl:when> + <xsl:when test="string-length($ndLastChr/@m:val) >= 1"> + <xsl:value-of select="substring($ndLastChr/@m:val,1,1)" /> + </xsl:when> + <xsl:otherwise> + <xsl:text>⏟</xsl:text> + </xsl:otherwise> + </xsl:choose> + </xsl:variable> + <xsl:choose> + <xsl:when test="$sLowerCasePos = 'top'"> + <xsl:choose> + <xsl:when test="$sLowerCaseVertJc = 'bot'"> + <mml:mover accent="false"> + <mml:mrow> + <xsl:apply-templates select="m:e[1]" /> + </mml:mrow> + <mml:mo> + <xsl:value-of select="$chr" /> + </mml:mo> + </mml:mover> + </xsl:when> + <xsl:otherwise> + <mml:munder accentunder="false"> + <mml:mo> + <xsl:value-of select="$chr" /> + </mml:mo> + <mml:mrow> + <xsl:apply-templates select="m:e[1]" /> + </mml:mrow> + </mml:munder> + </xsl:otherwise> + </xsl:choose> + </xsl:when> + <xsl:otherwise> + <xsl:choose> + <xsl:when test="$sLowerCaseVertJc = 'bot'"> + <mml:mover accent="false"> + <mml:mo> + <xsl:value-of select="$chr" /> + </mml:mo> + <mml:mrow> + <xsl:apply-templates select="m:e[1]" /> + </mml:mrow> + </mml:mover> + </xsl:when> + <xsl:otherwise> + <mml:munder accentunder="false"> + <mml:mrow> + <xsl:apply-templates select="m:e[1]" /> + </mml:mrow> + <mml:mo> + <xsl:value-of select="$chr" /> + </mml:mo> + </mml:munder> + </xsl:otherwise> + </xsl:choose> + </xsl:otherwise> + </xsl:choose> + </xsl:template> + + <xsl:template name="fName"> + <xsl:for-each select="m:fName/*"> + <xsl:apply-templates select="." 
<!-- Template: match m:f

     Convert an OMML fraction. A fraction whose type is 'lin' is written
     inline as numerator, a '/' operator, and denominator; every other type
     becomes an mml:mfrac whose attributes are supplied by the
     CreateMathMLFracProp template.
-->
<xsl:template match="m:f">
  <!-- Fraction type from the last m:fPr, compared case-insensitively -->
  <xsl:variable name="sFracType" select="translate(m:fPr[last()]/m:type/@m:val,
                                         'ABCDEFGHIJKLMNOPQRSTUVWXYZ',
                                         'abcdefghijklmnopqrstuvwxyz')" />
  <xsl:choose>
    <!-- Stacked, skewed or bar-less fraction -->
    <xsl:when test="not($sFracType='lin')">
      <mml:mfrac>
        <xsl:call-template name="CreateMathMLFracProp">
          <xsl:with-param name="type" select="$sFracType" />
        </xsl:call-template>
        <mml:mrow>
          <xsl:apply-templates select="m:num[1]" />
        </mml:mrow>
        <mml:mrow>
          <xsl:apply-templates select="m:den[1]" />
        </mml:mrow>
      </mml:mfrac>
    </xsl:when>
    <!-- Linear fraction: a / b on one line -->
    <xsl:otherwise>
      <mml:mrow>
        <mml:mrow>
          <xsl:apply-templates select="m:num[1]" />
        </mml:mrow>
        <mml:mo>/</mml:mo>
        <mml:mrow>
          <xsl:apply-templates select="m:den[1]" />
        </mml:mrow>
      </mml:mrow>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
<!-- Template: CreateMathMLFracProp

     Write the attributes of an mml:mfrac from the supplied fraction
     properties. Must be called before any child content is added to the
     mfrac element.

     Params:
       type   fraction type, compared case-insensitively. 'skw' and 'lin'
              both produce bevelled="true" (OMML tells a linear fraction
              from a skewed one; for MathML both are written as bevelled).
              'nobar' produces linethickness="0pt".
       numJc  optional numerator justification; 'left' or 'right' writes
              numalign. Defaults to empty (no attribute).
       denJc  optional denominator justification; 'left' or 'right' writes
              denomalign. Defaults to empty (no attribute).

     The alignment tests previously referenced sLowerCaseNumJc and
     sLowerCaseDenJc without a '$' and without any declaration, so they
     were evaluated as child-element name tests and were always false; the
     denominator branch also wrote numalign instead of denomalign. Callers
     that pass only 'type' get exactly the same output as before.
-->
<xsl:template name="CreateMathMLFracProp">
  <xsl:param name="type" />
  <xsl:param name="numJc" select="''" />
  <xsl:param name="denJc" select="''" />

  <xsl:variable name="sLowerCaseType" select="translate($type,
                                              'ABCDEFGHIJKLMNOPQRSTUVWXYZ',
                                              'abcdefghijklmnopqrstuvwxyz')" />
  <xsl:variable name="sLowerCaseNumJc" select="translate($numJc,
                                               'ABCDEFGHIJKLMNOPQRSTUVWXYZ',
                                               'abcdefghijklmnopqrstuvwxyz')" />
  <xsl:variable name="sLowerCaseDenJc" select="translate($denJc,
                                               'ABCDEFGHIJKLMNOPQRSTUVWXYZ',
                                               'abcdefghijklmnopqrstuvwxyz')" />

  <xsl:if test="$sLowerCaseType='skw' or $sLowerCaseType='lin'">
    <xsl:attribute name="bevelled">true</xsl:attribute>
  </xsl:if>
  <xsl:if test="$sLowerCaseType='nobar'">
    <xsl:attribute name="linethickness">0pt</xsl:attribute>
  </xsl:if>

  <!-- Numerator alignment -->
  <xsl:choose>
    <xsl:when test="$sLowerCaseNumJc='right'">
      <xsl:attribute name="numalign">right</xsl:attribute>
    </xsl:when>
    <xsl:when test="$sLowerCaseNumJc='left'">
      <xsl:attribute name="numalign">left</xsl:attribute>
    </xsl:when>
  </xsl:choose>

  <!-- Denominator alignment -->
  <xsl:choose>
    <xsl:when test="$sLowerCaseDenJc='right'">
      <xsl:attribute name="denomalign">right</xsl:attribute>
    </xsl:when>
    <xsl:when test="$sLowerCaseDenJc='left'">
      <xsl:attribute name="denomalign">left</xsl:attribute>
    </xsl:when>
  </xsl:choose>
</xsl:template>
-->
+  <xsl:template match="m:e | m:den | m:num | m:lim | m:sup | m:sub">
+    <xsl:choose>
+
+      <!-- A script level is given on the last m:argPr: wrap the children
+           in an mstyle that carries it -->
+      <xsl:when test="m:argPr[last()]/m:scrLvl/@m:val">
+        <mml:mstyle scriptlevel="{m:argPr[last()]/m:scrLvl/@m:val}">
+          <xsl:apply-templates select="*" />
+        </mml:mstyle>
+      </xsl:when>
+
+      <!-- No script level: just process the children -->
+      <xsl:otherwise>
+        <xsl:apply-templates select="*" />
+      </xsl:otherwise>
+    </xsl:choose>
+  </xsl:template>
+
+  <!-- m:bar: a bar over (pos = 'top') or under (any other value, or none)
+       the first m:e child. -->
+  <xsl:template match="m:bar">
+    <!-- bar position, lower-cased for comparison -->
+    <xsl:variable name="barPos" select="translate(m:barPr/m:pos/@m:val, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz')" />
+
+    <xsl:choose>
+      <xsl:when test="$barPos='top'">
+        <mml:mover accent="false">
+          <mml:mrow>
+            <xsl:apply-templates select="m:e[1]" />
+          </mml:mrow>
+          <mml:mo>
+            <!-- U+00AF MACRON -->
+            <xsl:text>&#xAF;</xsl:text>
+          </mml:mo>
+        </mml:mover>
+      </xsl:when>
+      <xsl:otherwise>
+        <mml:munder underaccent="false">
+          <mml:mrow>
+            <xsl:apply-templates select="m:e[1]" />
+          </mml:mrow>
+          <mml:mo>
+            <xsl:text>_</xsl:text>
+          </mml:mo>
+        </mml:munder>
+      </xsl:otherwise>
+    </xsl:choose>
+  </xsl:template>
+
+  <!-- %%Template match m:d
+
+       Process a delimiter.
+  -->
+  <xsl:template match="m:d">
+    <mml:mfenced>
+      <!-- open: default is '(' for both OMML and MathML -->
+      <xsl:if test="m:dPr[1]/m:begChr/@m:val and not(m:dPr[1]/m:begChr/@m:val ='(')">
+        <xsl:attribute name="open">
+          <xsl:value-of select="m:dPr[1]/m:begChr/@m:val" />
+        </xsl:attribute>
+      </xsl:if>
+
+      <!-- close: default is ')' for both OMML and MathML -->
+      <xsl:if test="m:dPr[1]/m:endChr/@m:val and not(m:dPr[1]/m:endChr/@m:val =')')">
+        <xsl:attribute name="close">
+          <xsl:value-of select="m:dPr[1]/m:endChr/@m:val" />
+        </xsl:attribute>
+      </xsl:if>
+
+      <!-- separator: the default is ',' for MathML, and '|' for OMML -->
+      <xsl:choose>
+        <!-- Matches MathML default. Write nothing -->
+        <xsl:when test="m:dPr[1]/m:sepChr/@m:val = ','" />
+
+        <!-- OMML default: | -->
+        <xsl:when test="not(m:dPr[1]/m:sepChr/@m:val)">
+          <xsl:attribute name="separators">
+            <xsl:value-of select="'|'" />
+          </xsl:attribute>
+        </xsl:when>
+
+        <xsl:otherwise>
+          <xsl:attribute name="separators">
+            <xsl:value-of select="m:dPr[1]/m:sepChr/@m:val" />
+          </xsl:attribute>
+        </xsl:otherwise>
+      </xsl:choose>
+
+      <!-- now write all the children. Put each one into an mrow
+           just in case it produces multiple runs, etc -->
+      <xsl:for-each select="m:e">
+        <mml:mrow>
+          <xsl:apply-templates select="." />
+        </mml:mrow>
+      </xsl:for-each>
+    </mml:mfenced>
+  </xsl:template>
+
+  <!-- m:r: a math run.
+       A run flagged as normal text (m:nor) is written as a single mtext;
+       any other run has each m:t split into mi / mn / mo tokens by ParseMt. -->
+  <xsl:template match="m:r">
+    <!-- fNor: 0 when the last m:rPr has no m:nor child; otherwise whatever
+         ForceFalseStrVal (defined outside this part of the file) returns
+         for the lower-cased m:val -->
+    <xsl:variable name="fNor">
+      <xsl:choose>
+        <xsl:when test="count(child::m:rPr[last()]/m:nor) = 0">0</xsl:when>
+        <xsl:otherwise>
+          <xsl:call-template name="ForceFalseStrVal">
+            <xsl:with-param name="str" select="translate(child::m:rPr[last()]/m:nor/@m:val, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz')" />
+          </xsl:call-template>
+        </xsl:otherwise>
+      </xsl:choose>
+    </xsl:variable>
+
+    <xsl:choose>
+      <xsl:when test="$fNor=1">
+        <mml:mtext>
+          <!-- Map ordinary spaces to U+00A0 so they are not collapsed or
+               trimmed inside the MathML token element. Translating a space
+               to a space would be a no-op.
+               NOTE(review): only the first m:t of the run is used here; confirm
+               runs never carry more than one m:t. -->
+          <xsl:variable name="sOutput" select="translate(.//m:t, ' ', '&#xA0;')" />
+          <xsl:value-of select="$sOutput" />
+        </mml:mtext>
+      </xsl:when>
+      <xsl:otherwise>
+        <xsl:for-each select=".//m:t">
+          <xsl:call-template name="ParseMt">
+            <xsl:with-param name="sToParse" select="text()" />
+            <xsl:with-param name="scr" select="../m:rPr[last()]/m:scr/@m:val" />
+            <xsl:with-param name="sty" select="../m:rPr[last()]/m:sty/@m:val" />
+            <xsl:with-param name="nor">0</xsl:with-param>
+          </xsl:call-template>
+        </xsl:for-each>
+      </xsl:otherwise>
+    </xsl:choose>
+  </xsl:template>
+
+
+  <xsl:template name="CreateTokenAttributes">
+    <xsl:param name="scr" />
+    <xsl:param name="sty" />
+    <xsl:param name="nor" />
+    <xsl:param name="nCharToPrint" />
+    <xsl:param name="sTokenType" />
+
+    <xsl:choose>
+      <xsl:when test="$nor=1">
+        <xsl:attribute name="mathvariant">normal</xsl:attribute>
+      </xsl:when>
+      <xsl:otherwise>
+        <xsl:variable name="mathvariant">
+          <xsl:choose>
+            <!-- numbers don't care -->
+            <xsl:when test="$sTokenType='mn'" />
+
+            <xsl:when test="$scr='monospace'">monospace</xsl:when>
+            <xsl:when test="$scr='sans-serif' and $sty='i'">sans-serif-italic</xsl:when>
+            <xsl:when test="$scr='sans-serif' and $sty='b'">bold-sans-serif</xsl:when>
+            <xsl:when test="$scr='sans-serif' and $sty='bi'">sans-serif-bold-italic</xsl:when>
+            <xsl:when test="$scr='sans-serif'">sans-serif</xsl:when>
+            <xsl:when test="$scr='fraktur' and ($sty='b' or
$sty='bi')">bold-fraktur</xsl:when>
+            <xsl:when test="$scr='fraktur'">fraktur</xsl:when>
+            <xsl:when test="$scr='double-struck'">double-struck</xsl:when>
+            <xsl:when test="$scr='script' and ($sty='b' or $sty='bi')">bold-script</xsl:when>
+            <xsl:when test="$scr='script'">script</xsl:when>
+            <xsl:when test="($scr='roman' or not($scr) or $scr='') and $sty='b'">bold</xsl:when>
+            <xsl:when test="($scr='roman' or not($scr) or $scr='') and $sty='i'">italic</xsl:when>
+            <xsl:when test="($scr='roman' or not($scr) or $scr='') and $sty='p'">normal</xsl:when>
+            <xsl:when test="($scr='roman' or not($scr) or $scr='') and $sty='bi'">bold-italic</xsl:when>
+            <xsl:otherwise />
+          </xsl:choose>
+        </xsl:variable>
+        <xsl:variable name="fontweight">
+          <xsl:choose>
+            <xsl:when test="$sty='b' or $sty='bi'">bold</xsl:when>
+            <xsl:otherwise>normal</xsl:otherwise>
+          </xsl:choose>
+        </xsl:variable>
+        <xsl:variable name="fontstyle">
+          <xsl:choose>
+            <xsl:when test="$sty='p' or $sty='b'">normal</xsl:when>
+            <xsl:otherwise>italic</xsl:otherwise>
+          </xsl:choose>
+        </xsl:variable>
+
+        <!-- Writing of attributes begins here -->
+        <xsl:choose>
+          <!-- Don't write mathvariant for operators unless they want to be normal -->
+          <xsl:when test="$sTokenType='mo' and $mathvariant!='normal'" />
+
+          <!-- A single character within an mi is already italics, don't write -->
+          <xsl:when test="$sTokenType='mi' and $nCharToPrint=1 and ($mathvariant='' or $mathvariant='italic')" />
+
+          <xsl:when test="$sTokenType='mi' and $nCharToPrint > 1 and ($mathvariant='' or $mathvariant='italic')">
+            <xsl:attribute name="mathvariant">
+              <xsl:value-of select="'italic'" />
+            </xsl:attribute>
+          </xsl:when>
+          <xsl:when test="$mathvariant!='italic' and $mathvariant!=''">
+            <xsl:attribute name="mathvariant">
+              <xsl:value-of select="$mathvariant" />
+            </xsl:attribute>
+          </xsl:when>
+          <xsl:otherwise>
+            <xsl:if test="not($sTokenType='mi' and $nCharToPrint=1) and $fontstyle='italic'">
+              <xsl:attribute name="fontstyle">italic</xsl:attribute>
+            </xsl:if>
+            <xsl:if test="$fontweight='bold'">
+              <xsl:attribute name="fontweight">bold</xsl:attribute>
+            </xsl:if>
+          </xsl:otherwise>
+        </xsl:choose>
+      </xsl:otherwise>
+    </xsl:choose>
+  </xsl:template>
+
+  <!-- m:eqArr: equation array. One mtr/mtd per m:e child; the row content
+       is produced by CreateEqArrRow starting at the first child of the m:e. -->
+  <xsl:template match="m:eqArr">
+    <mml:mtable>
+      <xsl:for-each select="m:e">
+        <mml:mtr>
+          <mml:mtd>
+            <xsl:choose>
+              <xsl:when test="m:argPr[last()]/m:scrLvl/@m:val!='0' or
+                              not(m:argPr[last()]/m:scrLvl/@m:val) or
+                              m:argPr[last()]/m:scrLvl/@m:val=''">
+                <mml:mrow>
+                  <mml:maligngroup />
+                  <xsl:call-template name="CreateEqArrRow">
+                    <xsl:with-param name="align" select="1" />
+                    <xsl:with-param name="ndCur" select="*[1]" />
+                  </xsl:call-template>
+                </mml:mrow>
+              </xsl:when>
+              <xsl:otherwise>
+                <mml:mstyle>
+                  <xsl:attribute name="scriptlevel">
+                    <xsl:value-of select="m:argPr[last()]/m:scrLvl/@m:val" />
+                  </xsl:attribute>
+                  <mml:maligngroup />
+                  <xsl:call-template name="CreateEqArrRow">
+                    <xsl:with-param name="align" select="1" />
+                    <xsl:with-param name="ndCur" select="*[1]" />
+                  </xsl:call-template>
+                </mml:mstyle>
+              </xsl:otherwise>
+            </xsl:choose>
+          </mml:mtd>
+        </mml:mtr>
+      </xsl:for-each>
+    </mml:mtable>
+  </xsl:template>
+
+  <!-- CreateEqArrRow: walk the children of one equation-array row.
+       align: meaning of the next ampersand (see ParseEqArrMr).
+       ndCur: the child currently being written. The template writes it and
+              then recurses on its next sibling.
+       The context node is NOT changed by xsl:call-template, so everything
+       about the current child must be read through $ndCur. -->
+  <xsl:template name="CreateEqArrRow">
+    <xsl:param name="align" />
+    <xsl:param name="ndCur" />
+    <!-- concatenated text of the m:t children of ndCur -->
+    <xsl:variable name="sAllMt">
+      <xsl:for-each select="$ndCur/m:t">
+        <xsl:value-of select="." />
+      </xsl:for-each>
+    </xsl:variable>
+    <xsl:choose>
+      <xsl:when test="$ndCur/self::m:r">
+        <xsl:call-template name="ParseEqArrMr">
+          <xsl:with-param name="sToParse" select="$sAllMt" />
+          <!-- Read script and style from the run's own properties, the same
+               place nor is read from below. The former ../m:rPr was relative
+               to the context node rather than to the run. -->
+          <xsl:with-param name="scr" select="$ndCur/m:rPr[last()]/m:scr/@m:val" />
+          <xsl:with-param name="sty" select="$ndCur/m:rPr[last()]/m:sty/@m:val" />
+          <xsl:with-param name="nor">
+            <xsl:choose>
+              <xsl:when test="count($ndCur/m:rPr[last()]/m:nor) = 0">0</xsl:when>
+              <xsl:otherwise>
+                <xsl:call-template name="ForceFalseStrVal">
+                  <xsl:with-param name="str" select="translate($ndCur/m:rPr[last()]/m:nor/@m:val, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz')" />
+                </xsl:call-template>
+              </xsl:otherwise>
+            </xsl:choose>
+          </xsl:with-param>
+          <xsl:with-param name="align" select="$align" />
+        </xsl:call-template>
+      </xsl:when>
+      <xsl:otherwise>
+        <xsl:apply-templates select="$ndCur" />
+      </xsl:otherwise>
+    </xsl:choose>
+    <xsl:if test="count($ndCur/following-sibling::*) > 0">
+      <!-- An odd number of ampersands in this child flips the meaning of the next one -->
+      <xsl:variable name="cAmp">
+        <xsl:call-template name="CountAmp">
+          <xsl:with-param name="sAllMt" select="$sAllMt" />
+          <xsl:with-param name="cAmp" select="0" />
+        </xsl:call-template>
+      </xsl:variable>
+      <xsl:call-template name="CreateEqArrRow">
+        <xsl:with-param name="align" select="($align+($cAmp mod 2)) mod 2" />
+        <xsl:with-param name="ndCur" select="$ndCur/following-sibling::*[1]" />
+      </xsl:call-template>
+    </xsl:if>
+  </xsl:template>
+
+  <!-- CountAmp: return cAmp plus the number of ampersands in sAllMt.
+       Recurses on the text after the first ampersand. The second half of
+       the test catches an ampersand that is the very last character, where
+       substring-after yields the empty string.
+       The ampersand is written as an entity reference because a bare one
+       is not allowed inside an XML attribute value. -->
+  <xsl:template name="CountAmp">
+    <xsl:param name="sAllMt" />
+    <xsl:param name="cAmp" />
+    <xsl:choose>
+      <xsl:when test="string-length(substring-after($sAllMt, '&amp;')) > 0 or
+                      substring($sAllMt, string-length($sAllMt))='&amp;'">
+        <xsl:call-template name="CountAmp">
+          <xsl:with-param name="sAllMt" select="substring-after($sAllMt, '&amp;')" />
+          <xsl:with-param name="cAmp" select="$cAmp+1" />
+        </xsl:call-template>
+      </xsl:when>
+      <xsl:otherwise>
+        <xsl:value-of select="$cAmp" />
+      </xsl:otherwise>
+    </xsl:choose>
+  </xsl:template>
+
+  <!-- %%Template: ParseEqArrMr
+
+
Similar to ParseMt, but this one has to do more for an equation array.
+       In equation arrays & is a special character which denotes alignment.
+
+       The & in an equation works by alternating between meaning insert alignment spacing
+       and insert alignment mark. For each equation in the equation array
+       there is an implied align space at the beginning of the equation. Within each equation,
+       the first & means alignmark, the second, align space, the third, alignmark, etc.
+
+       For this reason when parsing m:r's in equation arrays it is important to keep track of what
+       the next ampersand will mean.
+
+       $align=0 => Omml's align space, which is similar to MathML's maligngroup.
+       $align=1 => Omml's alignment mark, which is similar to MathML's malignmark.
+
+       The template consumes sToParse from the left, one token per
+       recursion, until the string is empty. When nor is not 0 the
+       tokens are written as mtext instead of mi / mo / mn.
+  -->
+  <xsl:template name="ParseEqArrMr">
+    <xsl:param name="sToParse" />
+    <xsl:param name="sty" />
+    <xsl:param name="scr" />
+    <xsl:param name="nor" />
+    <xsl:param name="align" />
+
+    <xsl:if test="string-length($sToParse) > 0">
+      <xsl:choose>
+        <!-- Leading ampersand: write the alignment element it stands for and
+             flip align for the rest of the string. The ampersand is written
+             as an entity reference because a bare one is not allowed inside
+             an XML attribute value. -->
+        <xsl:when test="substring($sToParse,1,1) = '&amp;'">
+          <xsl:choose>
+            <xsl:when test="$align='0'">
+              <mml:maligngroup />
+            </xsl:when>
+            <xsl:when test="$align='1'">
+              <mml:malignmark />
+            </xsl:when>
+          </xsl:choose>
+          <xsl:call-template name="ParseEqArrMr">
+            <xsl:with-param name="sToParse" select="substring($sToParse,2)" />
+            <xsl:with-param name="scr" select="$scr" />
+            <xsl:with-param name="sty" select="$sty" />
+            <xsl:with-param name="nor" select="$nor" />
+            <xsl:with-param name="align">
+              <xsl:choose>
+                <xsl:when test="$align='1'">0</xsl:when>
+                <xsl:otherwise>1</xsl:otherwise>
+              </xsl:choose>
+            </xsl:with-param>
+          </xsl:call-template>
+        </xsl:when>
+        <xsl:otherwise>
+          <!-- Classification patterns: digits become '0', then operators become '-' -->
+          <xsl:variable name="sRepNumWith0">
+            <xsl:call-template name="SReplaceNumWithZero">
+              <xsl:with-param name="sToParse" select="$sToParse" />
+            </xsl:call-template>
+          </xsl:variable>
+          <xsl:variable name="sRepOperWith-">
+            <xsl:call-template name="SReplaceOperWithMinus">
+              <xsl:with-param name="sToParse" select="$sRepNumWith0" />
+            </xsl:call-template>
+          </xsl:variable>
+
+          <!-- Only the first character's class is needed here. The unused
+               first-operator, first-number and first-ampersand position
+               variables were removed. -->
+          <xsl:variable name="fNumAtPos1">
+            <xsl:choose>
+              <xsl:when test="substring($sRepOperWith-,1,1)='0'">1</xsl:when>
+              <xsl:otherwise>0</xsl:otherwise>
+            </xsl:choose>
+          </xsl:variable>
+          <xsl:variable name="fOperAtPos1">
+            <xsl:choose>
+              <xsl:when test="substring($sRepOperWith-,1,1)='-'">1</xsl:when>
+              <xsl:otherwise>0</xsl:otherwise>
+            </xsl:choose>
+          </xsl:variable>
+          <xsl:choose>
+
+            <!-- Case I: The string begins with neither a number, nor an operator -->
+            <xsl:when test="$fNumAtPos1='0' and $fOperAtPos1='0'">
+              <xsl:choose>
+                <xsl:when test="$nor = 0">
+                  <mml:mi>
+                    <xsl:call-template name="CreateTokenAttributes">
+                      <xsl:with-param name="scr" select="$scr" />
+                      <xsl:with-param name="sty" select="$sty" />
+                      <xsl:with-param name="nor" select="$nor" />
+                      <xsl:with-param name="nCharToPrint" select="1" />
+                      <xsl:with-param name="sTokenType" select="'mi'" />
+                    </xsl:call-template>
+                    <!-- A space is written as U+00A0 so it survives inside the
+                         token element. Space to space would be a no-op. -->
+                    <xsl:variable name="sOutput" select="translate(substring($sToParse, 1, 1), ' ', '&#xA0;')" />
+                    <xsl:value-of select="$sOutput" />
+                  </mml:mi>
+                </xsl:when>
+                <xsl:otherwise>
+                  <mml:mtext>
+                    <xsl:variable name="sOutput" select="translate(substring($sToParse, 1, 1), ' ', '&#xA0;')" />
+                    <xsl:value-of select="$sOutput" />
+                  </mml:mtext>
+                </xsl:otherwise>
+              </xsl:choose>
+              <xsl:call-template name="ParseEqArrMr">
+                <xsl:with-param name="sToParse" select="substring($sToParse, 2)" />
+                <xsl:with-param name="scr" select="$scr" />
+                <xsl:with-param name="sty" select="$sty" />
+                <xsl:with-param name="nor" select="$nor" />
+                <xsl:with-param name="align" select="$align" />
+              </xsl:call-template>
+            </xsl:when>
+
+            <!-- Case II: There is an operator at position 1 -->
+            <xsl:when test="$fOperAtPos1='1'">
+              <xsl:choose>
+                <xsl:when test="$nor = 0">
+                  <mml:mo>
+                    <xsl:call-template name="CreateTokenAttributes">
+                      <xsl:with-param name="scr" />
+                      <xsl:with-param name="sty" />
+                      <xsl:with-param name="nor" select="$nor" />
+                      <xsl:with-param name="sTokenType" select="'mo'" />
+                    </xsl:call-template>
+                    <xsl:value-of select="substring($sToParse,1,1)" />
+                  </mml:mo>
+                </xsl:when>
+                <xsl:otherwise>
+                  <mml:mtext>
+                    <xsl:value-of select="substring($sToParse,1,1)" />
+                  </mml:mtext>
+                </xsl:otherwise>
+              </xsl:choose>
+              <xsl:call-template name="ParseEqArrMr">
+                <xsl:with-param name="sToParse" select="substring($sToParse, 2)" />
+                <xsl:with-param name="scr" select="$scr" />
+                <xsl:with-param name="sty" select="$sty" />
+                <xsl:with-param name="nor" select="$nor" />
+                <xsl:with-param name="align" select="$align" />
+              </xsl:call-template>
+            </xsl:when>
+
+            <!-- Case III: There is a number at position 1 -->
+            <xsl:otherwise>
+              <xsl:variable name="sConsecNum">
+                <xsl:call-template name="SNumStart">
+                  <xsl:with-param name="sToParse" select="$sToParse" />
+                  <xsl:with-param name="sPattern" select="$sRepNumWith0" />
+                </xsl:call-template>
+              </xsl:variable>
+              <xsl:choose>
+                <xsl:when test="$nor = 0">
+                  <mml:mn>
+                    <xsl:call-template name="CreateTokenAttributes">
+                      <xsl:with-param name="scr" />
+                      <xsl:with-param name="sty" select="'p'"/>
+                      <xsl:with-param name="nor" select="$nor" />
+                      <xsl:with-param name="sTokenType" select="'mn'" />
+                    </xsl:call-template>
+                    <xsl:value-of select="$sConsecNum" />
+                  </mml:mn>
+                </xsl:when>
+                <xsl:otherwise>
+                  <mml:mtext>
+                    <xsl:value-of select="$sConsecNum" />
+                  </mml:mtext>
+                </xsl:otherwise>
+              </xsl:choose>
+              <xsl:call-template name="ParseEqArrMr">
+                <xsl:with-param name="sToParse" select="substring-after($sToParse, $sConsecNum)" />
+                <xsl:with-param name="scr" select="$scr" />
+                <xsl:with-param name="sty" select="$sty" />
+                <xsl:with-param name="nor" select="$nor" />
+                <xsl:with-param name="align" select="$align" />
+              </xsl:call-template>
+            </xsl:otherwise>
+          </xsl:choose>
+        </xsl:otherwise>
+      </xsl:choose>
+    </xsl:if>
+  </xsl:template>
+
+  <!-- %%Template: ParseMt
+
+       Produce a run of text. Technically, OMML makes no distinction
+       between numbers, operators, and other characters in a run. For
+       MathML we need to break these into mi, mn, or mo elements.
+
+       See also ParseEqArrMr
+  -->
+  <xsl:template name="ParseMt">
+    <xsl:param name="sToParse" />
+    <xsl:param name="sty" />
+    <xsl:param name="scr" />
+    <xsl:param name="nor" />
+    <xsl:if test="string-length($sToParse) > 0">
+      <xsl:variable name="sRepNumWith0">
+        <xsl:call-template name="SReplaceNumWithZero">
+          <xsl:with-param name="sToParse" select="$sToParse" />
+        </xsl:call-template>
+      </xsl:variable>
+      <xsl:variable name="sRepOperWith-">
+        <xsl:call-template name="SReplaceOperWithMinus">
+          <xsl:with-param name="sToParse" select="$sRepNumWith0" />
+        </xsl:call-template>
+      </xsl:variable>
+
+      <xsl:variable name="iFirstOper" select="string-length($sRepOperWith-) - string-length(substring-after($sRepOperWith-, '-'))" />
+      <xsl:variable name="iFirstNum" select="string-length($sRepOperWith-) - string-length(substring-after($sRepOperWith-, '0'))" />
+      <xsl:variable name="fNumAtPos1">
+        <xsl:choose>
+          <xsl:when test="substring($sRepOperWith-,1,1)='0'">1</xsl:when>
+          <xsl:otherwise>0</xsl:otherwise>
+        </xsl:choose>
+      </xsl:variable>
+      <xsl:variable name="fOperAtPos1">
+        <xsl:choose>
+          <xsl:when test="substring($sRepOperWith-,1,1)='-'">1</xsl:when>
+          <xsl:otherwise>0</xsl:otherwise>
+        </xsl:choose>
+      </xsl:variable>
+
+      <xsl:choose>
+
+        <!-- Case I: The string begins with neither a number, nor an operator -->
+        <xsl:when test="$fOperAtPos1='0' and $fNumAtPos1='0'">
+          <xsl:variable name="nCharToPrint">
+            <xsl:choose>
+              <xsl:when test="ancestor::m:fName">
+                <xsl:choose>
+                  <xsl:when test="($iFirstOper=$iFirstNum) and
+
($iFirstOper=string-length($sToParse)) and
+                                  (substring($sRepOperWith-, string-length($sRepOperWith-))!='0') and
+                                  (substring($sRepOperWith-, string-length($sRepOperWith-))!='-')">
+                    <!-- no number and no operator anywhere: the whole string is one identifier -->
+                    <xsl:value-of select="string-length($sToParse)" />
+                  </xsl:when>
+                  <!-- '<' is written as an entity reference because a bare one is
+                       not allowed inside an XML attribute value -->
+                  <xsl:when test="$iFirstOper &lt; $iFirstNum">
+                    <xsl:value-of select="$iFirstOper - 1" />
+                  </xsl:when>
+                  <xsl:otherwise>
+                    <xsl:value-of select="$iFirstNum - 1" />
+                  </xsl:otherwise>
+                </xsl:choose>
+              </xsl:when>
+              <xsl:otherwise>1</xsl:otherwise>
+            </xsl:choose>
+          </xsl:variable>
+
+          <mml:mi>
+            <xsl:call-template name="CreateTokenAttributes">
+              <xsl:with-param name="scr" select="$scr" />
+              <xsl:with-param name="sty" select="$sty" />
+              <xsl:with-param name="nor" select="$nor" />
+              <xsl:with-param name="nCharToPrint" select="$nCharToPrint" />
+              <xsl:with-param name="sTokenType" select="'mi'" />
+            </xsl:call-template>
+            <!-- A space is written as U+00A0 so it survives inside the token
+                 element. Space to space would be a no-op. -->
+            <xsl:variable name="sWrite" select="translate(substring($sToParse, 1, $nCharToPrint), ' ', '&#xA0;')" />
+            <xsl:value-of select="$sWrite" />
+          </mml:mi>
+          <xsl:call-template name="ParseMt">
+            <xsl:with-param name="sToParse" select="substring($sToParse, $nCharToPrint+1)" />
+            <xsl:with-param name="scr" select="$scr" />
+            <xsl:with-param name="sty" select="$sty" />
+            <xsl:with-param name="nor" select="$nor" />
+          </xsl:call-template>
+        </xsl:when>
+
+        <!-- Case II: There is an operator at position 1 -->
+        <xsl:when test="$fOperAtPos1='1'">
+          <mml:mo>
+            <xsl:call-template name="CreateTokenAttributes">
+              <xsl:with-param name="scr" />
+              <xsl:with-param name="sty" />
+              <xsl:with-param name="nor" select="$nor" />
+              <xsl:with-param name="sTokenType" select="'mo'" />
+            </xsl:call-template>
+            <xsl:value-of select="substring($sToParse,1,1)" />
+          </mml:mo>
+          <xsl:call-template name="ParseMt">
+            <xsl:with-param name="sToParse" select="substring($sToParse, 2)" />
+            <xsl:with-param name="scr" select="$scr" />
+            <xsl:with-param name="sty" select="$sty" />
+            <xsl:with-param name="nor" select="$nor" />
+          </xsl:call-template>
+        </xsl:when>
+
+        <!-- Case III: There is a number at position 1 -->
+        <xsl:otherwise>
+          <xsl:variable name="sConsecNum">
+            <xsl:call-template name="SNumStart">
+              <xsl:with-param name="sToParse" select="$sToParse" />
+              <xsl:with-param name="sPattern" select="$sRepNumWith0" />
+            </xsl:call-template>
+          </xsl:variable>
+          <mml:mn>
+            <xsl:call-template name="CreateTokenAttributes">
+              <xsl:with-param name="scr" select="$scr" />
+              <xsl:with-param name="sty" select="'p'" />
+              <xsl:with-param name="nor" select="$nor" />
+              <xsl:with-param name="sTokenType" select="'mn'" />
+            </xsl:call-template>
+            <xsl:value-of select="$sConsecNum" />
+          </mml:mn>
+          <xsl:call-template name="ParseMt">
+            <xsl:with-param name="sToParse" select="substring-after($sToParse, $sConsecNum)" />
+            <xsl:with-param name="scr" select="$scr" />
+            <xsl:with-param name="sty" select="$sty" />
+            <xsl:with-param name="nor" select="$nor" />
+          </xsl:call-template>
+        </xsl:otherwise>
+      </xsl:choose>
+    </xsl:if>
+  </xsl:template>
+
+  <!-- %%Template: SNumStart
+
+       Return the longest substring of sToParse starting from the
+       start of sToParse that is a number. In addition, it takes the
+       pattern string, which is sToParse with all of its numbers
+       replaced with a 0. sPattern should be the same length
+       as sToParse
+  -->
+  <xsl:template name="SNumStart">
+    <xsl:param name="sToParse" select="''" />
+    <!-- if we don't get anything, take the string itself.
+         The default must reference the parameter. Quoting it would make
+         the default the literal text "$sToParse". -->
+    <xsl:param name="sPattern" select="$sToParse" />
+
+
+    <xsl:choose>
+      <!-- the pattern says this is a number, recurse with the rest -->
+      <xsl:when test="substring($sPattern, 1, 1) = '0'">
+        <xsl:call-template name="SNumStart">
+          <xsl:with-param name="sToParse" select="$sToParse" />
+          <xsl:with-param name="sPattern" select="substring($sPattern, 2)" />
+        </xsl:call-template>
+      </xsl:when>
+
+      <!-- the pattern says we've run out of numbers. Take as many
+           characters from sToParse as we shaved off sPattern -->
+      <xsl:otherwise>
+        <xsl:value-of select="substring($sToParse, 1, string-length($sToParse) - string-length($sPattern))" />
+      </xsl:otherwise>
+    </xsl:choose>
+  </xsl:template>
+
+  <!-- %%Template SRepeatCharAcc
+
+       The core of SRepeatChar with an accumulator. The current
+       string is in param $acc, and we will double and recurse,
+       if we're less than half of the required length or else just
+       add the right amount of characters to the accumulator and
+       return
+  -->
+  <xsl:template name="SRepeatCharAcc">
+    <xsl:param name="cchRequired" select="1" />
+    <xsl:param name="ch" select="'-'" />
+    <xsl:param name="acc" select="$ch" />
+
+    <xsl:variable name="cchAcc" select="string-length($acc)" />
+    <xsl:choose>
+      <!-- '<' is written as an entity reference because a bare one is not
+           allowed inside an XML attribute value -->
+      <xsl:when test="(2 * $cchAcc) &lt; $cchRequired">
+        <xsl:call-template name="SRepeatCharAcc">
+          <xsl:with-param name="cchRequired" select="$cchRequired" />
+          <xsl:with-param name="ch" select="$ch" />
+          <xsl:with-param name="acc" select="concat($acc, $acc)" />
+        </xsl:call-template>
+      </xsl:when>
+
+      <!-- Here 2 * cchAcc >= cchRequired, so cutting the doubled accumulator
+           yields exactly cchRequired characters. Unlike appending a partial
+           copy to $acc, this also returns the right length when fewer
+           characters than the accumulator already holds are requested
+           (for example, zero). -->
+      <xsl:otherwise>
+        <xsl:value-of select="substring(concat($acc, $acc), 1, $cchRequired)" />
+      </xsl:otherwise>
+    </xsl:choose>
+  </xsl:template>
+
+
+  <!-- %%Template SRepeatChar
+
+       Generates a string cchRequired long by repeating the given character ch
+  -->
+  <xsl:template name="SRepeatChar">
+    <xsl:param name="cchRequired" select="1" />
+    <xsl:param name="ch" select="'-'" />
+
+    <xsl:call-template name="SRepeatCharAcc">
+      <xsl:with-param name="cchRequired" select="$cchRequired" />
+      <xsl:with-param name="ch" select="$ch" />
+      <xsl:with-param name="acc" select="$ch" />
+    </xsl:call-template>
+  </xsl:template>
+
+  <!-- %%Template SReplaceOperWithMinus
+
+       Go through the given string and replace every instance
+       of an operator with a minus '-'. This helps quickly identify
+       the first instance of an operator.
+  -->
+  <xsl:template name="SReplaceOperWithMinus">
+    <xsl:param name="sToParse" select="''" />
+
+    <!-- $sOperators and $sMinuses are defined outside this template.
+         Each character of sToParse found in $sOperators is swapped for the
+         character at the same position in $sMinuses. -->
+    <xsl:variable name="sOpersMasked" select="translate($sToParse, $sOperators, $sMinuses)" />
+    <xsl:value-of select="$sOpersMasked" />
+  </xsl:template>
+
+  <!-- %%Template SReplaceNumWithZero
+
+       Go through the given string and replace every instance
+       of a number with a zero '0'. This helps quickly identify
+       the first occurrence of a number.
+
+       Considers the '.' and ',' part of a number iff they are sandwiched
+       between two other numbers. 0.3 will be recognized as a number,
+       x.3 will not be. Since these characters can also be an operator, this
+       should be called before SReplaceOperWithMinus.
+  -->
+  <xsl:template name="SReplaceNumWithZero">
+    <xsl:param name="sToParse" select="''" />
+
+    <!-- Step 1: character-for-character replacement using $sNumbers and
+         $sZeros (both defined outside this template). Afterwards a
+         decimal or grouping separator between digits shows up as
+         0.0 or 0,0 -->
+    <xsl:variable name="sDigitsMasked" select="translate($sToParse, $sNumbers, $sZeros)" />
+
+    <!-- Step 2: turn 0.0 into 000 so the period counts as part of the number -->
+    <xsl:variable name="sPeriodsAbsorbed">
+      <xsl:call-template name="SReplace">
+        <xsl:with-param name="sInput" select="$sDigitsMasked" />
+        <xsl:with-param name="sOrig" select="'0.0'" />
+        <xsl:with-param name="sReplacement" select="'000'" />
+      </xsl:call-template>
+    </xsl:variable>
+
+    <!-- Step 3: same for 0,0; the result of this call is the template's output -->
+    <xsl:call-template name="SReplace">
+      <xsl:with-param name="sInput" select="$sPeriodsAbsorbed" />
+      <xsl:with-param name="sOrig" select="'0,0'" />
+      <xsl:with-param name="sReplacement" select="'000'" />
+    </xsl:call-template>
+  </xsl:template>
+
+  <!-- Template to translate Word's borderBox properties into the menclose notation attribute
+       The initial call to this SHOULD NOT pass an sAttribute.
Subsequent calls to + CreateMencloseNotationAttrFromBorderBoxAttr by CreateMencloseNotationAttrFromBorderBoxAttr will + update the sAttribute as appropriate. + + CreateMencloseNotationAttrFromBorderBoxAttr looks at each attribute (fHideTop, fHideBot, etc.) one at a time + in the order they are listed and passes a modified sAttribute to CreateMencloseNotationAttrFromBorderBoxAttr. + Each successive call to CreateMencloseNotationAttrFromBorderBoxAttr knows which attribute to look at because + the previous call should have omitted passing the attribute it just analyzed. This is why as you read lower + and lower in the template that each call to CreateMencloseNotationAttrFromBorderBoxAttr has fewer and fewer attributes. + --> + <xsl:template name="CreateMencloseNotationAttrFromBorderBoxAttr"> + <xsl:param name="fHideTop" /> + <xsl:param name="fHideBot" /> + <xsl:param name="fHideLeft" /> + <xsl:param name="fHideRight" /> + <xsl:param name="fStrikeH" /> + <xsl:param name="fStrikeV" /> + <xsl:param name="fStrikeBLTR" /> + <xsl:param name="fStrikeTLBR" /> + <xsl:param name="sAttribute" /> + + <xsl:choose> + <xsl:when test="string-length($sAttribute) = 0"> + <xsl:choose> + <xsl:when test="string-length($fHideTop) > 0 + and string-length($fHideBot) > 0 + and string-length($fHideLeft) > 0 + and string-length($fHideRight) > 0"> + + <xsl:choose> + <xsl:when test="$fHideTop = 0 + and $fHideBot = 0 + and $fHideLeft = 0 + and $fHideRight = 0"> + <!-- We can use 'box' instead of top, bot, left, and right. Therefore, + replace sAttribute with 'box' and begin analyzing params fStrikeH + and below. 
--> + <xsl:call-template name="CreateMencloseNotationAttrFromBorderBoxAttr"> + <xsl:with-param name="fStrikeH" select="$fStrikeH" /> + <xsl:with-param name="fStrikeV" select="$fStrikeV" /> + <xsl:with-param name="fStrikeBLTR" select="$fStrikeBLTR" /> + <xsl:with-param name="fStrikeTLBR" select="$fStrikeTLBR" /> + <xsl:with-param name="sAttribute"> + <xsl:text>box</xsl:text> + </xsl:with-param> + </xsl:call-template> + </xsl:when> + <xsl:otherwise> + <!-- Can't use 'box', theremore, must analyze all attributes --> + <xsl:call-template name="CreateMencloseNotationAttrFromBorderBoxAttr"> + <xsl:with-param name="fHideTop" select="$fHideTop" /> + <xsl:with-param name="fHideBot" select="$fHideBot" /> + <xsl:with-param name="fHideLeft" select="$fHideLeft" /> + <xsl:with-param name="fHideRight" select="$fHideRight" /> + <xsl:with-param name="fStrikeH" select="$fStrikeH" /> + <xsl:with-param name="fStrikeV" select="$fStrikeV" /> + <xsl:with-param name="fStrikeBLTR" select="$fStrikeBLTR" /> + <xsl:with-param name="fStrikeTLBR" select="$fStrikeTLBR" /> + <xsl:with-param name="sAttribute"> + <!-- Assume using all four (left right top bottom). Subsequent calls + will remove the sides which aren't to be includes. 
--> + <xsl:text>left right top bottom</xsl:text> + </xsl:with-param> + </xsl:call-template> + </xsl:otherwise> + </xsl:choose> + </xsl:when> + </xsl:choose> + </xsl:when> + <xsl:otherwise> + <xsl:choose> + <xsl:when test="string-length($fHideTop) > 0"> + <xsl:choose> + <xsl:when test="$fHideTop=1"> + <xsl:call-template name="CreateMencloseNotationAttrFromBorderBoxAttr"> + <xsl:with-param name="fHideBot" select="$fHideBot" /> + <xsl:with-param name="fHideLeft" select="$fHideLeft" /> + <xsl:with-param name="fHideRight" select="$fHideRight" /> + <xsl:with-param name="fStrikeH" select="$fStrikeH" /> + <xsl:with-param name="fStrikeV" select="$fStrikeV" /> + <xsl:with-param name="fStrikeBLTR" select="$fStrikeBLTR" /> + <xsl:with-param name="fStrikeTLBR" select="$fStrikeTLBR" /> + <xsl:with-param name="sAttribute"> + <xsl:call-template name="SReplace"> + <xsl:with-param name="sInput" select="$sAttribute" /> + <xsl:with-param name="sOrig" select="'top'" /> + <xsl:with-param name="sReplacement" select="''" /> + </xsl:call-template> + </xsl:with-param> + </xsl:call-template> + </xsl:when> + <xsl:otherwise> + <xsl:call-template name="CreateMencloseNotationAttrFromBorderBoxAttr"> + <xsl:with-param name="fHideBot" select="$fHideBot" /> + <xsl:with-param name="fHideLeft" select="$fHideLeft" /> + <xsl:with-param name="fHideRight" select="$fHideRight" /> + <xsl:with-param name="fStrikeH" select="$fStrikeH" /> + <xsl:with-param name="fStrikeV" select="$fStrikeV" /> + <xsl:with-param name="fStrikeBLTR" select="$fStrikeBLTR" /> + <xsl:with-param name="fStrikeTLBR" select="$fStrikeTLBR" /> + <xsl:with-param name="sAttribute" select="$sAttribute" /> + </xsl:call-template> + </xsl:otherwise> + </xsl:choose> + </xsl:when> + <xsl:when test="string-length($fHideBot) > 0"> + <xsl:choose> + <xsl:when test="$fHideBot=1"> + <xsl:call-template name="CreateMencloseNotationAttrFromBorderBoxAttr"> + <xsl:with-param name="fHideLeft" select="$fHideLeft" /> + <xsl:with-param name="fHideRight" 
select="$fHideRight" /> + <xsl:with-param name="fStrikeH" select="$fStrikeH" /> + <xsl:with-param name="fStrikeV" select="$fStrikeV" /> + <xsl:with-param name="fStrikeBLTR" select="$fStrikeBLTR" /> + <xsl:with-param name="fStrikeTLBR" select="$fStrikeTLBR" /> + <xsl:with-param name="sAttribute"> + <xsl:call-template name="SReplace"> + <xsl:with-param name="sInput" select="$sAttribute" /> + <xsl:with-param name="sOrig" select="'bottom'" /> + <xsl:with-param name="sReplacement" select="''" /> + </xsl:call-template> + </xsl:with-param> + </xsl:call-template> + </xsl:when> + <xsl:otherwise> + <xsl:call-template name="CreateMencloseNotationAttrFromBorderBoxAttr"> + <xsl:with-param name="fHideLeft" select="$fHideLeft" /> + <xsl:with-param name="fHideRight" select="$fHideRight" /> + <xsl:with-param name="fStrikeH" select="$fStrikeH" /> + <xsl:with-param name="fStrikeV" select="$fStrikeV" /> + <xsl:with-param name="fStrikeBLTR" select="$fStrikeBLTR" /> + <xsl:with-param name="fStrikeTLBR" select="$fStrikeTLBR" /> + <xsl:with-param name="sAttribute" select="$sAttribute" /> + </xsl:call-template> + </xsl:otherwise> + </xsl:choose> + </xsl:when> + <xsl:when test="string-length($fHideLeft) > 0"> + <xsl:choose> + <xsl:when test="$fHideLeft=1"> + <xsl:call-template name="CreateMencloseNotationAttrFromBorderBoxAttr"> + <xsl:with-param name="fHideRight" select="$fHideRight" /> + <xsl:with-param name="fStrikeH" select="$fStrikeH" /> + <xsl:with-param name="fStrikeV" select="$fStrikeV" /> + <xsl:with-param name="fStrikeBLTR" select="$fStrikeBLTR" /> + <xsl:with-param name="fStrikeTLBR" select="$fStrikeTLBR" /> + <xsl:with-param name="sAttribute"> + <xsl:call-template name="SReplace"> + <xsl:with-param name="sInput" select="$sAttribute" /> + <xsl:with-param name="sOrig" select="'left'" /> + <xsl:with-param name="sReplacement" select="''" /> + </xsl:call-template> + </xsl:with-param> + </xsl:call-template> + </xsl:when> + <xsl:otherwise> + <xsl:call-template 
name="CreateMencloseNotationAttrFromBorderBoxAttr"> + <xsl:with-param name="fHideRight" select="$fHideRight" /> + <xsl:with-param name="fStrikeH" select="$fStrikeH" /> + <xsl:with-param name="fStrikeV" select="$fStrikeV" /> + <xsl:with-param name="fStrikeBLTR" select="$fStrikeBLTR" /> + <xsl:with-param name="fStrikeTLBR" select="$fStrikeTLBR" /> + <xsl:with-param name="sAttribute" select="$sAttribute" /> + </xsl:call-template> + </xsl:otherwise> + </xsl:choose> + </xsl:when> + <xsl:when test="string-length($fHideRight) > 0"> + <xsl:choose> + <xsl:when test="$fHideRight=1"> + <xsl:call-template name="CreateMencloseNotationAttrFromBorderBoxAttr"> + <xsl:with-param name="fStrikeH" select="$fStrikeH" /> + <xsl:with-param name="fStrikeV" select="$fStrikeV" /> + <xsl:with-param name="fStrikeBLTR" select="$fStrikeBLTR" /> + <xsl:with-param name="fStrikeTLBR" select="$fStrikeTLBR" /> + <xsl:with-param name="sAttribute"> + <xsl:call-template name="SReplace"> + <xsl:with-param name="sInput" select="$sAttribute" /> + <xsl:with-param name="sOrig" select="'right'" /> + <xsl:with-param name="sReplacement" select="''" /> + </xsl:call-template> + </xsl:with-param> + </xsl:call-template> + </xsl:when> + <xsl:otherwise> + <xsl:call-template name="CreateMencloseNotationAttrFromBorderBoxAttr"> + <xsl:with-param name="fStrikeH" select="$fStrikeH" /> + <xsl:with-param name="fStrikeV" select="$fStrikeV" /> + <xsl:with-param name="fStrikeBLTR" select="$fStrikeBLTR" /> + <xsl:with-param name="fStrikeTLBR" select="$fStrikeTLBR" /> + <xsl:with-param name="sAttribute" select="$sAttribute" /> + </xsl:call-template> + </xsl:otherwise> + </xsl:choose> + </xsl:when> + <xsl:when test="string-length($fStrikeH) > 0"> + <xsl:choose> + <xsl:when test="$fStrikeH=1"> + <xsl:call-template name="CreateMencloseNotationAttrFromBorderBoxAttr"> + <xsl:with-param name="fStrikeV" select="$fStrikeV" /> + <xsl:with-param name="fStrikeBLTR" select="$fStrikeBLTR" /> + <xsl:with-param name="fStrikeTLBR" 
select="$fStrikeTLBR" /> + <xsl:with-param name="sAttribute" select="concat($sAttribute, ' horizontalstrike')" /> + </xsl:call-template> + </xsl:when> + <xsl:otherwise> + <xsl:call-template name="CreateMencloseNotationAttrFromBorderBoxAttr"> + <xsl:with-param name="fStrikeV" select="$fStrikeV" /> + <xsl:with-param name="fStrikeBLTR" select="$fStrikeBLTR" /> + <xsl:with-param name="fStrikeTLBR" select="$fStrikeTLBR" /> + <xsl:with-param name="sAttribute" select="$sAttribute" /> + </xsl:call-template> + </xsl:otherwise> + </xsl:choose> + </xsl:when> + <xsl:when test="string-length($fStrikeV) > 0"> + <xsl:choose> + <xsl:when test="$fStrikeV=1"> + <xsl:call-template name="CreateMencloseNotationAttrFromBorderBoxAttr"> + <xsl:with-param name="fStrikeBLTR" select="$fStrikeBLTR" /> + <xsl:with-param name="fStrikeTLBR" select="$fStrikeTLBR" /> + <xsl:with-param name="sAttribute" select="concat($sAttribute, ' verticalstrike')" /> + </xsl:call-template> + </xsl:when> + <xsl:otherwise> + <xsl:call-template name="CreateMencloseNotationAttrFromBorderBoxAttr"> + <xsl:with-param name="fStrikeBLTR" select="$fStrikeBLTR" /> + <xsl:with-param name="fStrikeTLBR" select="$fStrikeTLBR" /> + <xsl:with-param name="sAttribute" select="$sAttribute" /> + </xsl:call-template> + </xsl:otherwise> + </xsl:choose> + </xsl:when> + <xsl:when test="string-length($fStrikeBLTR) > 0"> + <xsl:choose> + <xsl:when test="$fStrikeBLTR=1"> + <xsl:call-template name="CreateMencloseNotationAttrFromBorderBoxAttr"> + <xsl:with-param name="fStrikeTLBR" select="$fStrikeTLBR" /> + <xsl:with-param name="sAttribute" select="concat($sAttribute, ' updiagonalstrike')" /> + </xsl:call-template> + </xsl:when> + <xsl:otherwise> + <xsl:call-template name="CreateMencloseNotationAttrFromBorderBoxAttr"> + <xsl:with-param name="fStrikeTLBR" select="$fStrikeTLBR" /> + <xsl:with-param name="sAttribute" select="$sAttribute" /> + </xsl:call-template> + </xsl:otherwise> + </xsl:choose> + </xsl:when> + <xsl:when 
test="string-length($fStrikeTLBR) > 0"> + <xsl:choose> + <xsl:when test="$fStrikeTLBR=1"> + <xsl:call-template name="CreateMencloseNotationAttrFromBorderBoxAttr"> + <xsl:with-param name="sAttribute" select="concat($sAttribute, ' downdiagonalstrike')" /> + </xsl:call-template> + </xsl:when> + <xsl:otherwise> + <xsl:call-template name="CreateMencloseNotationAttrFromBorderBoxAttr"> + <xsl:with-param name="sAttribute" select="$sAttribute" /> + </xsl:call-template> + </xsl:otherwise> + </xsl:choose> + </xsl:when> + <xsl:otherwise> + <xsl:attribute name="notation"> + <xsl:value-of select="normalize-space($sAttribute)" /> + </xsl:attribute> + </xsl:otherwise> + </xsl:choose> + </xsl:otherwise> + </xsl:choose> + </xsl:template> + + <!-- Tristate (true, false, neither) from string value: emits 1 when $str is 'on', '1' or 'true'; 0 when it is 'off', '0' or 'false'; and -1 for any other value. The tests are exact string comparisons. --> + <xsl:template name="TFromStrVal"> + <xsl:param name="str" /> + <xsl:choose> + <xsl:when test="$str = 'on' or $str = '1' or $str = 'true'">1</xsl:when> + <xsl:when test="$str = 'off' or $str = '0' or $str = 'false'">0</xsl:when> + <xsl:otherwise>-1</xsl:otherwise> + </xsl:choose> + </xsl:template> + + <!-- Return 0 iff $str is explicitly set to a false value (one that TFromStrVal maps to 0). + Return 1 otherwise, i.e. for true values and for anything TFromStrVal does not recognize --> + <xsl:template name="ForceFalseStrVal"> + <xsl:param name="str" /> + <xsl:variable name="tValue"> + <xsl:call-template name="TFromStrVal"> + <xsl:with-param name="str" select="$str"/> + </xsl:call-template> + </xsl:variable> + <xsl:choose> + <xsl:when test="$tValue = '0'">0</xsl:when> + <xsl:otherwise>1</xsl:otherwise> + </xsl:choose> + </xsl:template> + + <!-- Return 1 iff $str is explicitly set to a true value. 
+ Return false otherwise --> + <xsl:template name="ForceTrueStrVal"> <!-- Classifies $str with TFromStrVal and emits 1 only when the result is '1'; both the 0 (false) and -1 (unrecognized) results emit 0. --> + <xsl:param name="str" /> + <xsl:variable name="tValue"> + <xsl:call-template name="TFromStrVal"> + <xsl:with-param name="str" select="$str"/> + </xsl:call-template> + </xsl:variable> + <xsl:choose> + <xsl:when test="$tValue = '1'">1</xsl:when> + <xsl:otherwise>0</xsl:otherwise> + </xsl:choose> + </xsl:template> +</xsl:stylesheet> \ No newline at end of file diff --git a/model_ai/llama.py b/model_ai/llama.py index 9d69082..5d38f16 100644 --- a/model_ai/llama.py +++ b/model_ai/llama.py @@ -1,6 +1,7 @@ # Standard library imports import logging import os +import time from config.settings.base import ( LLAMA_ENABLED, @@ -97,7 +98,9 @@ def _run_as_content_generation(self, user_input): model = genai.GenerativeModel('models/gemini-3.1-flash-lite-preview') # Generate content using Gemini - return model.generate_content(user_input).text + response_gemini = model.generate_content(user_input).text + time.sleep(15) + return response_gemini # Gemini not configured, fallback to LLaMA else: