From 36dbf01ee41c5fb7acf33cafd23c4c80dd4dc4bc Mon Sep 17 00:00:00 2001 From: Roberta Takenaka Date: Fri, 19 Jul 2019 10:00:27 -0300 Subject: [PATCH] Recuperar o SciELO ID da versao aop para inserir na versao regular --- .../bin/xml/app_modules/app/data/article.py | 2 +- .../app_modules/app/data/scielo_id_manager.py | 55 ++++---- .../bin/xml/app_modules/app/db/xc_models.py | 2 + .../app/pkg_processors/pkg_processors.py | 9 +- .../bin/xml/app_modules/generics/exporter.py | 129 +++++++++++++----- .../bin/xml/tests/test_xc_add_scielo_id.py | 9 +- 6 files changed, 138 insertions(+), 68 deletions(-) diff --git a/src/scielo/bin/xml/app_modules/app/data/article.py b/src/scielo/bin/xml/app_modules/app/data/article.py index 9724a8eca..8553982ec 100644 --- a/src/scielo/bin/xml/app_modules/app/data/article.py +++ b/src/scielo/bin/xml/app_modules/app/data/article.py @@ -2002,7 +2002,7 @@ def reference(self): self._ref.trans_title_language = self.trans_title_language self._ref.contrib_xml_items = self.contrib_xml_items self._ref.person_group_xml_items = self.person_group_xml_items - self._ref.page_range = self.page_range + self._ref.page_range = first_item(self.page_range) self._ref.doi = self.doi self._ref.pmid = self.pmid self._ref.pmcid = self.pmcid diff --git a/src/scielo/bin/xml/app_modules/app/data/scielo_id_manager.py b/src/scielo/bin/xml/app_modules/app/data/scielo_id_manager.py index 33d41f097..fd62bae98 100644 --- a/src/scielo/bin/xml/app_modules/app/data/scielo_id_manager.py +++ b/src/scielo/bin/xml/app_modules/app/data/scielo_id_manager.py @@ -3,32 +3,35 @@ import xml.etree.ElementTree as ET -def add_scielo_id_to_received_documents( - received_documents, registered_documents, file_paths): - """Atualiza scielo_id dos documentos recebidos.""" +def add_scielo_id_to_documents(received_documents, registered_documents): + """Atualiza scielo_id.""" for name, received in received_documents.items(): - if not received.scielo_id: - add_scielo_id( - received, - registered_documents.get(name), - file_paths.get(name), - ) + registered_id = None + registered = registered_documents.get(name) + if registered: + registered_id = registered.registered_scielo_id + received.registered_scielo_id = registered_id or scielo_id_gen.generate_scielo_pid() -def add_scielo_id(received, registered, file_path): - """Atualiza received.registered_scielo_id com o valor do - registered.scielo_id ou gerando um novo scielo_id.""" - if registered and registered.scielo_id: - received.registered_scielo_id = registered.scielo_id - else: - received.registered_scielo_id = scielo_id_gen.generate_scielo_pid() - xml = ET.parse(file_path) - node = xml.find(".//article-meta") - if node is not None: - article_id = ET.Element("article-id") - article_id.set("specific-use", "scielo") - article_id.set("pub-type-id", "publisher-id") - article_id.text = received.registered_scielo_id - node.insert(0, article_id) - new_content = ET.tostring(xml.find(".")).decode("utf-8") - fs_utils.write_file(file_path, new_content) +def add_scielo_id_to_xml_files(received_documents, file_paths): + """Atualiza scielo_id.""" + for name, received in received_documents.items(): + file_path = file_paths.get(name) + xml = ET.parse(file_path) + article_meta = xml.find(".//article-meta") + if article_meta is None: + continue + article_id_node = xml.find( + ".//article-meta/article-id[@specific-use='scielo']") + if article_id_node is None: + article_id_node = ET.Element("article-id") + article_id_node.set("specific-use", "scielo") + article_id_node.set("pub-type-id", "publisher-id") + + if (article_id_node is not None and + article_id_node.text != received.registered_scielo_id): + article_id_node.text = received.registered_scielo_id + + article_meta.insert(0, article_id_node) + new_content = ET.tostring(xml.find(".")).decode("utf-8") + fs_utils.write_file(file_path, new_content) diff --git a/src/scielo/bin/xml/app_modules/app/db/xc_models.py b/src/scielo/bin/xml/app_modules/app/db/xc_models.py index 11cdd2755..e79a69027 100644 --- a/src/scielo/bin/xml/app_modules/app/db/xc_models.py +++ b/src/scielo/bin/xml/app_modules/app/db/xc_models.py @@ -784,6 +784,7 @@ def get_valid_aop(self, article): self.xc_messages.extend(messages) if valid_aop is not None: article.registered_aop_pid = valid_aop.pid + article.registered_scielo_id = valid_aop.registered_scielo_id return (aop_status, valid_aop) def exclude_aop(self, valid_aop): @@ -807,6 +808,7 @@ def convert_article(self, article, i_record, xml_name): valid_aop = None if not article.is_ahead: aop_status, valid_aop = self.get_valid_aop(article) + id_created = self.base_manager.save_article(article, i_record) article_converted = id_created if id_created is True: diff --git a/src/scielo/bin/xml/app_modules/app/pkg_processors/pkg_processors.py b/src/scielo/bin/xml/app_modules/app/pkg_processors/pkg_processors.py index d54afe027..6888cb02c 100644 --- a/src/scielo/bin/xml/app_modules/app/pkg_processors/pkg_processors.py +++ b/src/scielo/bin/xml/app_modules/app/pkg_processors/pkg_processors.py @@ -122,10 +122,9 @@ def convert(self, export_documents_package=None): scilista_items = [self.pkg.issue_data.acron_issue_label] if self.validations_reports.blocking_errors == 0 and (self.accepted_articles == len(self.pkg.articles) or len(self.articles_mergence.excluded_orders) > 0): self.error_messages = self.db.exclude_articles(self.articles_mergence.excluded_orders) - scielo_id_manager.add_scielo_id_to_received_documents( + scielo_id_manager.add_scielo_id_to_documents( self.articles_mergence.accepted_articles, - self.articles_mergence.registered_articles, - self.pkg.file_paths) + self.articles_mergence.registered_articles) _scilista_items = self.db.convert_articles(self.pkg.issue_data.acron_issue_label, self.articles_mergence.accepted_articles, self.registered_issue_data.issue_models.record, self.create_windows_base) scilista_items.extend(_scilista_items) @@ -136,8 +135,10 @@ def convert(self, export_documents_package=None): self.articles_conversion_validations[name].message = message if len(_scilista_items) > 0: + scielo_id_manager.add_scielo_id_to_xml_files( + self.articles_mergence.accepted_articles, + self.pkg.file_paths) # IMPROVEME - self.registered_issue_data.issue_files.copy_files_to_local_web_app(self.pkg.package_folder.path, self.local_web_app_path) self.registered_issue_data.issue_files.save_source_files(self.pkg.package_folder.path) if export_documents_package: diff --git a/src/scielo/bin/xml/app_modules/generics/exporter.py b/src/scielo/bin/xml/app_modules/generics/exporter.py index e82dadc36..7540d78de 100644 --- a/src/scielo/bin/xml/app_modules/generics/exporter.py +++ b/src/scielo/bin/xml/app_modules/generics/exporter.py @@ -3,75 +3,138 @@ import shutil import logging import tempfile +import threading from ftplib import FTP, all_errors +logging.basicConfig( + filename='./exporter.log', + format=u'%(asctime)s %(message)s') +logger = logging.getLogger('Exporter') +logger.setLevel(logging.DEBUG) + + +try: + os.unlink('./exporter.log') +except: + pass +exp_logger = logger + + class Exporter(object): def __init__(self, data): self._data = data @property - def ftp_config(self): + def ftp_configuration(self): try: server = self._data["server"] user = self._data["user"] password = self._data["password"] - remote_path = self._data.get("remote_path") except KeyError: - raise KeyError("Exporter: Configuration failure") + exp_logger.info("Exporter: Missing FTP Configuration") else: - return server, user, password, remote_path + return server, user, password, self._data.get("remote_path") + + @property + def copy_configuration(self): + try: + destination_path = self._data["destination_path"] + except KeyError: + exp_logger.info("Exporter: Missing Destination Configuration") + else: + return destination_path def export(self, files_path, zip_filename): + destination_path = self.copy_configuration + ftp_configuration = self.ftp_configuration + + if not destination_path and not ftp_configuration: + exp_logger.info("Exporter: Missing Configuration") + return + zip_file_path = self.zip(files_path, zip_filename) if zip_file_path: - self.export_by_ftp(zip_file_path) - try: - os.unlink(zip_file_path) - shutil.rmtree(os.path.dirname(zip_file_path)) - except OSError: - logging.info( - "Exporter: Unable to delete temp: %s" % zip_file_path) + if destination_path: + if not os.path.isdir(destination_path): + os.makedirs(destination_path) + if ftp_configuration: + shutil.copy(zip_file_path, destination_path) + else: + shutil.move(zip_file_path, destination_path) + if ftp_configuration: + server, user, password, remote_path = ftp_configuration + self.export_by_ftp( + zip_file_path, server, user, password, remote_path) def zip(self, files_path, zip_filename): try: dest_path = tempfile.mkdtemp() except IOError: - logging.info("Exporter: Unable to create temp dir") + exp_logger.info("Exporter: Unable to create temp dir") else: zip_file_path = os.path.join(dest_path, zip_filename) try: with zipfile.ZipFile(zip_file_path, 'w') as zipf: + exp_logger.info( + "Create %s from %s" % (zip_file_path, files_path)) for item in os.listdir(files_path): file_path = os.path.join(files_path, item) zipf.write(file_path, arcname=item) except IOError: - logging.info( + exp_logger.info( "Exporter: Unable to create zip: %s" % zip_filename ) else: return zip_file_path - def export_by_ftp(self, local_file_path): + def export_by_ftp(self, local_file_path, server, user, password, remote_path): + background = AsyncFTP(local_file_path, server, user, password, remote_path) + background.start() + + +class AsyncFTP(threading.Thread): + def __init__(self, local_file_path, server, user, password, remote_path, timeout=60): + threading.Thread.__init__(self) + self.local_file_path = local_file_path + self.server = server + self.user = user + self.password = password + self.remote_path = remote_path + self.timeout = timeout + + def run(self): + exp_logger.info("FTP.START") try: - config = self.ftp_config - except KeyError: - logging.info("Export: Invalid configuration") - else: - server, user, password, remote_path = config + ftp = FTP(self.server, self.user, self.password, self.timeout) + except all_errors as e: + exp_logger.info(e) + return + try: + if self.remote_path: + ftp.cwd(self.remote_path) + exp_logger.info("ftp " + self.local_file_path) + remote_name = os.path.basename(self.local_file_path) + with open(self.local_file_path, 'rb') as f: + try: + exp_logger.info("FTP.STOR %s - start" % remote_name) + ftp.storbinary('STOR {}'.format(remote_name), f) + exp_logger.info("FTP.STOR %s - end" % remote_name) + except all_errors: + exp_logger.info( + 'FTP: Unable to send %s to %s' % + (self.local_file_path, remote_name), exc_info=True) + except all_errors as e: + exp_logger.info("all_errors as e") + + exp_logger.info(e) + finally: + ftp.close() + exp_logger.info("FTP.END") try: - with FTP(server, timeout=60) as ftp: - ftp.login(user, password) - if remote_path: - ftp.cwd(remote_path) - remote_name = os.path.basename(local_file_path) - with open(local_file_path, 'rb') as f: - try: - ftp.storbinary('STOR {}'.format(remote_name), f) - except all_errors: - logging.info( - 'FTP: Unable to send %s to %s' % - (local_file_path, remote_name), exc_info=True) - except all_errors: - logging.info("Unable to transfer: %s" % local_file_path) + os.unlink(self.local_file_path) + shutil.rmtree(os.path.dirname(self.local_file_path)) + except OSError: + exp_logger.info( + "Exporter: Unable to delete temp: %s" % self.local_file_path) diff --git a/src/scielo/bin/xml/tests/test_xc_add_scielo_id.py b/src/scielo/bin/xml/tests/test_xc_add_scielo_id.py index 101d23246..f1ca3ed3a 100644 --- a/src/scielo/bin/xml/tests/test_xc_add_scielo_id.py +++ b/src/scielo/bin/xml/tests/test_xc_add_scielo_id.py @@ -1,10 +1,9 @@ import unittest import os -import xml.etree.ElementTree as ET from copy import deepcopy from app_modules.app.data.scielo_id_manager import ( - add_scielo_id, - add_scielo_id_to_received_documents, + add_scielo_id_to_documents, + add_scielo_id_to_xml_files, ) @@ -40,9 +39,11 @@ def test_add_scielo_id(self): received.update({name: Article(None)}) file_paths.update({name: fname}) - add_scielo_id_to_received_documents(received, registered, file_paths) + add_scielo_id_to_documents(received, registered) + add_scielo_id_to_xml_files(received, file_paths) for name, item in received.items(): with self.subTest(name): + self.assertIsNotNone(item.registered_scielo_id) with open(file_paths[name], "r") as fp: content = fp.read()