# quality_recticel/py_app/app/daily_mirror_db_setup.py
"""
Daily Mirror Database Setup and Management
Quality Recticel Application
This script creates the database schema and provides utilities for
data import and Daily Mirror reporting functionality.
"""
import mariadb
import pandas as pd
import os
from datetime import datetime, timedelta
import logging
# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class DailyMirrorDatabase:
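    """Owns the MariaDB connection and all Daily Mirror import/report helpers."""

    # NOTE: the defaults below are development credentials; in a deployed
    # environment, prefer supplying host/user/password from configuration
    # or environment variables rather than hardcoding them.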
def __init__(self, host='localhost', user='trasabilitate', password='Initial01!', database='trasabilitate'):
self.host = host
self.user = user
self.password = password
self.database = database
self.connection = None
def connect(self):
"""Establish database connection"""
try:
self.connection = mariadb.connect(
host=self.host,
user=self.user,
password=self.password,
database=self.database
)
logger.info("Database connection established")
return True
except Exception as e:
logger.error(f"Database connection failed: {e}")
return False
def disconnect(self):
"""Close database connection"""
if self.connection:
self.connection.close()
logger.info("Database connection closed")
def create_database_schema(self):
"""Create the Daily Mirror database schema"""
try:
cursor = self.connection.cursor()
# Read and execute the schema file
schema_file = os.path.join(os.path.dirname(__file__), 'daily_mirror_database_schema.sql')
if not os.path.exists(schema_file):
logger.error(f"Schema file not found: {schema_file}")
return False
with open(schema_file, 'r') as file:
schema_sql = file.read()
# Split by statements and execute each one
statements = []
current_statement = ""
for line in schema_sql.split('\n'):
line = line.strip()
if line and not line.startswith('--'):
current_statement += line + " "
if line.endswith(';'):
statements.append(current_statement.strip())
current_statement = ""
# Add any remaining statement
if current_statement.strip():
statements.append(current_statement.strip())
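            # NOTE: this splitter assumes ';' only ever terminates a statement;
            # it would mis-split semicolons inside string literals or stored
            # routine bodies, which the schema file is assumed not to contain.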
for statement in statements:
if statement and any(statement.upper().startswith(cmd) for cmd in ['CREATE', 'ALTER', 'DROP', 'INSERT']):
try:
cursor.execute(statement)
logger.info(f"Executed: {statement[:80]}...")
except Exception as e:
if "already exists" not in str(e).lower():
logger.warning(f"Error executing statement: {e}")
self.connection.commit()
logger.info("Database schema created successfully")
return True
except Exception as e:
logger.error(f"Error creating database schema: {e}")
return False
def import_production_data(self, file_path):
"""Import production data from Excel file (Comenzi Productie format)"""
try:
# The correct data is in the first sheet (DataSheet)
df = None
sheet_used = None
# Get available sheets
excel_file = pd.ExcelFile(file_path)
logger.info(f"Available sheets: {excel_file.sheet_names}")
# Try DataSheet first (where the actual production data is), then fallback options
sheet_attempts = [
('DataSheet', 'openpyxl'),
('DataSheet', 'xlrd'),
(0, 'openpyxl'),
(0, 'xlrd'),
('Sheet1', 'openpyxl'), # fallback to Sheet1 if DataSheet fails
(1, 'openpyxl')
]
for sheet_name, engine in sheet_attempts:
try:
logger.info(f"Trying to read sheet '{sheet_name}' with engine '{engine}'")
df = pd.read_excel(file_path, sheet_name=sheet_name, engine=engine, header=0)
sheet_used = f"{sheet_name} (engine: {engine})"
logger.info(f"Successfully read from sheet: {sheet_used}")
break
except Exception as e:
logger.warning(f"Failed to read sheet {sheet_name} with {engine}: {e}")
continue
# If all engines fail on DataSheet, try a different approach
if df is None:
try:
logger.info("Trying alternative method: reading without specifying engine")
df = pd.read_excel(file_path, sheet_name='DataSheet')
sheet_used = "DataSheet (default engine)"
logger.info("Successfully read with default engine")
except Exception as e:
logger.error(f"Failed with default engine: {e}")
raise Exception("Could not read the DataSheet from the Excel file. The file may be corrupted.")
logger.info(f"Loaded production data from {sheet_used}: {len(df)} rows, {len(df.columns)} columns")
logger.info(f"Available columns: {list(df.columns)}")
cursor = self.connection.cursor()
success_count = 0
created_count = 0
updated_count = 0
error_count = 0
# Prepare insert statement
insert_sql = """
INSERT INTO dm_production_orders (
production_order, customer_code, client_order, article_code,
article_description, quantity_requested, delivery_date, production_status,
end_of_quilting, end_of_sewing, t1_status, t1_registration_date, t1_operator_name,
t2_status, t2_registration_date, t2_operator_name, t3_status, t3_registration_date,
t3_operator_name, machine_code, machine_type, classification, total_norm_time,
data_deschiderii, model_lb2, data_planificare, machine_number, design_number, needle_position
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
ON DUPLICATE KEY UPDATE
customer_code = VALUES(customer_code),
client_order = VALUES(client_order),
article_code = VALUES(article_code),
article_description = VALUES(article_description),
quantity_requested = VALUES(quantity_requested),
delivery_date = VALUES(delivery_date),
production_status = VALUES(production_status),
updated_at = CURRENT_TIMESTAMP
"""
for index, row in df.iterrows():
try:
# Prepare data tuple
data = (
row.get('Comanda Productie', ''),
row.get('Customer', ''),
row.get('Comanda client', ''),
row.get('Cod Articol', ''),
row.get('Descriere', ''),
row.get('Cantitate ceruta', 0),
self._parse_date(row.get('Delivery date')),
row.get('Status', ''),
self._parse_datetime(row.get('End of Quilting')),
self._parse_datetime(row.get('End of sewing')),
row.get('T1', 0),
self._parse_datetime(row.get('Data inregistrare T1')),
row.get('Numele Complet T1', ''),
row.get('T2', 0),
self._parse_datetime(row.get('Data inregistrare T2')),
row.get('Numele Complet T2', ''),
row.get('T3', 0),
self._parse_datetime(row.get('Data inregistrare T3')),
row.get('Numele Complet T3', ''),
                        row.get('Masina Cusut ', ''),  # trailing space is part of the column name
row.get('Tip Masina', ''),
row.get('Clasificare', ''),
row.get('Timp normat total', 0),
self._parse_date(row.get('Data Deschiderii')),
row.get('Model Lb2', ''),
self._parse_date(row.get('Data Planific.')),
row.get('Numar masina', ''),
row.get('Design nr', 0),
row.get('Needle position', 0)
)
cursor.execute(insert_sql, data)
                    # Check whether the row was inserted (created) or updated.
                    # With ON DUPLICATE KEY UPDATE in MariaDB/MySQL:
                    #   rowcount == 1 -> INSERT (new row created)
                    #   rowcount == 2 -> UPDATE (existing row updated)
                    #   rowcount == 0 -> no change (submitted values identical)
if cursor.rowcount == 1:
created_count += 1
elif cursor.rowcount == 2:
updated_count += 1
success_count += 1
except Exception as row_error:
logger.warning(f"Error processing row {index}: {row_error}")
error_count += 1
continue
self.connection.commit()
logger.info(f"Production data import completed: {success_count} successful, {error_count} failed")
return {
'success_count': success_count,
'created_count': created_count,
'updated_count': updated_count,
'error_count': error_count,
'total_rows': len(df)
}
except Exception as e:
logger.error(f"Error importing production data: {e}")
return None
def import_orders_data(self, file_path):
"""Import orders data from Excel file with enhanced error handling"""
try:
# Ensure we have a database connection
if not self.connection:
self.connect()
if not self.connection:
return {
'success_count': 0,
'error_count': 1,
'total_rows': 0,
'error_message': 'Could not establish database connection.'
}
logger.info(f"Attempting to import orders data from: {file_path}")
# Check if file exists
if not os.path.exists(file_path):
logger.error(f"Orders file not found: {file_path}")
return {
'success_count': 0,
'error_count': 1,
'total_rows': 0,
'error_message': f'Orders file not found: {file_path}'
}
# Try to get sheet names first
try:
excel_file = pd.ExcelFile(file_path)
sheet_names = excel_file.sheet_names
logger.info(f"Available sheets in orders file: {sheet_names}")
except Exception as e:
logger.warning(f"Could not get sheet names: {e}")
sheet_names = ['DataSheet', 'Sheet1']
            # Try several engine/sheet combinations to read the Excel file.
            # pandas exposes no read-only switch for read_excel, so attempts
            # differ only by engine and sheet.
            df = None
            sheet_used = None
            approaches = [
                ('openpyxl', 0),
                ('openpyxl', 1),
                ('xlrd', 0) if file_path.endswith('.xls') else None,
                ('default', 0)
            ]
            for approach in approaches:
                if approach is None:
                    continue
                engine, sheet_name = approach
                try:
                    logger.info(f"Trying to read orders with engine: {engine}, sheet: {sheet_name}")
                    if engine == 'default':
                        df = pd.read_excel(file_path, sheet_name=sheet_name, header=0)
                    else:
                        df = pd.read_excel(file_path, sheet_name=sheet_name, engine=engine, header=0)
                    sheet_used = f"{engine} (sheet: {sheet_name})"
                    logger.info(f"Successfully read orders data with: {sheet_used}")
                    break
                except Exception as e:
                    logger.warning(f"Failed to read orders with {engine}, sheet {sheet_name}: {e}")
                    continue
if df is None:
logger.error("Could not read the orders file with any method")
return {
'success_count': 0,
'error_count': 1,
'total_rows': 0,
'error_message': 'Could not read the orders Excel file. The file may have formatting issues or be corrupted.'
}
logger.info(f"Loaded orders data from {sheet_used}: {len(df)} rows, {len(df.columns)} columns")
logger.info(f"Available columns: {list(df.columns)[:10]}...")
cursor = self.connection.cursor()
success_count = 0
created_count = 0
updated_count = 0
error_count = 0
# Prepare insert statement for orders
insert_sql = """
INSERT INTO dm_orders (
order_id, customer_code, customer_name, client_order,
article_code, article_description, quantity_requested, delivery_date,
order_status, product_group, order_date
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
ON DUPLICATE KEY UPDATE
customer_code = VALUES(customer_code),
customer_name = VALUES(customer_name),
client_order = VALUES(client_order),
article_code = VALUES(article_code),
article_description = VALUES(article_description),
quantity_requested = VALUES(quantity_requested),
delivery_date = VALUES(delivery_date),
order_status = VALUES(order_status),
product_group = VALUES(product_group),
order_date = VALUES(order_date),
updated_at = CURRENT_TIMESTAMP
"""
            # Helpers to read cell values safely, normalising NaN/blank cells
            # (defined once here rather than inside the per-row loop)
            def safe_get(row, column, default=''):
                value = row.get(column, default)
                if pd.isna(value) or value == 'nan':
                    return default
                return str(value).strip() if isinstance(value, str) else value

            def safe_get_int(row, column, default=0):
                value = row.get(column, default)
                if pd.isna(value) or value == 'nan':
                    return default
                try:
                    return int(float(value)) if value != '' else default
                except (ValueError, TypeError):
                    return default

            # Process each row with the actual column mapping
            for index, row in df.iterrows():
                try:
# Map columns based on the actual Vizual. Artic. Comenzi Deschise format
data = (
safe_get(row, 'Comanda', f'ORD_{index:06d}'), # Order ID
safe_get(row, 'Cod. Client'), # Customer Code
safe_get(row, 'Customer Name'), # Customer Name
safe_get(row, 'Com. Achiz. Client'), # Client Order
safe_get(row, 'Cod Articol'), # Article Code
safe_get(row, 'Part Description', safe_get(row, 'Descr. Articol')), # Article Description
safe_get_int(row, 'Cantitate'), # Quantity
self._parse_date(row.get('Data livrare')), # Delivery Date
safe_get(row, 'Statut Comanda', 'PENDING'), # Order Status
safe_get(row, 'Model'), # Product Group
self._parse_date(row.get('Data Comenzii')) # Order Date
)
cursor.execute(insert_sql, data)
# Track created vs updated
if cursor.rowcount == 1:
created_count += 1
elif cursor.rowcount == 2:
updated_count += 1
success_count += 1
except Exception as row_error:
logger.warning(f"Error processing row {index}: {row_error}")
error_count += 1
continue
self.connection.commit()
logger.info(f"Orders import completed: {success_count} successful, {error_count} errors")
return {
'success_count': success_count,
'created_count': created_count,
'updated_count': updated_count,
'error_count': error_count,
'total_rows': len(df),
'error_message': None if error_count == 0 else f'{error_count} rows failed to import'
}
except Exception as e:
logger.error(f"Error importing orders data: {e}")
return {
'success_count': 0,
'error_count': 1,
'total_rows': 0,
'error_message': str(e)
}
def import_delivery_data(self, file_path):
"""Import delivery data from Excel file with enhanced error handling"""
try:
# Ensure we have a database connection
if not self.connection:
self.connect()
if not self.connection:
return {
'success_count': 0,
'error_count': 1,
'total_rows': 0,
'error_message': 'Could not establish database connection.'
}
logger.info(f"Attempting to import delivery data from: {file_path}")
# Check if file exists
if not os.path.exists(file_path):
logger.error(f"Delivery file not found: {file_path}")
return {
'success_count': 0,
'error_count': 1,
'total_rows': 0,
'error_message': f'Delivery file not found: {file_path}'
}
# Try to get sheet names first
try:
excel_file = pd.ExcelFile(file_path)
sheet_names = excel_file.sheet_names
logger.info(f"Available sheets in delivery file: {sheet_names}")
except Exception as e:
logger.warning(f"Could not get sheet names: {e}")
sheet_names = ['DataSheet', 'Sheet1']
            # Try several engine/sheet combinations to read the Excel file.
            # pandas exposes no read-only switch for read_excel, so attempts
            # differ only by engine and sheet.
            df = None
            sheet_used = None
            approaches = [
                ('openpyxl', 0),
                ('openpyxl', 1),
                ('xlrd', 0) if file_path.endswith('.xls') else None,
                ('default', 0)
            ]
            for approach in approaches:
                if approach is None:
                    continue
                engine, sheet_name = approach
                try:
                    logger.info(f"Trying to read delivery data with engine: {engine}, sheet: {sheet_name}")
                    if engine == 'default':
                        df = pd.read_excel(file_path, sheet_name=sheet_name, header=0)
                    else:
                        df = pd.read_excel(file_path, sheet_name=sheet_name, engine=engine, header=0)
                    sheet_used = f"{engine} (sheet: {sheet_name})"
                    logger.info(f"Successfully read delivery data with: {sheet_used}")
                    break
                except Exception as e:
                    logger.warning(f"Failed to read delivery data with {engine}, sheet {sheet_name}: {e}")
                    continue
if df is None:
logger.error("Could not read the delivery file with any method")
return {
'success_count': 0,
'error_count': 1,
'total_rows': 0,
'error_message': 'Could not read the delivery Excel file. The file may have formatting issues or be corrupted.'
}
logger.info(f"Loaded delivery data from {sheet_used}: {len(df)} rows, {len(df.columns)} columns")
logger.info(f"Available columns: {list(df.columns)[:10]}...")
cursor = self.connection.cursor()
success_count = 0
created_count = 0
updated_count = 0
error_count = 0
# Prepare insert statement for deliveries
insert_sql = """
INSERT INTO dm_deliveries (
shipment_id, order_id, customer_code, customer_name,
article_code, article_description, quantity_delivered,
shipment_date, delivery_date, delivery_status, total_value
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
ON DUPLICATE KEY UPDATE
customer_code = VALUES(customer_code),
customer_name = VALUES(customer_name),
article_code = VALUES(article_code),
article_description = VALUES(article_description),
quantity_delivered = VALUES(quantity_delivered),
shipment_date = VALUES(shipment_date),
delivery_date = VALUES(delivery_date),
delivery_status = VALUES(delivery_status),
total_value = VALUES(total_value),
updated_at = CURRENT_TIMESTAMP
"""
            # Helpers to read cell values safely, normalising NaN/blank cells
            # (defined once here rather than inside the per-row loop)
            def safe_get(row, column, default=''):
                value = row.get(column, default)
                if pd.isna(value) or value == 'nan':
                    return default
                return str(value).strip() if isinstance(value, str) else value

            def safe_get_float(row, column, default=0.0):
                value = row.get(column, default)
                if pd.isna(value) or value == 'nan':
                    return default
                try:
                    return float(value) if value != '' else default
                except (ValueError, TypeError):
                    return default

            def safe_get_int(row, column, default=0):
                value = row.get(column, default)
                if pd.isna(value) or value == 'nan':
                    return default
                try:
                    return int(float(value)) if value != '' else default
                except (ValueError, TypeError):
                    return default

            # Process each row with the actual column mapping
            for index, row in df.iterrows():
                try:
# Map columns based on the actual Articole livrate_returnate format
data = (
safe_get(row, 'Document Number', f'SH_{index:06d}'), # Shipment ID
safe_get(row, 'Comanda'), # Order ID
safe_get(row, 'Cod. Client'), # Customer Code
safe_get(row, 'Nume client'), # Customer Name
safe_get(row, 'Cod Articol'), # Article Code
safe_get(row, 'Part Description'), # Article Description
safe_get_int(row, 'Cantitate'), # Quantity Delivered
self._parse_date(row.get('Data')), # Shipment Date
self._parse_date(row.get('Data')), # Delivery Date (same as shipment for now)
safe_get(row, 'Stare', 'DELIVERED'), # Delivery Status
safe_get_float(row, 'Total Price') # Total Value
)
cursor.execute(insert_sql, data)
# Track created vs updated
if cursor.rowcount == 1:
created_count += 1
elif cursor.rowcount == 2:
updated_count += 1
success_count += 1
except Exception as row_error:
logger.warning(f"Error processing delivery row {index}: {row_error}")
error_count += 1
continue
self.connection.commit()
logger.info(f"Delivery import completed: {success_count} successful, {error_count} errors")
return {
'success_count': success_count,
'created_count': created_count,
'updated_count': updated_count,
'error_count': error_count,
'total_rows': len(df),
'error_message': None if error_count == 0 else f'{error_count} rows failed to import'
}
except Exception as e:
logger.error(f"Error importing delivery data: {e}")
return {
'success_count': 0,
'error_count': 1,
'total_rows': 0,
'error_message': str(e)
}
def generate_daily_summary(self, report_date=None):
"""Generate daily summary for Daily Mirror reporting"""
if not report_date:
report_date = datetime.now().date()
try:
cursor = self.connection.cursor()
# Check if summary already exists for this date
cursor.execute("SELECT id FROM dm_daily_summary WHERE report_date = ?", (report_date,))
existing = cursor.fetchone()
# Get production metrics
cursor.execute("""
SELECT
COUNT(*) as total_orders,
SUM(quantity_requested) as total_quantity,
SUM(CASE WHEN production_status = 'Inchis' THEN 1 ELSE 0 END) as completed_orders,
SUM(CASE WHEN end_of_quilting IS NOT NULL THEN 1 ELSE 0 END) as quilting_done,
SUM(CASE WHEN end_of_sewing IS NOT NULL THEN 1 ELSE 0 END) as sewing_done,
COUNT(DISTINCT customer_code) as unique_customers
FROM dm_production_orders
WHERE DATE(data_planificare) = ?
""", (report_date,))
production_metrics = cursor.fetchone()
            # Get active operators count. Note: the CASE picks only the first
            # non-null operator per order (T1, then T2, then T3), so this is
            # an approximation that undercounts when several operators worked
            # on the same order.
cursor.execute("""
SELECT COUNT(DISTINCT CASE
WHEN t1_operator_name IS NOT NULL THEN t1_operator_name
WHEN t2_operator_name IS NOT NULL THEN t2_operator_name
WHEN t3_operator_name IS NOT NULL THEN t3_operator_name
END) as active_operators
FROM dm_production_orders
WHERE DATE(data_planificare) = ?
""", (report_date,))
operator_metrics = cursor.fetchone()
active_operators = operator_metrics[0] or 0
if existing:
# Update existing summary
update_sql = """
UPDATE dm_daily_summary SET
orders_quantity = ?, production_launched = ?, production_finished = ?,
quilting_completed = ?, sewing_completed = ?, unique_customers = ?,
active_operators = ?, updated_at = CURRENT_TIMESTAMP
WHERE report_date = ?
"""
cursor.execute(update_sql, (
production_metrics[1] or 0, production_metrics[0] or 0, production_metrics[2] or 0,
production_metrics[3] or 0, production_metrics[4] or 0, production_metrics[5] or 0,
active_operators, report_date
))
else:
# Insert new summary
insert_sql = """
INSERT INTO dm_daily_summary (
report_date, orders_quantity, production_launched, production_finished,
quilting_completed, sewing_completed, unique_customers, active_operators
) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
"""
cursor.execute(insert_sql, (
report_date, production_metrics[1] or 0, production_metrics[0] or 0, production_metrics[2] or 0,
production_metrics[3] or 0, production_metrics[4] or 0, production_metrics[5] or 0,
active_operators
))
self.connection.commit()
logger.info(f"Daily summary generated for {report_date}")
return True
except Exception as e:
logger.error(f"Error generating daily summary: {e}")
return False
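
    # Example (illustrative): backfill yesterday's summary
    #
    #   db.generate_daily_summary(report_date=(datetime.now() - timedelta(days=1)).date())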
def clear_production_orders(self):
"""Delete all rows from the Daily Mirror production orders table"""
try:
cursor = self.connection.cursor()
cursor.execute("DELETE FROM dm_production_orders")
self.connection.commit()
logger.info("All production orders deleted from dm_production_orders table.")
return True
except Exception as e:
logger.error(f"Error deleting production orders: {e}")
return False
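
    # Note: DELETE preserves the table's AUTO_INCREMENT counter. If a full
    # reset is wanted, "TRUNCATE TABLE dm_production_orders" also resets it,
    # assuming no foreign keys reference the table (TRUNCATE refuses tables
    # referenced by FOREIGN KEY constraints).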
def _parse_date(self, date_value):
"""Parse date with better null handling"""
if pd.isna(date_value) or date_value == 'nan' or date_value is None or date_value == '':
return None
try:
if isinstance(date_value, str):
# Handle various date formats
for fmt in ['%Y-%m-%d', '%d/%m/%Y', '%m/%d/%Y', '%d.%m.%Y']:
try:
return datetime.strptime(date_value, fmt).date()
except ValueError:
continue
            elif hasattr(date_value, 'date'):
                # covers datetime and pandas Timestamp values
                return date_value.date()
            return None  # all parsing attempts failed
except Exception as e:
logger.warning(f"Error parsing date {date_value}: {e}")
return None
    def _parse_datetime(self, datetime_value):
        """Parse a datetime value from Excel.

        Returns the value unchanged and relies on the database driver/column
        to coerce it; only NaN and the placeholder string '00:00:00' are
        mapped to NULL.
        """
        if pd.isna(datetime_value):
            return None
        if isinstance(datetime_value, str) and datetime_value == '00:00:00':
            return None
        return datetime_value
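
    # A stricter variant, if string timestamps ever need normalising in Python
    # before insert (a sketch, not used by the current callers):
    #
    #   parsed = pd.to_datetime(datetime_value, errors='coerce', dayfirst=True)
    #   return None if pd.isna(parsed) else parsed.to_pydatetime()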
def setup_daily_mirror_database():
"""Setup the Daily Mirror database schema"""
db = DailyMirrorDatabase()
if not db.connect():
return False
try:
success = db.create_database_schema()
if success:
print("✅ Daily Mirror database schema created successfully!")
# Generate sample daily summary for today
db.generate_daily_summary()
return success
finally:
db.disconnect()
if __name__ == "__main__":
setup_daily_mirror_database()
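
# Example end-to-end usage (illustrative file paths; adjust to your environment):
#
#   db = DailyMirrorDatabase()
#   if db.connect():
#       try:
#           db.import_production_data('exports/comenzi_productie.xlsx')
#           db.import_orders_data('exports/comenzi_deschise.xlsx')
#           db.import_delivery_data('exports/articole_livrate.xlsx')
#           db.generate_daily_summary()
#       finally:
#           db.disconnect()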