# quality_recticel/py_app/app/daily_mirror_db_setup.py
"""
Daily Mirror Database Setup and Management
Quality Recticel Application
This script creates the database schema and provides utilities for
data import and Daily Mirror reporting functionality.
"""
import mariadb
import pandas as pd
import os
from datetime import datetime, timedelta
import logging
# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class DailyMirrorDatabase:
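    """Owns the MariaDB connection and all Daily Mirror import/report helpers."""

    # NOTE: the defaults below are development credentials; in a deployed
    # environment, prefer supplying host/user/password from configuration
    # or environment variables rather than hardcoding them.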
def __init__(self, host='localhost', user='trasabilitate', password='Initial01!', database='trasabilitate'):
self.host = host
self.user = user
self.password = password
self.database = database
self.connection = None
def connect(self):
"""Establish database connection"""
try:
self.connection = mariadb.connect(
host=self.host,
user=self.user,
password=self.password,
database=self.database
)
logger.info("Database connection established")
return True
except Exception as e:
logger.error(f"Database connection failed: {e}")
return False
def disconnect(self):
"""Close database connection"""
if self.connection:
self.connection.close()
logger.info("Database connection closed")
def create_database_schema(self):
"""Create the Daily Mirror database schema"""
try:
cursor = self.connection.cursor()
# Read and execute the schema file
schema_file = os.path.join(os.path.dirname(__file__), 'daily_mirror_database_schema.sql')
if not os.path.exists(schema_file):
logger.error(f"Schema file not found: {schema_file}")
return False
with open(schema_file, 'r') as file:
schema_sql = file.read()
# Split by statements and execute each one
statements = []
current_statement = ""
for line in schema_sql.split('\n'):
line = line.strip()
if line and not line.startswith('--'):
current_statement += line + " "
if line.endswith(';'):
statements.append(current_statement.strip())
current_statement = ""
# Add any remaining statement
if current_statement.strip():
statements.append(current_statement.strip())
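            # NOTE: this splitter assumes ';' only ever terminates a statement;
            # it would mis-split semicolons inside string literals or stored
            # routine bodies, which the schema file is assumed not to contain.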
for statement in statements:
if statement and any(statement.upper().startswith(cmd) for cmd in ['CREATE', 'ALTER', 'DROP', 'INSERT']):
try:
cursor.execute(statement)
logger.info(f"Executed: {statement[:80]}...")
except Exception as e:
if "already exists" not in str(e).lower():
logger.warning(f"Error executing statement: {e}")
self.connection.commit()
logger.info("Database schema created successfully")
return True
except Exception as e:
logger.error(f"Error creating database schema: {e}")
return False
def import_production_data(self, file_path):
"""Import production data from Excel file (Comenzi Productie format)"""
try:
# The correct data is in the first sheet (DataSheet)
df = None
sheet_used = None
# Get available sheets
excel_file = pd.ExcelFile(file_path)
logger.info(f"Available sheets: {excel_file.sheet_names}")
# Try DataSheet first (where the actual production data is), then fallback options
sheet_attempts = [
('DataSheet', 'openpyxl'),
('DataSheet', 'xlrd'),
(0, 'openpyxl'),
(0, 'xlrd'),
('Sheet1', 'openpyxl'), # fallback to Sheet1 if DataSheet fails
(1, 'openpyxl')
]
for sheet_name, engine in sheet_attempts:
try:
logger.info(f"Trying to read sheet '{sheet_name}' with engine '{engine}'")
df = pd.read_excel(file_path, sheet_name=sheet_name, engine=engine, header=0)
sheet_used = f"{sheet_name} (engine: {engine})"
logger.info(f"Successfully read from sheet: {sheet_used}")
break
except Exception as e:
logger.warning(f"Failed to read sheet {sheet_name} with {engine}: {e}")
continue
# If all engines fail on DataSheet, try a different approach
if df is None:
try:
logger.info("Trying alternative method: reading without specifying engine")
df = pd.read_excel(file_path, sheet_name='DataSheet')
sheet_used = "DataSheet (default engine)"
logger.info("Successfully read with default engine")
except Exception as e:
logger.error(f"Failed with default engine: {e}")
raise Exception("Could not read the DataSheet from the Excel file. The file may be corrupted.")
logger.info(f"Loaded production data from {sheet_used}: {len(df)} rows, {len(df.columns)} columns")
logger.info(f"Available columns: {list(df.columns)}")
cursor = self.connection.cursor()
success_count = 0
created_count = 0
updated_count = 0
error_count = 0
# Prepare insert statement
insert_sql = """
INSERT INTO dm_production_orders (
production_order, customer_code, client_order, article_code,
article_description, quantity_requested, delivery_date, production_status,
end_of_quilting, end_of_sewing, t1_status, t1_registration_date, t1_operator_name,
t2_status, t2_registration_date, t2_operator_name, t3_status, t3_registration_date,
t3_operator_name, machine_code, machine_type, classification, total_norm_time,
data_deschiderii, model_lb2, data_planificare, machine_number, design_number, needle_position
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
ON DUPLICATE KEY UPDATE
customer_code = VALUES(customer_code),
client_order = VALUES(client_order),
article_code = VALUES(article_code),
article_description = VALUES(article_description),
quantity_requested = VALUES(quantity_requested),
delivery_date = VALUES(delivery_date),
production_status = VALUES(production_status),
updated_at = CURRENT_TIMESTAMP
"""
for index, row in df.iterrows():
try:
# Prepare data tuple
data = (
row.get('Comanda Productie', ''),
row.get('Customer', ''),
row.get('Comanda client', ''),
row.get('Cod Articol', ''),
row.get('Descriere', ''),
row.get('Cantitate ceruta', 0),
self._parse_date(row.get('Delivery date')),
row.get('Status', ''),
self._parse_datetime(row.get('End of Quilting')),
self._parse_datetime(row.get('End of sewing')),
row.get('T1', 0),
self._parse_datetime(row.get('Data inregistrare T1')),
row.get('Numele Complet T1', ''),
row.get('T2', 0),
self._parse_datetime(row.get('Data inregistrare T2')),
row.get('Numele Complet T2', ''),
row.get('T3', 0),
self._parse_datetime(row.get('Data inregistrare T3')),
row.get('Numele Complet T3', ''),
                        row.get('Masina Cusut ', ''),  # trailing space is part of the column name
row.get('Tip Masina', ''),
row.get('Clasificare', ''),
row.get('Timp normat total', 0),
self._parse_date(row.get('Data Deschiderii')),
row.get('Model Lb2', ''),
self._parse_date(row.get('Data Planific.')),
row.get('Numar masina', ''),
row.get('Design nr', 0),
row.get('Needle position', 0)
)
cursor.execute(insert_sql, data)
                    # Check whether the row was inserted (created) or updated.
                    # With ON DUPLICATE KEY UPDATE in MariaDB/MySQL:
                    #   rowcount == 1 -> INSERT (new row created)
                    #   rowcount == 2 -> UPDATE (existing row updated)
                    #   rowcount == 0 -> no change (submitted values identical)
if cursor.rowcount == 1:
created_count += 1
elif cursor.rowcount == 2:
updated_count += 1
success_count += 1
except Exception as row_error:
logger.warning(f"Error processing row {index}: {row_error}")
error_count += 1
continue
self.connection.commit()
logger.info(f"Production data import completed: {success_count} successful, {error_count} failed")
return {
'success_count': success_count,
'created_count': created_count,
'updated_count': updated_count,
'error_count': error_count,
'total_rows': len(df)
}
except Exception as e:
logger.error(f"Error importing production data: {e}")
return None
def import_orders_data(self, file_path):
"""Import orders data from Excel file with enhanced error handling"""
try:
# Ensure we have a database connection
if not self.connection:
self.connect()
if not self.connection:
return {
'success_count': 0,
'error_count': 1,
'total_rows': 0,
'error_message': 'Could not establish database connection.'
}
logger.info(f"Attempting to import orders data from: {file_path}")
# Check if file exists
if not os.path.exists(file_path):
logger.error(f"Orders file not found: {file_path}")
return {
'success_count': 0,
'error_count': 1,
'total_rows': 0,
'error_message': f'Orders file not found: {file_path}'
}
# Try to get sheet names first
try:
excel_file = pd.ExcelFile(file_path)
sheet_names = excel_file.sheet_names
logger.info(f"Available sheets in orders file: {sheet_names}")
except Exception as e:
logger.warning(f"Could not get sheet names: {e}")
sheet_names = ['DataSheet', 'Sheet1']
            # Try several engine/sheet combinations to read the Excel file.
            # pandas exposes no read-only switch for read_excel, so attempts
            # differ only by engine and sheet.
            df = None
            sheet_used = None
            approaches = [
                ('openpyxl', 0),
                ('openpyxl', 1),
                ('xlrd', 0) if file_path.endswith('.xls') else None,
                ('default', 0)
            ]
            for approach in approaches:
                if approach is None:
                    continue
                engine, sheet_name = approach
                try:
                    logger.info(f"Trying to read orders with engine: {engine}, sheet: {sheet_name}")
                    if engine == 'default':
                        df = pd.read_excel(file_path, sheet_name=sheet_name, header=0)
                    else:
                        df = pd.read_excel(file_path, sheet_name=sheet_name, engine=engine, header=0)
                    sheet_used = f"{engine} (sheet: {sheet_name})"
                    logger.info(f"Successfully read orders data with: {sheet_used}")
                    break
                except Exception as e:
                    logger.warning(f"Failed to read orders with {engine}, sheet {sheet_name}: {e}")
                    continue
if df is None:
logger.error("Could not read the orders file with any method")
return {
'success_count': 0,
'error_count': 1,
'total_rows': 0,
'error_message': 'Could not read the orders Excel file. The file may have formatting issues or be corrupted.'
}
logger.info(f"Loaded orders data from {sheet_used}: {len(df)} rows, {len(df.columns)} columns")
logger.info(f"Available columns: {list(df.columns)[:10]}...")
cursor = self.connection.cursor()
success_count = 0
created_count = 0
updated_count = 0
error_count = 0
# Prepare insert statement for orders
insert_sql = """
INSERT INTO dm_orders (
order_id, customer_code, customer_name, client_order,
article_code, article_description, quantity_requested, delivery_date,
order_status, product_group, order_date
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
ON DUPLICATE KEY UPDATE
customer_code = VALUES(customer_code),
customer_name = VALUES(customer_name),
client_order = VALUES(client_order),
article_code = VALUES(article_code),
article_description = VALUES(article_description),
quantity_requested = VALUES(quantity_requested),
delivery_date = VALUES(delivery_date),
order_status = VALUES(order_status),
product_group = VALUES(product_group),
order_date = VALUES(order_date),
updated_at = CURRENT_TIMESTAMP
"""
            # Helpers to read cell values safely, normalising NaN/blank cells
            # (defined once here rather than inside the per-row loop)
            def safe_get(row, column, default=''):
                value = row.get(column, default)
                if pd.isna(value) or value == 'nan':
                    return default
                return str(value).strip() if isinstance(value, str) else value

            def safe_get_int(row, column, default=0):
                value = row.get(column, default)
                if pd.isna(value) or value == 'nan':
                    return default
                try:
                    return int(float(value)) if value != '' else default
                except (ValueError, TypeError):
                    return default

            # Process each row with the actual column mapping
            for index, row in df.iterrows():
                try:
# Map columns based on the actual Vizual. Artic. Comenzi Deschise format
data = (
safe_get(row, 'Comanda', f'ORD_{index:06d}'), # Order ID
safe_get(row, 'Cod. Client'), # Customer Code
safe_get(row, 'Customer Name'), # Customer Name
safe_get(row, 'Com. Achiz. Client'), # Client Order
safe_get(row, 'Cod Articol'), # Article Code
safe_get(row, 'Part Description', safe_get(row, 'Descr. Articol')), # Article Description
safe_get_int(row, 'Cantitate'), # Quantity
self._parse_date(row.get('Data livrare')), # Delivery Date
safe_get(row, 'Statut Comanda', 'PENDING'), # Order Status
safe_get(row, 'Model'), # Product Group
self._parse_date(row.get('Data Comenzii')) # Order Date
)
cursor.execute(insert_sql, data)
# Track created vs updated
if cursor.rowcount == 1:
created_count += 1
elif cursor.rowcount == 2:
updated_count += 1
success_count += 1
except Exception as row_error:
logger.warning(f"Error processing row {index}: {row_error}")
error_count += 1
continue
self.connection.commit()
logger.info(f"Orders import completed: {success_count} successful, {error_count} errors")
return {
'success_count': success_count,
'created_count': created_count,
'updated_count': updated_count,
'error_count': error_count,
'total_rows': len(df),
'error_message': None if error_count == 0 else f'{error_count} rows failed to import'
}
except Exception as e:
logger.error(f"Error importing orders data: {e}")
return {
'success_count': 0,
'error_count': 1,
'total_rows': 0,
'error_message': str(e)
}
def import_delivery_data(self, file_path):
"""Import delivery data from Excel file with enhanced error handling"""
try:
# Ensure we have a database connection
if not self.connection:
self.connect()
if not self.connection:
return {
'success_count': 0,
'error_count': 1,
'total_rows': 0,
'error_message': 'Could not establish database connection.'
}
logger.info(f"Attempting to import delivery data from: {file_path}")
# Check if file exists
if not os.path.exists(file_path):
logger.error(f"Delivery file not found: {file_path}")
return {
'success_count': 0,
'error_count': 1,
'total_rows': 0,
'error_message': f'Delivery file not found: {file_path}'
}
# Try to get sheet names first
try:
excel_file = pd.ExcelFile(file_path)
sheet_names = excel_file.sheet_names
logger.info(f"Available sheets in delivery file: {sheet_names}")
except Exception as e:
logger.warning(f"Could not get sheet names: {e}")
sheet_names = ['DataSheet', 'Sheet1']
            # Try several engine/sheet combinations to read the Excel file.
            # pandas exposes no read-only switch for read_excel, so attempts
            # differ only by engine and sheet.
            df = None
            sheet_used = None
            approaches = [
                ('openpyxl', 0),
                ('openpyxl', 1),
                ('xlrd', 0) if file_path.endswith('.xls') else None,
                ('default', 0)
            ]
            for approach in approaches:
                if approach is None:
                    continue
                engine, sheet_name = approach
                try:
                    logger.info(f"Trying to read delivery data with engine: {engine}, sheet: {sheet_name}")
                    if engine == 'default':
                        df = pd.read_excel(file_path, sheet_name=sheet_name, header=0)
                    else:
                        df = pd.read_excel(file_path, sheet_name=sheet_name, engine=engine, header=0)
                    sheet_used = f"{engine} (sheet: {sheet_name})"
                    logger.info(f"Successfully read delivery data with: {sheet_used}")
                    break
                except Exception as e:
                    logger.warning(f"Failed to read delivery data with {engine}, sheet {sheet_name}: {e}")
                    continue
if df is None:
logger.error("Could not read the delivery file with any method")
return {
'success_count': 0,
'error_count': 1,
'total_rows': 0,
'error_message': 'Could not read the delivery Excel file. The file may have formatting issues or be corrupted.'
}
logger.info(f"Loaded delivery data from {sheet_used}: {len(df)} rows, {len(df.columns)} columns")
logger.info(f"Available columns: {list(df.columns)[:10]}...")
cursor = self.connection.cursor()
success_count = 0
created_count = 0
updated_count = 0
error_count = 0
# Prepare insert statement for deliveries
insert_sql = """
INSERT INTO dm_deliveries (
shipment_id, order_id, customer_code, customer_name,
article_code, article_description, quantity_delivered,
shipment_date, delivery_date, delivery_status, total_value
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
ON DUPLICATE KEY UPDATE
customer_code = VALUES(customer_code),
customer_name = VALUES(customer_name),
article_code = VALUES(article_code),
article_description = VALUES(article_description),
quantity_delivered = VALUES(quantity_delivered),
shipment_date = VALUES(shipment_date),
delivery_date = VALUES(delivery_date),
delivery_status = VALUES(delivery_status),
total_value = VALUES(total_value),
updated_at = CURRENT_TIMESTAMP
"""
            # Helpers to read cell values safely, normalising NaN/blank cells
            # (defined once here rather than inside the per-row loop)
            def safe_get(row, column, default=''):
                value = row.get(column, default)
                if pd.isna(value) or value == 'nan':
                    return default
                return str(value).strip() if isinstance(value, str) else value

            def safe_get_float(row, column, default=0.0):
                value = row.get(column, default)
                if pd.isna(value) or value == 'nan':
                    return default
                try:
                    return float(value) if value != '' else default
                except (ValueError, TypeError):
                    return default

            def safe_get_int(row, column, default=0):
                value = row.get(column, default)
                if pd.isna(value) or value == 'nan':
                    return default
                try:
                    return int(float(value)) if value != '' else default
                except (ValueError, TypeError):
                    return default

            # Process each row with the actual column mapping
            for index, row in df.iterrows():
                try:
# Map columns based on the actual Articole livrate_returnate format
data = (
safe_get(row, 'Document Number', f'SH_{index:06d}'), # Shipment ID
safe_get(row, 'Comanda'), # Order ID
safe_get(row, 'Cod. Client'), # Customer Code
safe_get(row, 'Nume client'), # Customer Name
safe_get(row, 'Cod Articol'), # Article Code
safe_get(row, 'Part Description'), # Article Description
safe_get_int(row, 'Cantitate'), # Quantity Delivered
self._parse_date(row.get('Data')), # Shipment Date
self._parse_date(row.get('Data')), # Delivery Date (same as shipment for now)
safe_get(row, 'Stare', 'DELIVERED'), # Delivery Status
safe_get_float(row, 'Total Price') # Total Value
)
cursor.execute(insert_sql, data)
# Track created vs updated
if cursor.rowcount == 1:
created_count += 1
elif cursor.rowcount == 2:
updated_count += 1
success_count += 1
except Exception as row_error:
logger.warning(f"Error processing delivery row {index}: {row_error}")
error_count += 1
continue
self.connection.commit()
logger.info(f"Delivery import completed: {success_count} successful, {error_count} errors")
return {
'success_count': success_count,
'created_count': created_count,
'updated_count': updated_count,
'error_count': error_count,
'total_rows': len(df),
'error_message': None if error_count == 0 else f'{error_count} rows failed to import'
}
except Exception as e:
logger.error(f"Error importing delivery data: {e}")
return {
'success_count': 0,
'error_count': 1,
'total_rows': 0,
'error_message': str(e)
}
def generate_daily_summary(self, report_date=None):
"""Generate daily summary for Daily Mirror reporting"""
if not report_date:
report_date = datetime.now().date()
try:
cursor = self.connection.cursor()
# Check if summary already exists for this date
cursor.execute("SELECT id FROM dm_daily_summary WHERE report_date = ?", (report_date,))
existing = cursor.fetchone()
# Get production metrics
cursor.execute("""
SELECT
COUNT(*) as total_orders,
SUM(quantity_requested) as total_quantity,
SUM(CASE WHEN production_status = 'Inchis' THEN 1 ELSE 0 END) as completed_orders,
SUM(CASE WHEN end_of_quilting IS NOT NULL THEN 1 ELSE 0 END) as quilting_done,
SUM(CASE WHEN end_of_sewing IS NOT NULL THEN 1 ELSE 0 END) as sewing_done,
COUNT(DISTINCT customer_code) as unique_customers
FROM dm_production_orders
WHERE DATE(data_planificare) = ?
""", (report_date,))
production_metrics = cursor.fetchone()
            # Get active operators count. Note: the CASE picks only the first
            # non-null operator per order (T1, then T2, then T3), so this is
            # an approximation that undercounts when several operators worked
            # on the same order.
cursor.execute("""
SELECT COUNT(DISTINCT CASE
WHEN t1_operator_name IS NOT NULL THEN t1_operator_name
WHEN t2_operator_name IS NOT NULL THEN t2_operator_name
WHEN t3_operator_name IS NOT NULL THEN t3_operator_name
END) as active_operators
FROM dm_production_orders
WHERE DATE(data_planificare) = ?
""", (report_date,))
operator_metrics = cursor.fetchone()
active_operators = operator_metrics[0] or 0
if existing:
# Update existing summary
update_sql = """
UPDATE dm_daily_summary SET
orders_quantity = ?, production_launched = ?, production_finished = ?,
quilting_completed = ?, sewing_completed = ?, unique_customers = ?,
active_operators = ?, updated_at = CURRENT_TIMESTAMP
WHERE report_date = ?
"""
cursor.execute(update_sql, (
production_metrics[1] or 0, production_metrics[0] or 0, production_metrics[2] or 0,
production_metrics[3] or 0, production_metrics[4] or 0, production_metrics[5] or 0,
active_operators, report_date
))
else:
# Insert new summary
insert_sql = """
INSERT INTO dm_daily_summary (
report_date, orders_quantity, production_launched, production_finished,
quilting_completed, sewing_completed, unique_customers, active_operators
) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
"""
cursor.execute(insert_sql, (
report_date, production_metrics[1] or 0, production_metrics[0] or 0, production_metrics[2] or 0,
production_metrics[3] or 0, production_metrics[4] or 0, production_metrics[5] or 0,
active_operators
))
self.connection.commit()
logger.info(f"Daily summary generated for {report_date}")
return True
except Exception as e:
logger.error(f"Error generating daily summary: {e}")
return False
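
    # Example (illustrative): backfill yesterday's summary
    #
    #   db.generate_daily_summary(report_date=(datetime.now() - timedelta(days=1)).date())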
def clear_production_orders(self):
"""Delete all rows from the Daily Mirror production orders table"""
try:
cursor = self.connection.cursor()
cursor.execute("DELETE FROM dm_production_orders")
self.connection.commit()
logger.info("All production orders deleted from dm_production_orders table.")
return True
except Exception as e:
logger.error(f"Error deleting production orders: {e}")
return False
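
    # Note: DELETE preserves the table's AUTO_INCREMENT counter. If a full
    # reset is wanted, "TRUNCATE TABLE dm_production_orders" also resets it,
    # assuming no foreign keys reference the table (TRUNCATE refuses tables
    # referenced by FOREIGN KEY constraints).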
def _parse_date(self, date_value):
"""Parse date with better null handling"""
if pd.isna(date_value) or date_value == 'nan' or date_value is None or date_value == '':
return None
try:
if isinstance(date_value, str):
# Handle various date formats
for fmt in ['%Y-%m-%d', '%d/%m/%Y', '%m/%d/%Y', '%d.%m.%Y']:
try:
return datetime.strptime(date_value, fmt).date()
except ValueError:
continue
            elif hasattr(date_value, 'date'):
                # covers datetime and pandas Timestamp values
                return date_value.date()
            return None  # all parsing attempts failed
except Exception as e:
logger.warning(f"Error parsing date {date_value}: {e}")
return None
    def _parse_datetime(self, datetime_value):
        """Parse a datetime value from Excel.

        Returns the value unchanged and relies on the database driver/column
        to coerce it; only NaN and the placeholder string '00:00:00' are
        mapped to NULL.
        """
        if pd.isna(datetime_value):
            return None
        if isinstance(datetime_value, str) and datetime_value == '00:00:00':
            return None
        return datetime_value
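
    # A stricter variant, if string timestamps ever need normalising in Python
    # before insert (a sketch, not used by the current callers):
    #
    #   parsed = pd.to_datetime(datetime_value, errors='coerce', dayfirst=True)
    #   return None if pd.isna(parsed) else parsed.to_pydatetime()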
def setup_daily_mirror_database():
"""Setup the Daily Mirror database schema"""
db = DailyMirrorDatabase()
if not db.connect():
return False
try:
success = db.create_database_schema()
if success:
print("✅ Daily Mirror database schema created successfully!")
# Generate sample daily summary for today
db.generate_daily_summary()
return success
finally:
db.disconnect()
if __name__ == "__main__":
setup_daily_mirror_database()
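
# Example end-to-end usage (illustrative file paths; adjust to your environment):
#
#   db = DailyMirrorDatabase()
#   if db.connect():
#       try:
#           db.import_production_data('exports/comenzi_productie.xlsx')
#           db.import_orders_data('exports/comenzi_deschise.xlsx')
#           db.import_delivery_data('exports/articole_livrate.xlsx')
#           db.generate_daily_summary()
#       finally:
#           db.disconnect()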