Spaces:

alwaysgood
/

my-tide-env

Sleeping

App Files Files Community

alwaysgood committed on Aug 19

Commit

7d0339b

verified ·

1 Parent(s): 12db48a

Update prediction.py

Browse files

Files changed (1) hide show

prediction.py +48 -12

prediction.py CHANGED Viewed

@@ -13,6 +13,7 @@ from config import STATION_NAMES
 from supabase_utils import (
     get_harmonic_predictions, save_predictions_to_supabase, get_supabase_client
 )
 def get_common_args(station_id):
     return [
@@ -23,19 +24,30 @@ def get_common_args(station_id):
     ]
 def validate_csv_file(file_path, required_rows=144):
-    """Validate the CSV file."""
     try:
         df = pd.read_csv(file_path)
-        required_columns = ['date', 'air_pres', 'wind_dir', 'wind_speed', 'air_temp', 'residual']
-        missing_columns = [col for col in required_columns if col not in df.columns]
-        if missing_columns:
-            return False, f"필수 컬럼이 누락되었습니다: {missing_columns}"
         if len(df) < required_rows:
             return False, f"데이터가 부족합니다. 최소 {required_rows}행 필요, 현재 {len(df)}행"
-        return True, "파일이 유효합니다."
     except Exception as e:
         return False, f"파일 읽기 오류: {str(e)}"
@@ -231,12 +243,27 @@ def single_prediction(station_id, input_csv_file):
     if input_csv_file is None:
         raise gr.Error("예측을 위한 입력 파일을 업로드해주세요.")
     is_valid, message = validate_csv_file(input_csv_file.name)
     if not is_valid:
         raise gr.Error(f"파일 오류: {message}")
     station_name = STATION_NAMES.get(station_id, station_id)
     common_args = get_common_args(station_id)
     setting_name = f"long_term_forecast_{station_id}_144_72_TimeXer_TIDE_ftMS_sl144_ll96_pl72_dm256_nh8_el1_dl1_df512_expand2_dc4_fc3_ebtimeF_dtTrue_Exp_0"
     checkpoint_path = f"./checkpoints/{setting_name}/checkpoint.pth"
@@ -247,10 +274,11 @@ def single_prediction(station_id, input_csv_file):
     if not os.path.exists(scaler_path):
         raise gr.Error(f"스케일러 파일을 찾을 수 없습니다: {scaler_path}")
     command = ["python", "inference.py",
                "--checkpoint_path", checkpoint_path,
                "--scaler_path", scaler_path,
-               "--predict_input_file", input_csv_file.name] + common_args
     gr.Info(f"{station_name}({station_id}) 통합 조위 예측을 실행중입니다...")
@@ -261,12 +289,13 @@ def single_prediction(station_id, input_csv_file):
         if os.path.exists(prediction_file):
             residual_predictions = np.load(prediction_file)
-            input_df = pd.read_csv(input_csv_file.name)
-            input_df['date'] = pd.to_datetime(input_df['date'])
             last_time = input_df['date'].iloc[-1]
             prediction_results = calculate_final_tide(residual_predictions, station_id, last_time)
-            plot = create_enhanced_prediction_plot(prediction_results, input_csv_file, station_name)
             has_harmonic = any(h != 0 for h in prediction_results['harmonic'])
@@ -291,7 +320,14 @@ def single_prediction(station_id, input_csv_file):
             else:
                 save_message = "\n⚠️ Supabase 저장 실패"
-            return plot, result_df, f"✅ 예측 완료!{save_message}\n\n{output}"
         else:
             return None, None, f"❌ 결과 파일을 찾을 수 없습니다.\n\n{output}"
     except Exception as e:

 from supabase_utils import (
     get_harmonic_predictions, save_predictions_to_supabase, get_supabase_client
 )
+from preprocessing import preprocess_uploaded_file
 def get_common_args(station_id):
     return [
     ]
 def validate_csv_file(file_path, required_rows=144):
+    """Validate an uploaded CSV file; accepts either tide_level or residual data.
+
+    Checks, in order: the base columns are present, at least one of the
+    'tide_level' / 'residual' columns exists, and the file has at least
+    `required_rows` rows.
+
+    Returns a `(is_valid, message)` tuple. Any exception raised while
+    reading or checking the file is caught and reported as
+    `(False, message)` rather than propagated.
+    """
     try:
         df = pd.read_csv(file_path)
+        # Base required columns (one of tide_level / residual is checked separately below)
+        base_columns = ['date', 'air_pres', 'wind_dir', 'wind_speed', 'air_temp']
+        missing_base = [col for col in base_columns if col not in df.columns]
+        if missing_base:
+            return False, f"필수 컬럼이 누락되었습니다: {missing_base}"
+        # At least one of tide_level or residual must be present
+        has_tide_level = 'tide_level' in df.columns
+        has_residual = 'residual' in df.columns
+        if not has_tide_level and not has_residual:
+            return False, "tide_level 또는 residual 컬럼이 필요합니다."
         if len(df) < required_rows:
             return False, f"데이터가 부족합니다. 최소 {required_rows}행 필요, 현재 {len(df)}행"
+        data_type = "tide_level" if has_tide_level else "residual"
+        return True, f"파일이 유효합니다. (데이터 형태: {data_type})"
     except Exception as e:
         return False, f"파일 읽기 오류: {str(e)}"
     if input_csv_file is None:
         raise gr.Error("예측을 위한 입력 파일을 업로드해주세요.")
+    # 1. 초기 파일 검증
     is_valid, message = validate_csv_file(input_csv_file.name)
     if not is_valid:
         raise gr.Error(f"파일 오류: {message}")
     station_name = STATION_NAMES.get(station_id, station_id)
+    # 2. 전처리 수행 (tide_level → residual 변환 포함)
+    gr.Info(f"📊 {station_name}({station_id}) 데이터 전처리 중...")
+    processed_data, preprocess_result = preprocess_uploaded_file(input_csv_file.name, station_id)
+    if processed_data is None:
+        raise gr.Error(f"전처리 실패: {preprocess_result}")
+    # 전처리 결과가 문자열(에러)인지 딕셔너리(성공)인지 확인
+    if isinstance(preprocess_result, str):
+        raise gr.Error(f"전처리 오류: {preprocess_result}")
+    # 전처리된 파일 경로 사용
+    processed_file_path = preprocess_result['output_file']
     common_args = get_common_args(station_id)
     setting_name = f"long_term_forecast_{station_id}_144_72_TimeXer_TIDE_ftMS_sl144_ll96_pl72_dm256_nh8_el1_dl1_df512_expand2_dc4_fc3_ebtimeF_dtTrue_Exp_0"
     checkpoint_path = f"./checkpoints/{setting_name}/checkpoint.pth"
     if not os.path.exists(scaler_path):
         raise gr.Error(f"스케일러 파일을 찾을 수 없습니다: {scaler_path}")
+    # 전처리된 파일을 inference에 전달
     command = ["python", "inference.py",
                "--checkpoint_path", checkpoint_path,
                "--scaler_path", scaler_path,
+               "--predict_input_file", processed_file_path] + common_args
     gr.Info(f"{station_name}({station_id}) 통합 조위 예측을 실행중입니다...")
         if os.path.exists(prediction_file):
             residual_predictions = np.load(prediction_file)
+            # 전처리된 데이터 사용
+            input_df = processed_data
             last_time = input_df['date'].iloc[-1]
             prediction_results = calculate_final_tide(residual_predictions, station_id, last_time)
+            # 플롯은 전처리된 데이터 파일을 사용
+            plot = create_enhanced_prediction_plot(prediction_results, type('obj', (object,), {'name': processed_file_path}), station_name)
             has_harmonic = any(h != 0 for h in prediction_results['harmonic'])
             else:
                 save_message = "\n⚠️ Supabase 저장 실패"
+            # 전처리 정보 추가
+            preprocess_info = f"""📊 전처리 결과:
+- 원본 데이터: {preprocess_result['original_rows']}행
+- 처리 데이터: {preprocess_result['processed_rows']}행
+- Residual 평균: {preprocess_result['residual_mean']:.2f}cm
+- Residual 표준편차: {preprocess_result['residual_std']:.2f}cm"""
+            return plot, result_df, f"✅ 예측 완료!{save_message}\n\n{preprocess_info}\n\n{output}"
         else:
             return None, None, f"❌ 결과 파일을 찾을 수 없습니다.\n\n{output}"
     except Exception as e: