
    iR4                        d Z ddlmZ ddlZddlZddlmZmZ ddlm	Z	mZ ddl
mZmZ e G d d             Ze G d	 d
             ZddZddZddZddZddZy)zIParse Pertamina transaction detail PDFs (English and Indonesian formats).    )annotationsN)	dataclassfield)datedatetime)DecimalInvalidOperationc                  V    e Zd ZU ded<   ded<   ded<   dZded<   dZd	ed
<   dZded<   y)
ParsedItemstrkode_item_pertaminaproduct_namer   volume_literNz
str | Noneno_sodate | Nonetanggal_kirimsent_to_text)__name__
__module____qualname____annotations__r   r   r        6/var/www/html/spbu.com/backend/app/utils/pdf_parser.pyr   r      s2    E:!%M;%#L*#r   r   c                  *   e Zd ZU ded<   ded<    ee      Zded<    ed      Zd	ed
<    ed      Z	d	ed<    ed      Z
d	ed<    ed      Zd	ed<    ed      Zd	ed<    ed      Zd	ed<    ed      Zd	ed<    ed      Zd	ed<    ee      Zded<   y)	ParsedPDFr   booking_coder   tanggal)default_factoryzlist[ParsedItem]items0r   subtotaldiscountppnpph22pbbkbroundingdebit_credittotalz	list[str]warningsN)r   r   r   r   r   listr    r   r"   r#   r$   r%   r&   r'   r(   r)   r*   r   r   r   r   r      s    M#D9E9Hg$Hg$3<CS\E7!S\E7!Hg$#CLL'(S\E7!5Hi5r   r   c                   | j                         j                  dd      } t        j                  dd| t        j                        } d}| j                  d      s| j                  d      r"d}| j                  d	      j                  d
      } | j                  d      rd}| dd } d| v rad| v r]| j                  d      }| j                  d      }||kD  r#| j                  dd      j                  dd      } n| j                  dd      } nd| v rG| j                  d      }t        |      dkD  rt        d |dd D              rp| j                  dd      } n]d| v rY| j                  d      }t        |      dkD  r(t        d |dd D              r| j                  dd      } n| j                  dd      } 	 t        |       }|r| S |S # t        $ r t        d      cY S w xY w)zTParse a number string that may use Indonesian format (dot=thousands, comma=decimal).  z
\s*IDR\s*$)flagsF-(Tz-()   N,.c              3  8   K   | ]  }t        |      d k(    yw   Nlen.0ps     r   	<genexpr>z!_parse_decimal.<locals>.<genexpr>D        !Ay!#a&A+y   c              3  8   K   | ]  }t        |      d k(    ywr7   r9   r;   s     r   r>   z!_parse_decimal.<locals>.<genexpr>L   r?   r@   r!   )stripreplaceresub
IGNORECASE
startswithlstriprstriprfindsplitr:   allr   r	   )textnegativelast_dot
last_commapartsvals         r   _parse_decimalrS   &   s   ::<R(D66-T?DHsts3{{4 '',sABx d{sd{::c?ZZ_
 <<R(00c:D <<R(D	 

3u:>c!AuQRy!AA<<R(D	 

3u:>c!AuQRy!AA<<R(D<<S)Ddmt(S( s|s   4G G GGc                   | j                         } t        j                  d|       r$t        j                  | d      j                         S dD ](  }	 t        j                  | |      j                         c S  dddddd	d
dddddd}i dddddddddddd	dd
ddddddddddddddddd dd!d	d
dddddd"}i ||}t        j                  d#|       }|rgt        |j                  d            |j                  d      j                         t        |j                  d            }}}||v rt        |||   |      S t        j                  d$|       }|rgt        |j                  d            |j                  d      j                         t        |j                  d            }}}||v rt        |||   |      S y%# t        $ r Y w xY w)&z@Parse dates like '29 March 2026', '02 April 2026', '2026-03-30'.z^\d{4}-\d{2}-\d{2}$z%Y-%m-%d)z%d %B %Yz%d %b %Yr3      r8                  	   
         )januarifebruarimaretaprilmeijunijuliagustus	septemberoktobernovemberdesemberjanuaryfebruarymarchrb   mayjunejulyaugustrg   octoberri   decemberjanfebmaraprjun)julaugsepoctnovdecz(\d{1,2})\s+(\w+)\s+(\d{4})z(\d{1,2})([A-Za-z]+)(\d{4})N)
rB   rD   matchr   strptimer   
ValueErrorintgrouplower)	rM   fmt	id_months	en_months
all_monthsmday	month_stryears	            r   _parse_dater   X   sY   ::<D	xx&-  z27799'	$$T3/4466 ( !a!!Q1222I
1 !%,a18!q!#Q(0! 	Q "2 (22 8B2 	q	 	 "1	 ',Q	
 	q
 1Qr"RI ,I++J 	/6A"1771:
0@0@0BCPQ
OY
"j3S99 	/6A"1771:
0@0@0BCPQ
OY
"j3S99=  		s   #G

	GGc                   ddl }g }|j                  t        j                  |             5 }|j                  D ]&  }|j                         }|s|j                  |       ( 	 ddd       dj                  |      S # 1 sw Y   xY w)z,Extract full text from PDF using pdfplumber.r   N
)
pdfplumberopenioBytesIOpagesextract_textappendjoin)	pdf_bytesr   
text_partspdfpage	page_texts         r   _extract_textr      sm    J	I.	/3IID))+I!!),  
0
 99Z   
0	/s   "A;A;;Bc                    ddl }g }|j                  t        j                  |             5 }|j                  D ]&  }|j                         }|s|j                  |       ( 	 ddd       |S # 1 sw Y   |S xY w)z)Extract tables from PDF using pdfplumber.r   N)r   r   r   r   r   extract_tablesextend)r   r   
all_tablesr   r   tabless         r   _extract_tablesr      si    J	I.	/3IID((*F!!&)  
0
  
0
 s   "A,A,,A6c                6  ( t        |       }|j                         st        d      t        j                  dd|      j                         }d|v xs
 d|v xs d|v }d}t        j                  d|t        j                        }|r|j                  d      j                         }|sFt        j                  d	|t        j                        }|r|j                  d      j                         }|st        d
      d}t        j                  d|t        j                        }|s%t        j                  d|t        j                        }|r(t        |j                  d      j                               }|;t        j                  d|      D ]"  }t        |j                  d            }	|	s |	} n |t        d      t        ||      }
dgdgdgdgdgdgdgdgd}|j                         D ]k  \  }}|D ]a  }t        j                  ||t        j                  t        j                  z        }|s<t        |
|t        |j                  d                    k m t!        |       }|D ]  }|rt#        |      dk  rd}d}t%        |      D ]S  \  }}t        j                  dddj'                  d |D                    j                         }d|v sFd|v sd |v sO|}|} n |}i }t%        |      D ]  \  }}t        j                  dd|xs d      j                         }|j)                  d      r||d<   F|d!v r||d<   P|d"k(  r||d"<   [d#|v s|d$k(  s|j+                  d$      r||d$<   {d|v sd%|v r||d&<   d'|v sd(|v s||d)<    d|vsd|vr-||dz   d D ]b  }|r%t#        |      t-        |j/                               k  r,||d      xs dj                         (||d      xs dj                         }(r|set1        (fd*d+D              rzd}(}t        j2                  d,(t        j4                        }|r>|j                  d      j                         }|j                  d      j                         }t        j                  d-d|      }t        j                  dd|      j                         }t        |      }d}d"|v r(||d"      xs dj                         j7                         }|d.k(  r |t9        d/      kD  r|} n.|t9        d0      z  } n|t9        d1      k  r|t9        d0      z  } n|} d}!d$|v r#||d$      xs dj                         }"|"r|"dk7  r|"}!d}#d&|v r'||d&      xs dj                         }$|$rt        |$      }#d}%d)|v rA||d)      xs dj                         }&|&r%t        j                  dd|&      j                         }%|
j                  j;                  t=        ||| |!|#|%2             e  |
j                  s|
j>                  j;                  d3       |
j                  ra|
j                  d4   j@                  rH|
j                  D ]8  }'|'j@                  s|'j@                  |
j                  d4   j@                  k7  s7 |
S  	 |
S )5zDParse a Pertamina transaction detail PDF and return structured data.z*PDF tidak mengandung teks yang bisa dibacaz\s+r.   kodebooking	terbilangtanggalkirimz3Kode\s*Booking\s*[:\s]*([A-Za-z0-9][A-Za-z0-9\-/]*)r3   z3Booking\s*Code\s*[:\s]*([A-Za-z0-9][A-Za-z0-9\-/]*)z8Tidak dapat menemukan Booking Code / Kode Booking di PDFNz5(?:^|\s)Tanggal\s*[:\s]*(\d{1,2}\s*[A-Za-z]+\s*\d{4})z2(?:^|\s)Date\s*[:\s]*(\d{1,2}\s*[A-Za-z]+\s*\d{4})z(\d{1,2}\s*[A-Za-z]+\s*\d{4})z.Tidak dapat menemukan tanggal transaksi di PDF)r   r   z,(?:Sub\s*total|Subtotal)\s+([\d.,\-]+)\s*IDRz'(?:Discount|Diskon)\s+([\d.,\-]+)\s*IDRz(?:VAT|PPN)\s+([\d.,\-]+)\s*IDRz-(?:Income\s*Tax|PPh\s*22)\s+([\d.,\-]+)\s*IDRz"(?:MVFT|PBBKB)\s+([\d.,\-]+)\s*IDRz+(?:Rounding|Pembulatan)\s+([\d.,\-]+)\s*IDRzD(?:Debit\s*(?:or|/)\s*Credit|Debet\s*/\s*Credit)\s+([\d.,\-]+)\s*IDRz^Total\s+([\d.,\-]+)\s*IDR)r"   r#   r$   r%   r&   r'   r(   r)   rU   r-   c              3  (   K   | ]
  }|xs d   yw)r.   Nr   )r<   cs     r   r>   z&parse_pertamina_pdf.<locals>.<genexpr>   s     2JcAGGcs   itemqtyquantity)r   r   uomzno.sososentdate	sent_date	dikirimkesenttosent_toc              3  B   K   | ]  }|j                         v   y w)N)r   )r<   kw	item_cells     r   r>   z&parse_pertamina_pdf.<locals>.<genexpr>  s      b5ar2**5as   )r"   r)   amountr   u   (A\d+)\s*[-–]\s*(.+)z,\s*BULKKL9991000100)r   r   r   r   r   r   z3Tidak ada line item yang berhasil di-parse dari PDFr   )!r   rB   r   rD   rE   r   searchrF   r   	MULTILINEr   finditerr   r    setattrrS   r   r:   	enumerater   rG   endswithmaxvaluesanyr   DOTALLupperr   r   r   r*   r   ))r   rM   	text_normis_indonesianr   bc_matchr   
date_matchr   dresultcost_patterns
field_namepatternspatternr   table
header_row
header_idxirowrow_normcolsjcell	cell_normqty_cellkoder   
item_match
volume_rawr   r   r   so_valr   date_valr   sent_valr   r   s)                                           @r   parse_pertamina_pdfr      s   #D::<EFF vr4(..0I!Y.i+2Jin`iNiM Lyy>bmmH ~~a(..099B"--
 #>>!,224LSTT G@bllJ YYA",,

 j..q1779:=tDAAGGAJ'A	 E
 IJJL'BF EE?@23BC78CD`a/0	M !. 3 3 5
HG		'4)EFA
N1771:,FG	   !6 Y'FE
Q

&FAsvvfb#((2Jc2J*JKQQSH!u'8J(<R 

 '   ,GAtvrDJB8>>@I##F+ V11Ue#UI%d):i>P>PQU>VT
9,
i0G$%[!	)X-B"#Y -  d!2 a)C#c(c$++-&88T&\*0b779IDK(.B557HHb5abb D$L";Y		RJ!''*002)//288:66+r<@L66&#|<BBDL (1JC}4;'-2446<<>d{ .#-L#-#?Lgen,)GFO;) Et|d4j//R668fl"E !Md"[ 128b??A$/$9M  LD Y06B==?#%66&#x#@#F#F#HLLL
$())+)! C *M ` <<TU ||Q--LLDzzdjjFLLO,A,AA
 M !
 Mr   )rM   r   returnr   )rM   r   r   r   )r   bytesr   r   )r   r   r   zlist[list[list[str]]])r   r   r   r   )__doc__
__future__r   r   rD   dataclassesr   r   r   r   decimalr   r	   r   r   rS   r   r   r   r   r   r   r   <module>r      sh    O " 	 	 ( # - $ $ $ 6 6 6/d(V	!	Fr   