a :jg\*@sddlmZddlZddlZddlZddlZddlZddlZddlZddl Z ddl m Z m Z m Z mZmZdddddZd d d d d ddddddddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2)Zddd3d4d5ZGd6d7d7eZd8dd9d:d;d<ZGd=d>d>eZGd?d@d@eZGdAdBdBeZGdCdDdDZGdEdFdFZGdGdHdHee Ze rejeeefe fZ nejZ GdIdJdJe Z!GdKdLdLZ"GdMdNdNZ#dOddPdQdRZ$GdSdTdTZ%dS)U) annotationsN)IO TYPE_CHECKINGAny NamedTupleUnionstrbytessreturncCstj|dS)N utf_16_be)codecs BOM_UTF16_BEencode)r r7/usr/local/lib/python3.9/site-packages/PIL/PdfParser.py encode_textsru˘uˇuˆu˙u˝u˛u˚u˜u•u†u‡u…u—u–uƒu⁄u‹u›u−u‰u„u“u”u‘u’u‚u™ufiufluŁuŒuŠuŸuŽuıułuœušužu€)))br cCsH|dttjtjkr0|ttjddSddd|DSdS)Nr css|]}t|t|VqdSN)PDFDocEncodinggetchr).0byterrr Ezdecode_text..)lenrrdecodejoin)r>rrr decode_textAsrKc@seZdZdZdS)PdfFormatErrorz\An error that probably indicates a syntactic or semantic error in the PDF file structureN)__name__ __module__ __qualname____doc__rrrrrLHsrLboolNone) condition error_messager cCs|s t|dSr@)rL)rSrTrrrcheck_format_conditionOsrUc@seZdZUded<ded<dS)IndirectReferenceTupleint object_id generationN)rMrNrO__annotations__rrrrrVTs rVc@sVeZdZddddZddddZdd d d d Zdd d d dZddddZdS)IndirectReferencerr cCs|jd|jdS)N z RrXrYselfrrr__str__ZszIndirectReference.__str__r cCs|dSNus-ascii)rarr_rrr __bytes__]szIndirectReference.__bytes__objectrQotherr cCs6|j|jurdSt|tsJ|j|jko4|j|jkS)NF) __class__ isinstancer[rXrYr`rgrrr__eq__`s zIndirectReference.__eq__cCs ||k Sr@rrjrrr__ne__fszIndirectReference.__ne__rWcCst|j|jfSr@)hashrXrYr_rrr__hash__iszIndirectReference.__hash__N)rMrNrOrardrkrlrnrrrrr[Ys r[c@seZdZddddZdS)IndirectObjectDefrr\cCs|jd|jdS)Nr]z objr^r_rrrranszIndirectObjectDef.__str__N)rMrNrOrarrrrromsroc@seZdZddddZdddddd Zddd d d Zddd d dZddd ddZddddZddddZ dddddZ dS) XrefTablerRr\cCs i|_i|_ddi|_d|_dS)NriF)existing_entries new_entriesdeleted_entriesreading_finishedr_rrr__init__ss  zXrefTable.__init__rWztuple[int, int]keyvaluer cCs2|jr||j|<n ||j|<||jvr.|j|=dSr@)rtrrrqrsr`rwrxrrr __setitem__}s    zXrefTable.__setitem__rwr cCs.z |j|WSty(|j|YS0dSr@)rrKeyErrorrqr`rwrrr __getitem__s  zXrefTable.__getitem__cCs||jvr0|j|dd}|j|=||j|<nR||jvrX|j|dd}||j|<n*||jvrn|j|}nd|d}t|dS)Nz object ID z+ cannot be deleted because it doesn't exist)rrrsrq IndexError)r`rwrYmsgrrr __delitem__s       zXrefTable.__delitem__rQcCs||jvp||jvSr@)rqrrr}rrr __contains__szXrefTable.__contains__cCs.tt|jt|jBt|jBSr@)rHsetrqkeysrrrsr_rrr__len__s   zXrefTable.__len__zset[int]cCs*t|jt|jt|jBSr@)rrqrrsrrr_rrrrs zXrefTable.keys IO[bytes]fr c CsJtt|jt|jB}tt|j}|}|d|rFd}t|D]>\}}|dusr|d|krx|}qV|d|}||d}qqV|}g}|d|dt|f|D]} | |jvr|d|j| q| d} t | | kd| d| z |d} Wnt y(d} Yn0|d| |j| fqqD|S) Nsxref rs%d %d rs%010d %05d n z*expected the next deleted object ID to be z, instead found s%010d %05d f ) sortedrrrrrstellwrite enumeraterHpoprUr) r`rrZ deleted_keysZ startxrefprevindexrwZcontiguous_keysrXZthis_deleted_object_idZnext_in_linked_listrrrrsF         zXrefTable.writeN) rMrNrOrurzr~rrrrrrrrrrprs rpc@seZdZUded<dddddZdd d d Zd d dddZdd ddZdd ddZe dddddZ e e dddddDZ dd ddZd S)!PdfNamer namezPdfName | bytes | strrR)rr cCs6t|tr|j|_nt|tr&||_n |d|_dSrb)rirrr r)r`rrrrrus    zPdfName.__init__rr\cCs |jdSrb)rrIr_rrr name_as_strszPdfName.name_as_strrerQrfcCs t|tr|j|jkp||jkSr@)rirrrjrrrrkszPdfName.__eq__rWcCs t|jSr@)rmrr_rrrrnszPdfName.__hash__cCs|jjdt|jdS)N())rhrMreprrr_rrr__repr__szPdfName.__repr__datar cCs|t|Sr@) PdfParserinterpret_name)clsrrrrfrom_pdf_streamszPdfName.from_pdf_stream!cCsh|] }t|qSr)ord)rDcrrr rGzPdfName.z #%/()<>[]{}cCs@td}|jD](}||jvr(||q|d|qt|S)N/s#%02X) bytearrayr allowed_charsappendextendr )r`resultr>rrrrds    zPdfName.__bytes__N)rMrNrOrZrurrkrnr classmethodrrrangerrdrrrrrs rc@seZdZddddZdS)PdfArrayr r\cCsdddd|DdS)Ns[  css|]}t|VqdSr@)pdf_repr)rDxrrrrFrGz%PdfArray.__bytes__..s ])rJr_rrrrdszPdfArray.__bytes__N)rMrNrOrdrrrrrsrc@s<eZdZddddddZdddd d Zd d d dZdS)PdfDictrrrRrvcCs,|dkrtj|||n|||d<dS)Nrrc) collectionsUserDict __setattr__rryrrrrszPdfDict.__setattr__zstr | time.struct_timer{c Cs.z||d}Wn.ty@}zt||WYd}~n d}~00t|trTt|}|dr*|drv|dd}d}t|dkr|d}t |ddd }t|d kr|t |d d 7}d dt|d}t |dt|d|}|d vr*|d 9}|dkr|d9}t t ||}|S)NrcDatezD:Z<z %Y%m%d%H%M%S)+-r)rr|AttributeErrorrir rKendswith startswithrHrWtimestrptimegmtimecalendartimegm)r`rwrxeZ relationshipoffsetformatrrr __getattr__ s.         zPdfDict.__getattr__r r\cCsntd}|D]J\}}|dur"qt|}|d|tt||d||q|dt|S)N<< rs >>)ritemsrrr r)r`outrwrxrrrrd&s    zPdfDict.__bytes__N)rMrNrOrrrdrrrrrsrc@s*eZdZdddddZdddd Zd S) PdfBinaryzlist[int] | bytesrRrcCs ||_dSr@)r)r`rrrrru5szPdfBinary.__init__r r\cCsdddd|jDS)Ns<%s>rGcss|]}d|VqdS)s%02XNrrDr>rrrrF9rGz&PdfBinary.__bytes__..)rJrr_rrrrd8szPdfBinary.__bytes__N)rMrNrOrurdrrrrr4src@s,eZdZddddddZdddd Zd S) PdfStreamrr rR) dictionarybufr cCs||_||_dSr@)rr)r`rrrrrru=szPdfStream.__init__r\cCsz|jd}Wnty&|jYS0|dkrpz|jd}WntyZ|jd}Yn0tj|jt|dSdt|d}t|dS)NsFilters FlateDecodesDLLength)bufsizezstream filter z unknown/unsupported)rr|rzlib decompressrWrNotImplementedError)r`filterZexpected_lengthrrrrrIAs   zPdfStream.decodeN)rMrNrOrurIrrrrr<srr)rr cCs |dur dS|durdS|dur$dSt|ttttfr>t|St|ttfrZt| dSt|t j rdt d| dd St|t rtt|St|trtt|St|trtt|St|tr|d d }|d d }|d d}d |d St|SdS)NTstrueFsfalsesnullrcs(D:z %Y%m%d%H%M%SZ)\s\\(\(s\))rirrrrr rWfloatrrr struct_timestrftimedictlistrrreplace)rrrrrQs0            rc @seZdZdZdddddd d d d d ZddddZdd dddZd dddZd dddZd dddZ d dddZ d dddZ d d dd d!Z d"dd#d$Z d dd%d&Zdd'd d(d)d*Zd+d,d,d"d-d.d/Zd'd,d,d"d-d0d1Zd dd2d3Zed4d5d6d7d8Zd dd9d:Zdd;d"d<d=d>Zd?Zd@ZdAZdBZedCZedDZdEZdFZeeeZe !edGedHedIedJedKedLe j"Z#e !edGedMedIedJedKee j"Z$d ddNdOZ%dd dPdQdRZ&e !eZ'e !edSedTZ(e !edUZ)e !edVeZ*e+dWdXdYdZd[Z,e !d\Z-e+ddWd^d_d`dadbZ.e !edcedTZ/e !eddedTZ0e !edeedTZ1e !edfedTZ2e !edgedTZ3e !edhZ4e !ediZ5e !edjedkZ6e !edlZ7e !edmedmednedTZ8e !edmedmedoedTZ9e !edpedTZ:e !dqedredsZ;e !edtZe !d|Z?d}d~dddddddddqdqdTdTdde@d}d~e@dde@dde@dde@dde@dqdqe@dTdTe@ddiZAe+dwdddddZBe !edeZCe !edJedJeeZDe !dZEdddPddZFdd"dd,dddZGddddddZHdS)rz|Based on https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/PDF32000_2008.pdf Supports PDF up to 1.4 Nrrbz str | NonezIO[bytes] | Nonezbytes | bytearray | NonerWrrR)filenamerr start_offsetmoder cCs$|r|rd}t|||_||_||_||_d|_d|_|dur^|dur^t|||_}d|_|dur|||_d|_|st |dr|j |_i|_ |||||jr| nVd|_ |_t|_d|_t|_d|_t|_g|_g|_d|_d|_i|_t|_d|j_|r |dS)Nz4specify buf or f or filename, but not both buf and fFTrr) RuntimeErrorrrrrshould_close_bufshould_close_fileopenget_buf_from_filehasattrrcached_objects read_pdf_infofile_size_totalfile_size_thisrrootroot_refinfoinfo_refpage_tree_rootpages orig_pages pages_reflast_xref_section_offset trailer_dictrp xref_tablertseek_end)r`rrrrrrrrrrutsN   zPdfParser.__init__r\cCs|Sr@rr_rrr __enter__szPdfParser.__enter__re)argsr cGs |dSr@)close)r`rrrr__exit__szPdfParser.__exit__cCs||dSr@) close_bufrr_rrr start_writingszPdfParser.start_writingcCs"t|jtjr|jd|_dSr@)rirmmaprr_rrrrs zPdfParser.close_bufcCs2|jr||jdur.|jr.|jd|_dSr@)rrrrrr_rrrrs  zPdfParser.closecCs"|jdusJ|jdtjdS)Nr)rseekosSEEK_ENDr_rrrrszPdfParser.seek_endcCs|jdusJ|jddS)Ns %PDF-1.4 )rrr_rrr write_headerszPdfParser.write_headerr cCs*|jdusJ|jd|ddS)Nz%  )rrr)r`r rrr write_commentszPdfParser.write_commentr[cCsz|jdusJ|||j|_|d|_||j|jtd|jd|j|jtdt |j |j d|jS)NrCatalog)TypeZPagesPages)r ZCountZKids) rdel_rootnext_object_idrrr rewrite_pages write_objrrHrr_rrr write_catalogs zPdfParser.write_catalogc Csg}t|jD]\}}|j|}|j|j=||td||jvrHqi}|D]\}}||| <qT|j |d<|j di|}t|jD]\} } | |kr||j| <qq|D]6} | r|j| } | j|jvr|j| j=| dd} qqg|_dS)NsParentParent)N) rrrrrXrrrrrr write_pagerB) r`Zpages_tree_nodes_to_deleteiZpage_refZ page_infoZstringified_page_inforwrxZ new_page_refjZ cur_page_refZpages_tree_node_refZpages_tree_noderrrrs,       zPdfParser.rewrite_pageszIndirectReference | None) new_root_refr cCs|jdusJ|r |||_|jr6|d|j|_|j|j}t|j}|j|d}|j durn|j |d<|jr~|j|d<||_ |jdt t |d|dS)N)RootSizePrevInfostrailer s startxref %d %%%%EOF) rrrrrrrrrHrr r)r`rZ start_xrefZ num_entriesrrrrwrite_xref_and_trailers.     z PdfParser.write_xref_and_trailerzint | IndirectReference | Noner)refobjsdict_objr cOsVt|tr|j|n|}d|vr,td|d<d|vr>|j|d<|j|g|Ri|S)Nr Pager)rirWrrrr)r`rrrZobj_refrrrr s   zPdfParser.write_pagecOs|jdusJ|j}|dur,||}n||jf|j|j<|tt|| dd}|durtt ||d<|r|t ||D]}|t |q|dur|d|||d|d|S)NstreamZLengthsstream s endstream sendobj ) rrrrYrrXrr rorrHr)r`rrrrr!objrrrrs&      zPdfParser.write_objcCs.|jdurdS|j|jj=|j|jdj=dS)Nr )rrrXrr_rrrr/s  zPdfParser.del_rootrzbytes | mmap.mmaprcCsVt|dr|St|dr$|Sztj|dtjdWStyPYdS0dS)N getbuffergetvaluer)accessrG)rr#r$rfileno ACCESS_READ ValueError)rrrrr5s   zPdfParser.get_buf_from_filecCsD|jdusJt|j|_|j|j|_|t|jddud|jd|_ |j dus`J|jdd|_ t | |j |_ |j durt |_nt | |j |_td|j vdt|j ddkdt|j ddud tt|j dtd |j d|_|jdusJ| |j|_||j|_|jdd|_dS) NrzRoot is missingrTypez/Type missing in Rootr z/Type in Root is not /Catalogr z/Pages missing in Rootz+/Pages in Root is not an indirect reference)rrHrrr read_trailerrUrrBrrr read_indirectrrrir[rrlinearize_page_treerrr_rrrrAs<     zPdfParser.read_pdf_infoz int | None)rr cCsVztt|jdd}Wnty8tdd}Yn0|durR|df|j|j<|S)Nrr)r[maxrrr(rX)r`r referencerrrres zPdfParser.next_object_ids [][()<>{}/%]s$[][()<>{}/%\000\011\012\014\015\040]s[\000\011\012\014\015\040]s#[\000\011\012\014\015\0400-9a-fA-F]*+s[\000\011\014\040]*s[\r\n]+strailers<<(.*>>)s startxrefs([0-9]+)s%%EOF$s <<(.*?>>)cCs|jdusJt|jd}||jkr,|j}|j|j|}t|dud|}|rp|}|j|j|d}qN|sx|}|dusJ|d}t|d|_ | ||_ t |_ |j|j dd|j vr||j ddS)N@ztrailer end not foundrrxref_section_offsetr)rrHrre_trailer_endsearchrUstartgrouprWrinterpret_trailerrrprread_xref_tableread_prev_trailer)r`Zsearch_start_offsetmZ last_match trailer_datarrrr*s(     zPdfParser.read_trailer)r5r cCs|jdusJ|j|d}|j|j||d}t|dud|dusNJ|d}tt|d|kd||}d|vr||ddS)Nr4r2zprevious trailer not foundrrzGxref section offset in previous trailer doesn't match what was expectedr) rr;re_trailer_prevr7rUr9rWr:r<)r`r5Ztrailer_offsetr=r>rrrrr<s    zPdfParser.read_prev_trailers/([!-$&'*-.0-;=?-Z\\^-z|~]+)(?=rrs>>r zdict[bytes, Any])r>r cCsi}d}|j||}|sX|j||}t|duo@|t|kdt||dq||d}t |t svJ| ||\}}|||<|durq|}qtd|vot |dt dtd|vot |dt d|S)Nrz+name not found in trailer, remaining data: rrz&/Size not in trailer or not an integerrz1/Root not in trailer or not an indirect reference)re_namematch re_dict_endrUendrHrrr9rir get_valuerWr[)rr>trailerrr=rwrxZ value_offsetrrrr:s8zPdfParser.interpret_trailers([^#]*)(#([0-9a-fA-F]{2}))?FrQz str | bytes)rawas_textr cCsnd}|j|D]B}|drD||dt|dd7}q||d7}q|rb|dSt|SdS)NrGrrczutf-8)re_hashes_in_namefinditerr9rfromhexrIr )rrFrGrr=rrrrs & zPdfParser.interpret_namesnull(?=strue(?=sfalse(?=s([-+]?[0-9]+)(?=s)([-+]?([0-9]+\.[0-9]*|[0-9]*\.[0-9]+))(?=s\[]s<(s*)>rs ([-+]?[0-9]+)sR(?=sobj(?=s endobj(?=rs %[^\r\n]*s)*s stream\r?\ns endstream(?=rzbytes | bytearray | mmap.mmapztuple[Any, int | None])rrexpect_indirect max_nestingr cCs |dkr dS|j||}|r&|}|j||}|rtt|ddkdtt|ddkdt|dup|tt|dt|dkd|j|||dd\}}|dur|dfS|j ||}t|dud |dusJ||fSt| d |j ||}|rntt|ddkd tt|ddkd tt|dt|d|fS|j ||}|r|}i}|j ||}|} |s"| dusJ|j|| |dd\} } | dur|dfS|j|| |dd\} } | || <| dur|dfS|j || }q|} |j || }|r|d } | dus^t| tsrd| d} t| |||| }|j||| }t|dud|dusJ|} tt||| fSt|| fS|j||}|rz|}g}|j||}|} |sn| dus(J|j|| |dd\} } || | dur\|dfS|j|| }q||fS|j||}|rd|fS|j||}|rd|fS|j||}|rd|fS|j||}|r t||d|fS|j||}|r4t|d|fS|j||}|r^t|d|fS|j ||}|rt!dd|dD}t"|ddkr|t#dt!$|%d|fS|j&||}|r|'||Sdt(|||d} t| dS)Nr)NNrz.0rczunrecognized object: )) re_commentrArCre_indirect_def_startrUrWr9r[rDre_indirect_def_endre_indirect_reference re_dict_startrBre_stream_startrBrirL re_stream_endrrre_array_start re_array_endrre_nullre_truere_falser@rrre_intre_realr re_string_hexrrHrrKrI re_string_litget_literal_stringr)rrrrMrNr=reZ object_offsetrcurrent_offsetrwrxZ stream_lenrZ stream_dataresultsZ hex_stringrrrrD(s   &              zPdfParser.get_valuesF(\\[nrtbf()\\])|(\\[0-9]{1,3})|(\\(\r\n|\r|\n))|(\r\n|\r|\n)|(\()|(\))nrr t bf rztuple[bytes, int])rrr cCsd}t}|j||D]}|||||drX||j|ddn|dr|t|ddddnx|drnl|dr|dnV|dr|d |d7}n8|d r|dkrt || fS|d |d8}| }qd }t |dS) NrrrrHrrrzunfinished literal string) rre_lit_str_tokenrJrr8r9 escaped_charsrrWr rCrL)rrrZ nesting_depthrr=rrrrras.            zPdfParser.get_literal_stringsxrefs+([0-9]{10}) ([0-9]{5}) ([fn])( \r| \n|\r\n)c Cs&|jdusJd}|j|j||j}t|dud|dusBJ|}|j|j|}|slt|dq"d}|}t|d}t|d}t |||D]|}|j |j|}t|dud|dusJ|}|dd k}|st|d} t|d| f} ||j vr| |j |<qqJ|S) NFzxref section start not foundzxref subsection start not foundTrrzxref entry not foundrHrk) rre_xref_section_startrArrUrCre_xref_subsection_startrWr9r re_xref_entryr) r`r5Zsubsection_foundr=rZ first_object num_objectsrZis_freerYZ new_entryrrrr;s<    zPdfParser.read_xref_table)rrNr c Cs|j|d\}}t||dkd|dd|dd|d||jdusRJ|j|j||jt||dd}||j|<|S)Nrrzexpected to find generation z for object ID z) in xref table, instead found generation z at offset )rMrN)rrUrrDrr[r)r`rrNrrYrxrrrr+s(  zPdfParser.read_indirectzPdfDict | Nonezlist[IndirectReference])noder cCsn|dur |n|j}t|ddkdg}|dD]8}||}|ddkrV||q0||j|dq0|S)Nr)r z%/Type of page tree node is not /PagessKidsr )rw)rrUr+rrr,)r`rwZ page_noderkidZ kid_objectrrrr,#s     zPdfParser.linearize_page_tree)NNNrr)N)N)F)Nr)r)N)IrMrNrOrPrurrrrrrrr rrrrrr staticmethodrrr delimiterZdelimiter_or_ws whitespaceZwhitespace_or_hexZwhitespace_optionalZwhitespace_mandatoryZwhitespace_optional_no_nlZ newline_onlynewlinerecompileDOTALLr6r?r*r<Zre_whitespace_optionalr@rUrBrr:rIrrZr[r\r]r^rXrYr_r`rTrRrSrQrVrWrDrqrrrrarsrtrur;r+r,rrrrrns1  $                !r)& __future__rrrrrrr}rrtypingrrrrrrrArKrrLrUrVr[rorprrrrrr Z _DictBaserrrrrrrrrs -^)0