"""GUI tool that extracts base64-encoded, zipped internal files from XML documents."""

import base64
import os
import tkinter as tk
import zipfile
from tkinter import filedialog, messagebox

from bs4 import BeautifulSoup


def convert(doc_text: str, file_out: str) -> None:
    """Decode base64 text and write the resulting bytes to ``file_out``.

    The text is split on CRLF and every non-blank chunk is decoded
    independently, with the decoded bytes appended in order.

    Args:
        doc_text: Base64-encoded text.
        file_out: Path of the binary file to create (overwritten if present).

    Raises:
        binascii.Error: If a chunk is not valid base64.
        OSError: If ``file_out`` cannot be opened or written.
    """
    with open(file_out, 'wb') as out:
        # Split on CRLF (as in the original code)
        for chunk in doc_text.split('\r\n'):
            if not chunk.strip():
                continue  # ignore empty lines
            out.write(base64.b64decode(chunk))


def _safe_file_name(original_path: str, fallback: str) -> str:
    """Return the last component of ``original_path``, or ``fallback`` if unusable.

    Both backslashes and forward slashes are treated as separators so the
    result never contains a directory part, whichever convention the XML used.
    """
    name = original_path.replace("\\", "/").rsplit("/", 1)[-1]
    if name in ("", ".", ".."):
        return fallback
    return name


def extract(file_in, log_func=print):
    """Extract every ``internal-file`` element of an XML file to disk.

    For each element found, the base64 content is decoded and saved as
    ``<file_in>_<index>.zip``, the archive is extracted into the current
    working directory, and the first extracted file is renamed after the
    ``original`` attribute of the enclosing ``file`` element (or
    ``extracted_<index>`` when that attribute is unavailable).

    Failures are reported through ``log_func`` and do not raise; a failure on
    one element does not stop processing of the following ones.

    Args:
        file_in: Path of the XML file to read (UTF-8).
        log_func: Callable taking one message string; defaults to ``print``.
    """
    try:
        with open(file_in, 'r', encoding='utf-8') as f:
            soup = BeautifulSoup(f, 'xml')
    except Exception as e:
        log_func(f"Error opening {file_in}: {e}")
        return

    # enumerate() keeps the index tied to the element's position even when an
    # earlier element fails, so zip names and log messages never collide.
    for index, internal_file in enumerate(soup.find_all('internal-file')):
        zip_file_name = f"{file_in}_{index}.zip"
        try:
            convert(internal_file.text, zip_file_name)
        except Exception as e:
            log_func(f"Error converting base64 content in {file_in} at item {index}: {e}")
            continue

        # Try to take the original file name from the parent element.
        fallback_name = f"extracted_{index}"
        file_element = internal_file.find_parent('file')
        if file_element is not None and file_element.has_attr('original'):
            original_name = _safe_file_name(file_element['original'], fallback_name)
        else:
            original_name = fallback_name

        try:
            with zipfile.ZipFile(zip_file_name, 'r') as zip_ref:
                extracted_files = []
                for info in zip_ref.infolist():
                    # extract() returns the path actually written, which can
                    # differ from the archive member name after sanitising.
                    written_path = zip_ref.extract(info)
                    if not info.is_dir():
                        extracted_files.append(written_path)
        except Exception as e:
            log_func(f"Error extracting zip file {zip_file_name}: {e}")
            continue

        if not extracted_files:
            log_func(f"No files found in zip archive {zip_file_name}.")
            continue

        # Assume there is one file in the zip archive
        extracted_file_path = extracted_files[0]
        new_file_path = os.path.join(os.getcwd(), original_name)
        try:
            # os.replace overwrites an existing target on every platform,
            # whereas os.rename fails on Windows when the target exists.
            os.replace(extracted_file_path, new_file_path)
            log_func(f"Extracted and renamed: {new_file_path}")
        except Exception as e:
            log_func(f"Error renaming {extracted_file_path} to {new_file_path}: {e}")


class ExtractorGUI:
    """Tk window for selecting XML files and running ``extract`` on them."""

    def __init__(self, master):
        """Build the widgets inside ``master`` (a Tk root or toplevel)."""
        self.master = master
        master.title("XML Internal File Extractor")

        # Paths chosen in the last file dialog.
        self.selected_files = []

        # Create a frame to hold the buttons
        button_frame = tk.Frame(master)
        button_frame.pack(pady=10)

        # Button to select files
        select_button = tk.Button(
            button_frame,
            text="Select XML File(s)",
            command=self.select_files,
            width=20
        )
        select_button.pack(side=tk.LEFT, padx=5)

        # Button to run extraction
        extract_button = tk.Button(
            button_frame,
            text="Extract",
            command=self.extract_files,
            width=20
        )
        extract_button.pack(side=tk.LEFT, padx=5)

        # Button to clear the log text area
        clear_button = tk.Button(
            button_frame,
            text="Clear Log",
            command=self.clear_log,
            width=20
        )
        clear_button.pack(side=tk.LEFT, padx=5)

        # Label to show how many files have been selected
        self.file_label = tk.Label(master, text="No files selected")
        self.file_label.pack(pady=5)

        # The log and its scrollbar share a frame; the scrollbar is packed
        # first so it sits beside the text area instead of below it.
        log_frame = tk.Frame(master)
        log_frame.pack(padx=10, pady=10, fill=tk.BOTH, expand=True)

        scrollbar = tk.Scrollbar(log_frame)
        scrollbar.pack(side=tk.RIGHT, fill=tk.Y)

        # Text area for logging output
        self.text_area = tk.Text(
            log_frame,
            wrap=tk.WORD,
            width=80,
            height=20,
            yscrollcommand=scrollbar.set
        )
        self.text_area.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
        scrollbar.config(command=self.text_area.yview)

    def select_files(self):
        """Open a file dialog to select one or more XML files."""
        files = filedialog.askopenfilenames(
            title="Select XML Files",
            filetypes=[("XML files", "*.xml"), ("All files", "*.*")]
        )
        if files:
            self.selected_files = list(files)
            self.file_label.config(text=f"{len(self.selected_files)} file(s) selected")
            self.log_message("Selected files:")
            for f in self.selected_files:
                self.log_message(f" {f}")
        else:
            self.selected_files = []
            self.file_label.config(text="No files selected")
            self.log_message("No files were selected.")

    def extract_files(self):
        """Run the extraction on all selected files."""
        if not self.selected_files:
            messagebox.showwarning("No Files Selected", "Please select at least one XML file to extract.")
            return

        for path in self.selected_files:
            self.log_message(f"Processing file: {path}")
            extract(path, log_func=self.log_message)
        self.log_message("Extraction complete.\n")

    def log_message(self, message):
        """Append a message to the log text area and scroll to it."""
        self.text_area.insert(tk.END, message + "\n")
        self.text_area.see(tk.END)
        # Extraction runs on the Tk main thread, so redraw explicitly to make
        # progress visible while it is still running.
        self.text_area.update_idletasks()

    def clear_log(self):
        """Clear the log text area."""
        self.text_area.delete('1.0', tk.END)


def main():
    """Create the main window and run the Tk event loop."""
    root = tk.Tk()
    ExtractorGUI(root)
    root.mainloop()


if __name__ == "__main__":
    main()