You can view the full code here: https://pastebin.com/awhXMcG3
The following tags must be in both the html file in Folder A and the other html files in Folder B. The Python code will parse the following tags:
<title></title>, <meta name="description" content=" "/>, all from <!-- ARTICLE START --> to <!-- ARTICLE FINAL -->, all from <!-- FLAGS_1 --> to <!-- FLAGS -->, all from <!-- MENU START --> to <!-- MENU FINAL -->
This is the structure of the files. Both the file in Folder A must have the same html tags, respectively the same commented sections. From the html file in Folder A all these sections will be copied to the files in Folder B.
Important: The content of the tags and the content of the comments (Text Text) are different in the file in Folder A compared to the html files in Folder B. This is also the idea. I want the contents of these tags in Folder A to replace the contents of the same tags in the files in Folder B.
E.g. From the example.html file (from Folder A) the following sections will be copied to the one.html and two.html files (from Folder B)
<title>YOUR FIRST PAGE</title> <meta name="description" content="I LOVE HTML and CSS"/> <!-- ARTICLE START --> Text Text <!-- ARTICLE FINAL --> <!-- FLAGS_1 --> Text Text <!-- FLAGS --> <!-- MENU START --> Text Text <!-- MENU FINAL -->
Python Code:
import requests  # NOTE(review): unused by the code shown here; kept in case the full script relies on it
import re
import os

# The folder that contains the template file whose sections are copied.
english_folder1 = r"d:\Downloads\A"
# The folder with the files that receive the copied sections.
english_folder2 = r"d:\Downloads\B"
# The template file (inside english_folder1) to copy the sections from.
file_to_parse_from = 'example.html'
# Only files in english_folder2 with this extension are processed.
extension_file = ".html"
# True: write results into a "parsed" sub-folder of english_folder2.
# False: write them next to the originals in english_folder2.
use_parse_folder = True

# Files in english_folder2 that are never rewritten.
SKIPPED_FILES = ('y_key_e479323ce281e459.html', 'directory.html')

# (label, compiled pattern, required-in-template), listed in the order the
# substitutions are applied to each target file.
SECTION_PATTERNS = (
    ('FLAGS',
     re.compile(r'<!-- FLAGS_1 -->.+<!-- FLAGS -->', re.DOTALL),
     False),
    ('MENU',
     re.compile(r'<!-- MENU START -->.+<!-- MENU FINAL -->', re.DOTALL),
     False),
    ('ARTICLE',
     re.compile(r'<!-- ARTICLE START -->.+<!-- ARTICLE FINAL -->', re.DOTALL),
     True),
    # Quote-aware: stops at the first ">" that is not inside an attribute
    # value, so it cannot swallow the tags that follow on the same line.
    ('meta description',
     re.compile(r'''<meta name="description"(?:[^>"']|"[^"]*"|'[^']*')*>'''),
     True),
    # Non-greedy for the same reason: end at the first closing title tag.
    ('title',
     re.compile(r'<title.+?/title>'),
     True),
)


def extract_sections(template_html: str) -> list:
    """Collect the sections to copy out of the template HTML.

    Args:
        template_html: Full text of the template file.

    Returns:
        A list of ``(compiled_pattern, matched_text)`` pairs, one for every
        section of ``SECTION_PATTERNS`` that occurs in ``template_html``.

    Raises:
        ValueError: If a required section (ARTICLE, meta description, title)
            is not present in the template.
    """
    sections = []
    for label, pattern, required in SECTION_PATTERNS:
        match = pattern.search(template_html)
        if match is None:
            if required:
                raise ValueError(
                    f'{file_to_parse_from}: required section not found: {label}'
                )
            # FLAGS and MENU are optional: simply leave the target untouched.
            continue
        sections.append((pattern, match[0]))
    return sections


def apply_sections(sections: list, target_html: str) -> str:
    """Return ``target_html`` with each section replaced by the template's text.

    Args:
        sections: Pairs produced by ``extract_sections``.
        target_html: Full text of the file being updated.

    Returns:
        The updated HTML. Sections that do not occur in ``target_html`` are
        left as they are.
    """
    for pattern, replacement in sections:
        # A function replacement is inserted verbatim. Passing the text
        # directly would make re.sub interpret backslashes in the copied
        # HTML (e.g. "\d" or "\1") as escapes or group references.
        target_html = pattern.sub(
            lambda _match, text=replacement: text, target_html
        )
    return target_html


def main() -> None:
    """Copy the template's sections into every HTML file of english_folder2.

    Each processed file is written as ``parsed_<name>``, either into the
    ``parsed`` sub-folder (``use_parse_folder`` true) or into
    ``english_folder2`` itself.
    """
    print('Going through english folder')

    # The template never changes during a run, so read and parse it once.
    template_path = os.path.join(english_folder1, file_to_parse_from)
    with open(template_path, encoding='utf-8') as template_file:
        sections = extract_sections(template_file.read())

    if use_parse_folder:
        output_folder = os.path.join(english_folder2, 'parsed')
    else:
        output_folder = english_folder2

    for filename in os.listdir(english_folder2):
        print(filename)
        if filename in SKIPPED_FILES or not filename.endswith(extension_file):
            continue

        try:
            with open(os.path.join(english_folder2, filename),
                      encoding='utf-8') as target_file:
                target_html = target_file.read()
        except FileNotFoundError:
            # The file vanished between listing and opening; skip it.
            continue

        updated_html = apply_sections(sections, target_html)
        print(f'{filename} parsed')

        # Create the output folder on demand; a no-op when it already exists.
        os.makedirs(output_folder, exist_ok=True)
        output_path = os.path.join(output_folder, 'parsed_' + filename)
        with open(output_path, 'w', encoding='utf-8') as output_file:
            output_file.write(updated_html)


if __name__ == "__main__":
    main()
That's all folks.
If you like my code, then make me a favor: translate your website into Romanian, "ro".
Also, see this VERSION 2 or VERSION 3 or VERSION 4 or VERSION 5 or VERSION 6 or VERSION 7