Puteţi vizualiza întregul cod aici: https://pastebin.com/qmpbmnMa
Instalaţi Python.
For example, suppose I have this page:
my-name-is-prince.html
And this html page has the title tag: <title>I love Freddy Mercury</title>
Output: After running the Python code, the text of the title tag is parsed and converted into the file name (and link). The page becomes:
i-love-freddy-mercury.html with the same <title>I love Freddy Mercury</title>
from bs4 import BeautifulSoup
from bs4.formatter import HTMLFormatter
import requests
import re
import execjs
from urllib import parse
import json
import os


class UnsortedAttributes(HTMLFormatter):
    """HTML formatter that yields a tag's attributes without sorting them."""

    def attributes(self, tag):
        """Yield each (name, value) pair of ``tag.attrs`` in stored order."""
        for k, v in tag.attrs.items():
            yield k, v


def read_text_from_file(file_path):
    """
    Return the content of a file, decoded as UTF-8.

    file_path: path of the file to read
    """
    with open(file_path, encoding='utf8') as f:
        text = f.read()
    return text


def write_to_file(text, file_path):
    """
    Write a text to a file as UTF-8, replacing any previous content.

    text: the text to write
    file_path: path of the file to write
    """
    with open(file_path, 'wb') as f:
        # 'ignore' drops characters that cannot be encoded instead of raising.
        f.write(text.encode('utf8', 'ignore'))


def _slug_from_title(title):
    """Return the lower-case, hyphen-joined words of *title* ('' if none)."""
    # Drop an apostrophe together with the character that follows it
    # ("Freddy's" -> "Freddy") before splitting the title into words.
    without_apostrophes = re.sub(r"'\w", '', title)
    return '-'.join(re.findall(r'\w+', without_apostrophes.lower()))


files_from_folder = "e:\\Folder"
extension_file = ".html"
directory = os.fsencode(files_from_folder)
amount = 1

# Prefix of the canonical URL written into every processed page.
base_url = "https://trinketbox.ro/en/"
# Files in the folder that must never be renamed.
skipped_files = ('y_key_e479323ce281e459.html', 'directory.html')
# Compiled once, outside the loop; captures the current canonical URL.
canonical_pattern = re.compile('<link rel="canonical" href="(.*?)" />')

for file in os.listdir(directory):
    filename = os.fsdecode(file)
    if filename in skipped_files:
        continue
    if not filename.endswith(extension_file):
        continue

    current_file_name = os.path.join(files_from_folder, filename)
    # Read everything up front so the file is closed before it is
    # rewritten and renamed below.
    file_text = read_text_from_file(current_file_name)

    soup = BeautifulSoup('<pre>' + file_text + '</pre>', 'html.parser')
    title_tag = soup.find('title')
    if title_tag is None:
        # Skip this page instead of aborting the whole batch.
        print(f'{filename} skipped: no <title> tag found')
        continue

    slug = _slug_from_title(title_tag.get_text())
    if not slug:
        # A title without word characters would give the name ".html".
        print(f'{filename} skipped: <title> contains no usable words')
        continue

    new_filename = slug + '.html'
    new_file_name = os.path.join(files_from_folder, new_filename)

    # Two pages with the same title would otherwise overwrite each other
    # (POSIX) or make os.rename raise (Windows).
    if (os.path.exists(new_file_name)
            and not os.path.samefile(current_file_name, new_file_name)):
        print(f'{filename} skipped: {new_filename} already exists')
        continue

    # Replace the old canonical URL with the one matching the new name.
    canonical = canonical_pattern.findall(file_text)
    # An empty href must not be used as the search string:
    # str.replace('', x) would insert x between every character.
    if canonical and canonical[0]:
        link_nou = base_url + slug + ".html"
        file_text = file_text.replace(canonical[0], link_nou)
        write_to_file(file_text, current_file_name)
    else:
        print("Nu am gasit tag-ul canonical in fisier")

    if new_file_name != current_file_name:
        os.rename(current_file_name, new_file_name)
    print(f'{filename} changed filename ({amount})')
    amount += 1
That's all folks.
If you like my code, then do me a favor: translate your website into Romanian, "ro".
You can also see other versions of this Python code: Version 2, Version 3, Version 4, or Version 5.
Puteţi vizualiza şi versiunea de cod în PowerShell or VERSION 2 or VERSION 3