RECURRENT DONATION
Donate monthly to support
the NeculaiFantanaru.com project
您可以在此处查看完整代码:https://pastebin.com/V1MDX0 。请务必先安装 Python。页面中有若干 HTML 链接,全部包含在 <!-- FLAGS_1 --> 到 <!-- FLAGS --> 这一部分中。所有 HTML 文件都具有相同的结构,只有链接不同。因此,下面的任何链接都不应在其他 HTML 页面(的 FLAGS 部分)中重复出现。所有链接都以 https://neculaifantanaru.com/ 开头。
"""Find HTML files in one folder whose FLAGS sections share links.

Code: find all HTML files (from the same folder) that have the same links
inside the ``<!-- FLAGS_1 -->`` section.  The code also shows the duplicated
links and the HTML pages they are in.
"""
import sys
import re
import os
from itertools import combinations

# NOTE(review): the published pattern had lost its two HTML comment markers
# (it could only match the empty string, so no link was ever extracted).
# They are restored here from the page description; confirm the exact marker
# spelling against the real HTML files.
_FLAGS_SECTION = re.compile(
    r'<!--\s*FLAGS_1\s*-->([\s\S]*?)<!--\s*FLAGS\s*-->', re.IGNORECASE
)
_HREF = re.compile(r'href="(.*?)"')


def read_text_from_file(file_path):
    """Return the content of a file, decoded as UTF-8.

    file_path: path of the file to read from
    """
    with open(file_path, encoding='utf8') as f:
        return f.read()


def write_to_file(text, file_path):
    """Write a text to a file as UTF-8.

    text: the text to write
    file_path: path of the file to write to
    """
    # Binary mode keeps '\n' untouched on every platform; characters that
    # cannot be encoded are dropped instead of raising.
    with open(file_path, 'wb') as f:
        f.write(text.encode('utf8', 'ignore'))


def extragere_linkuri(cale_fisier_html):
    """Return the distinct ``href`` targets found in the file's FLAGS section.

    cale_fisier_html: path of the HTML file to scan

    Only the first section delimited by the FLAGS_1 / FLAGS comment markers
    is inspected.  Links keep their order of first appearance.  A file with
    no such section yields an empty list.
    """
    text_html = read_text_from_file(cale_fisier_html)
    sectiune = _FLAGS_SECTION.search(text_html)
    if sectiune is None:
        return []
    # dict.fromkeys removes duplicates while preserving order.
    return list(dict.fromkeys(_HREF.findall(sectiune.group(1))))


def _raport_pereche(fisier_a, fisier_b, linkuri):
    """Print the report for one pair of files and return it as text."""
    mesaj = 'Fisier {} ARE LINKURI IN COMUN CU: {}'.format(fisier_a, fisier_b)
    print("Fisiere comune: ")
    for link in linkuri:
        print(link, '\n')
    print(mesaj)
    print('\n\n')
    corp = ''.join(link + '\n' for link in linkuri)
    return "Fisiere comune: \n" + corp + mesaj + '\n\n'


def verificare_fisiere(cale_folder_fisiere, cale_fisier_rezultat, limba="en"):
    """Report every pair of HTML files in a folder that share FLAGS links.

    cale_folder_fisiere: folder whose ``*.html`` files are compared
    cale_fisier_rezultat: path of the text file that receives the report
    limba: language folder name used for the second part of the report;
        only links having this exact path segment are listed there.  Pass
        "" for Romanian: splitting a link on "/" yields an empty segment
        wherever it contains "//" or ends with "/", so "" matches such links.

    The report has two parts: all shared links per pair of files, then,
    after a ``===LANG===`` header, the shared links restricted to ``limba``.
    The same text is printed to standard output.
    """
    # Sorted listing makes the pair order (and so the report) deterministic.
    cai_fisiere = [
        os.path.join(cale_folder_fisiere, nume)
        for nume in sorted(os.listdir(cale_folder_fisiere))
        if nume.endswith('.html')
    ]
    seturi_linkuri = [set(extragere_linkuri(cale)) for cale in cai_fisiere]

    # Compute each pairwise intersection once and reuse it for both parts.
    perechi = []
    for i, j in combinations(range(len(cai_fisiere)), 2):
        comune = sorted(seturi_linkuri[i] & seturi_linkuri[j])
        if comune:
            perechi.append((cai_fisiere[i], cai_fisiere[j], comune))

    bucati = [_raport_pereche(a, b, comune) for a, b, comune in perechi]

    antet = "==========={}============\n\n".format(limba.upper())
    bucati.append(antet)
    print(antet)

    for a, b, comune in perechi:
        linkuri_limba = [link for link in comune if limba in link.split('/')]
        if linkuri_limba:
            bucati.append(_raport_pereche(a, b, linkuri_limba))

    write_to_file(''.join(bucati), cale_fisier_rezultat)


if __name__ == "__main__":
    verificare_fisiere("c:\\Folder1", "c:\\Folder1\\rezultate.txt")
    # verificare_fisiere("e:\\Carte\\BB\\17 - Site Leadership\\Principal\\en", "c:\\Folder1\\rezultate.txt")

# That's all folks.
# If you like my code, then do me a favor: translate your website into Romanian, "ro".
# Latest articles accessed by readers:
Donate via Paypal
RECURRENT DONATION: Donate monthly to support. SINGLE DONATION: Donate the desired amount to support. Donate by Bank Transfer: Account Ron: RO34INGB0000999900448439
Open account at ING Bank
|
||||||||||||
![]() |
||||||||||||