1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150
| import re import openpyxl
# Path of the raw log file that process_log() reads (opened with GBK encoding).
log = "3.txt"
# Path passed to handle_file() at the bottom of the script; it has the same
# value as the 'tmp.txt' path that process_log() writes its filtered output to.
filename = "tmp.txt"
def process_log():
    """Filter the raw log down to the lines the later parsing step needs.

    Reads the file named by the module-level ``log`` (GBK encoded), keeps
    every line that contains at least one of the substrings ``sim``,
    ``return productcodes``, ``hit`` or ``start``, and writes the surviving
    lines, in their original order, to ``tmp.txt`` (GBK encoded).
    """
    keywords = ('sim', 'return productcodes', 'hit', 'start')

    # Read everything before opening the output file, as the original did.
    with open(log, 'r', encoding='GBK') as source:
        all_lines = source.readlines()

    kept = []
    for text in all_lines:
        if any(word in text for word in keywords):
            kept.append(text)

    with open('tmp.txt', 'w', encoding='GBK') as sink:
        sink.writelines(kept)
    print("日志预处理完成")
def get_dict_from_line(line):
    """Extract a single ``{code: sim}`` pair from one log line.

    The line must contain ``code:<digits>,`` followed later by
    ``sim:<number>`` where the number is digits with an optional decimal
    part. Both values are returned as strings, exactly as they appear.

    Returns:
        A one-entry dict mapping the code to the similarity, or ``None``
        when the line does not match.
    """
    found = re.search(r'code:(\d+),.*sim:(\d+(\.\d+)?)', line)
    if not found:
        return None
    product_code, similarity = found.group(1, 2)
    return {product_code: similarity}
def get_list_from_per_identify(lines,start_position):
    """Collect the data belonging to one identification record.

    The record is located relative to ``start_position``, the index of a
    marker line within ``lines``:

    * line ``start_position - 2``: its comma-separated field count minus 2
      becomes the first list element (presumably the number of returned
      product codes - confirm against the log format);
    * line ``start_position - 1``: its 7th space-separated token becomes the
      second list element (presumably the fed-back product code - confirm);
    * line ``start_position + 1``: the last two characters of its second
      comma-separated field, stripped, form the ``hit`` value;
    * lines ``start_position - 3`` down to ``start_position - 8``: each one
      that parses via ``get_dict_from_line`` contributes a ``{code: sim}``
      dict, nearest line first.

    Returns:
        A tuple ``(hit, record)`` where ``record`` is
        ``[count, code, {code: sim}, ...]``.
    """
    candidate_count = len(lines[start_position - 2].split(",")) - 2
    fed_back_code = lines[start_position - 1].split(" ")[6]
    record = [candidate_count, fed_back_code]

    second_field = lines[start_position + 1].split(",")[1]
    hit = second_field[-2:].strip()

    # Walk backwards over the six lines preceding the two header lines.
    for distance in range(3, 9):
        index = start_position - distance
        if index < 0:
            # Every remaining index is negative as well, so stop here.
            break
        parsed = get_dict_from_line(lines[index])
        if parsed:
            record.append(parsed)

    return hit, record
def recommend_product_2(data):
    """Recommend every code whose similarity is within 0.03 of the first entry.

    Args:
        data: List of single-entry ``{code: sim}`` dicts, where ``sim`` is
            anything ``float()`` accepts. The first entry is the reference;
            ``handle_file`` passes the list sorted by descending similarity,
            so the reference is the best candidate.

    Returns:
        The unique recommended codes in order of first appearance in
        ``data``. An empty ``data`` yields an empty list.
    """
    if not data:
        # Nothing to compare against; previously this raised IndexError.
        return []

    reference = float(list(data[0].values())[0])
    lower = reference - 0.03
    upper = reference + 0.03

    # A dict keeps insertion order, so duplicates are dropped while the
    # ranking order is preserved (list(set(...)) gave an arbitrary order).
    picked = {}
    for item in data:
        code, similarity = list(item.items())[0]
        if lower <= float(similarity) <= upper:
            picked.setdefault(code, None)
    return list(picked)
def recommend_product_3(data):
    """Recommend one to three codes using tiered similarity thresholds.

    The tier is chosen from the first entry's similarity:

    * ``>= 0.96``: only the first code;
    * ``>= 0.88``: the first code, plus the second if it is also ``>= 0.88``;
    * ``>= 0.8``: the first three codes if the third is ``>= 0.8``, else the
      first two if the second is ``>= 0.8``, else only the first;
    * otherwise nothing is recommended and a message is printed.

    Args:
        data: List of single-entry ``{code: sim}`` dicts, where ``sim`` is
            anything ``float()`` accepts; ``handle_file`` passes it sorted by
            descending similarity. It may hold fewer than three entries.

    Returns:
        The unique recommended codes in rank order, or ``[]`` when nothing
        qualifies (including empty ``data``).
    """
    def confidence(rank):
        return float(list(data[rank].values())[0])

    def meets(rank, threshold):
        # A missing candidate simply does not qualify (the original indexed
        # unconditionally and raised IndexError on short lists).
        return len(data) > rank and confidence(rank) >= threshold

    if not data:
        print("未推荐出商品", data)
        return []

    top = confidence(0)
    if top >= 0.96:
        keep = 1
    elif top >= 0.88:
        keep = 2 if meets(1, 0.88) else 1
    elif top >= 0.8:
        if meets(2, 0.8):
            keep = 3
        elif meets(1, 0.8):
            keep = 2
        else:
            keep = 1
    else:
        print("未推荐出商品", data)
        return []

    codes = [list(item.keys())[0] for item in data[:keep]]
    # dict.fromkeys removes duplicates but keeps rank order, so the result is
    # deterministic (list(set(...)) was not).
    return list(dict.fromkeys(codes))
def handle_file(filename):
    """Evaluate every identification record in a filtered log and write to Excel.

    Reads ``filename`` (GBK encoded), finds each line containing
    ``start feed back`` and parses the record around it with
    ``get_list_from_per_identify``. Only records with exactly six candidate
    entries (list length 8) are used. For each one a row is written to the
    third worksheet of ``aa.xlsx``, starting at row 2:

    * A: string form of ``[count, code, candidates sorted by similarity]``
    * B: the record's first element (the count parsed from the log)
    * C: 1 if the parsed ``hit`` value is not ``"-1"``, else 0
    * D / E: size of the ``recommend_product_2`` list, and 1 if the record's
      code is in it, else 0
    * F / G: the same pair for ``recommend_product_3``

    After the last record, a row of ``=AVERAGE(...)`` formulas for columns
    B-G is added and the workbook is saved back to ``aa.xlsx``.

    Args:
        filename: Path of the filtered log file to read.
    """
    with open(filename, 'r', encoding="GBK") as file:
        lines = file.readlines()

    workbook = openpyxl.load_workbook("aa.xlsx")
    worksheet = workbook.worksheets[2]

    # Row 1 is never written here (presumably a header row); the first
    # record lands on row 2.
    count = 1
    for start_position, line in enumerate(lines):
        if 'start feed back' not in line:
            continue
        hit, identify_list = get_list_from_per_identify(lines, start_position)
        if len(identify_list) != 8:
            # Incomplete record: fewer than six candidate lines were parsed.
            continue

        count += 1
        print(count)

        num_0 = identify_list[0]
        actual_code = identify_list[1]
        result_0 = 1 if hit != "-1" else 0

        # Similarities are captured as strings; compare them numerically so
        # the ranking never depends on lexicographic string order.
        result_list = sorted(
            identify_list[2:],
            key=lambda entry: float(list(entry.values())[0]),
            reverse=True,
        )

        code_list_2 = recommend_product_2(result_list)
        num_2 = len(code_list_2)
        result_2 = 1 if actual_code in code_list_2 else 0

        code_list_3 = recommend_product_3(result_list)
        num_3 = len(code_list_3)
        result_3 = 1 if actual_code in code_list_3 else 0

        row_values = (
            str(identify_list[0:2] + result_list),
            num_0,
            result_0,
            num_2,
            result_2,
            num_3,
            result_3,
        )
        for column, value in zip("ABCDEFG", row_values):
            worksheet["{}{}".format(column, count)] = value

    # Only add the summary row when at least one record was written;
    # otherwise the formula would be AVERAGE(B2:B1), a range that contains
    # the formula's own cell.
    if count > 1:
        for column in ['B', 'C', 'D', 'E', 'F', 'G']:
            worksheet["{}{}".format(column, count + 1)] = (
                "=AVERAGE({}2:{}{})".format(column, column, count)
            )

    workbook.save("aa.xlsx")
    print("OK")
# Script entry point: first filter the raw log into tmp.txt, then parse that
# filtered file and write the evaluation results into aa.xlsx.
process_log()
handle_file(filename)
|