4 anni fa · 8cedd872ed
--- a/ondemand/service_streamlit.py
+++ b/ondemand/service_streamlit.py
@@ -13,9 +13,12 @@ import mutagen.mp3
 
				 import math
			
 
				 import sys
			
 
				 import os
			
 
				+import time
			
 
				 from datetime import datetime
			
 
				 import streamlit as st
			
 
				 
			
 
				+st.set_page_config(layout="wide")
			
 
				+
			
 
				 if sys.version_info >= (3, 0):
			
 
				     from queue import Queue, Empty
			
 
				 else:
			
@@ -67,17 +70,14 @@ db_path = config.get('localDatabase', os.path.join(device_path, 'files.db'))
 
				 cloud_cache = {}
			
 
				 
			
 
				 def process_segment(anuncios, grabaciones, audios=None, calibration=None):
			
 
				-    """ Procesa una hora de audio """
			
 
				-    # print(anuncio +" y "+ audio_busqueda)
			
 
				-    # date = dateutil.parser.parse(item['fecha'], ignoretz=True)
			
 
				-    segment_size = 5
			
 
				+    segment_size = int(calibration['ss'])
			
 
				     audio_length = 0
			
 
				 
			
 
				-    anuncios = ["anuncio"]
			
 
				     dejavu = Dejavu({"database_type": "mem"})
			
 
				     try:
			
 
				         for i in range(0, len(anuncios)):
			
 
				-            dejavu.fingerprint_file("anuncios/audio-{}.mp3".format(i,))
			
 
				+            path = "anuncios/{}".format(anuncios[i].name,)
			
 
				+            dejavu.fingerprint_file(path)
			
 
				     except Exception as ex:
			
 
				         log.error('[process_segment] cannot fingerprint: {}'.format(ex))
			
 
				 
			
@@ -85,27 +85,34 @@ def process_segment(anuncios, grabaciones, audios=None, calibration=None):
 
				     results = []
			
 
				     v = []
			
 
				 
			
 
				+    st.subheader("Resultados de la comparación")
			
 
				     for i in range(0, len(grabaciones)):
			
 
				-        path = "grabaciones/audio-{}.mp3".format(i,)
			
 
				+        path = "grabaciones/{}".format(grabaciones[i].name,)
			
 
				         values = []
			
 
				         try:
			
 
				+            seconds = 0
			
 
				             for match in dejavu.recognize(recognizer, path, segment_size):
			
 
				-                name = path
			
 
				+                name = ""
			
 
				+                if "name" in match:
			
 
				+                    name = match["name"]
			
 
				 
			
 
				                 results.append({
			
 
				-                    'confidence': match['confidence'],
			
 
				-                    'offset': match['offset'],
			
 
				-                    'name': name
			
 
				+                    "path": path,
			
 
				+                    "name": name,
			
 
				+                    "confidence": match["confidence"],
			
 
				+                    "offset": match["offset"],
			
 
				+                    "offset_seconds": seconds
			
 
				                 })
			
 
				                 values.append(str(match['confidence']))
			
 
				+                seconds += segment_size
			
 
				 
			
 
				             v.append(','.join(values))
			
 
				-            log.info('[process_segment] {0} {1}'.format(
			
 
				-                os.path.split(path)[-1],
			
 
				+            log.info('{0} {1}'.format(
			
 
				+                grabaciones[i].name,
			
 
				                 ','.join(values),
			
 
				             ))
			
 
				-            st.text('[process_segment] {0} {1}'.format(
			
 
				-                os.path.split(path)[-1],
			
 
				+            st.text('{0} {1}'.format(
			
 
				+                grabaciones[i].name,
			
 
				                 ','.join(values),
			
 
				             ))
			
 
				 
			
@@ -113,22 +120,9 @@ def process_segment(anuncios, grabaciones, audios=None, calibration=None):
 
				             log.error('[process_segment] {}'.format(ex))
			
 
				 
			
 
				     try:
			
 
				-        encontrados = {}
			
 
				-        item_ids = []
			
 
				-        for i in item_ids:
			
 
				-            r = [result for result in results if result["name"] == i]
			
 
				-            encontrados[i] = find_repetitions(r, segments_needed=segments_needed, calibration=calibration,)
			
 
				-
			
 
				-        #for id in encontrados:r
			
 
				-        #    for e in encontrados[id]:
			
 
				-        #        for i in item['elementos']:
			
 
				-        #            if i['id'] == id and i['anuncio'] == e['ad']:
			
 
				-        #                if 'encontrados' not in i:
			
 
				-        #                    i['encontrados'] = []
			
 
				-        #                i['encontrados'].append(e)
			
 
				-        #                break
			
 
				-
			
 
				-        #item["archivos_perdidos"] = (12 - audios_counter) if audios_counter < 12 else 0
			
 
				+        encontrados = find_repetitions(results, segments_needed=int(calibration['sn']), calibration=calibration)
			
 
				+        st.subheader("Encontrados")
			
 
				+        st.write(encontrados)
			
 
				     except ConnectionError as ex:
			
 
				         log.error('[process_segment] {}'.format(str(ex)))
			
 
				     except UserWarning as warn:
			
@@ -142,17 +136,12 @@ def find_repetitions(results, segments_needed=2, calibration=None):
 
				     expect_space = False
			
 
				     expect_recover = False
			
 
				     last_value_in_threshold_index = -1
			
 
				-    fall_tolerance = calibration['fallTolerance']
			
 
				+    fall_tolerance = calibration['tf']
			
 
				     found = []
			
 
				-
			
 
				-    high = 100 # Obtener este valor desde un parámetro
			
 
				-    middle_high = 50 # Obtener este valor desde un parámetro
			
 
				-    segment_middle_needed = 2 # Obtener este valor desde un parámetro
			
 
				-    found_high = None
			
 
				-    found_middle_high = []
			
 
				+    last_found = None
			
 
				 
			
 
				     if threshold_mode == THRESHOLD_FIXED:
			
 
				-        threshold = calibration['threshold']
			
 
				+        threshold = int(calibration['th'])
			
 
				     elif threshold_mode == THRESHOLD_AVERAGE:
			
 
				         values = [x['confidence'] for x in results]
			
 
				         threshold = math.ceil(float(sum(values)) / float(len(values)))
			
@@ -161,14 +150,6 @@ def find_repetitions(results, segments_needed=2, calibration=None):
 
				         segments_needed = 1
			
 
				 
			
 
				     for index, result in enumerate(results):
			
 
				-        #if result['confidence'] >= high:
			
 
				-        #    if found_high is None:
			
 
				-        #        found_high = index
			
 
				-        #    elif result['confidence'] > results[found_high]['confidence']:
			
 
				-        #        found_high = index
			
 
				-        #elif result['confidence'] >= middle_high:
			
 
				-        #    found_middle_high.append(index)
			
 
				-
			
 
				         if not expect_space:
			
 
				             if result['confidence'] >= threshold:
			
 
				                 found_counter += 1
			
@@ -182,17 +163,11 @@ def find_repetitions(results, segments_needed=2, calibration=None):
 
				             elif fall_tolerance:
			
 
				                 if not expect_recover:
			
 
				                     if last_value_in_threshold_index != -1:
			
 
				-                        """ Solo cuando ya haya entrado por lo menos
			
 
				-                        un valor en el rango del threshold, es cuando
			
 
				-                        se podrá esperar un valor bajo """
			
 
				                         expect_recover = True
			
 
				                         found_down_counter += 1
			
 
				                     else:
			
 
				                         pass
			
 
				                 else:
			
 
				-                    """ Si después de haber pasado tolerado 1 elemento
			
 
				-                    vuelve a salir otro fuera del threshold continuo,
			
 
				-                    entonces ya se da por perdido """
			
 
				                     found_counter = 0
			
 
				                     found_down_counter = 0
			
 
				                     found_index = None
			
@@ -203,27 +178,17 @@ def find_repetitions(results, segments_needed=2, calibration=None):
 
				                 found_down_counter = 0
			
 
				                 found_index = None
			
 
				                 expect_recover = False
			
 
				-                # Aquí veremos si hay un valor alto
			
 
				-                #if found_high is not None:
			
 
				-                #    found_row = results[found_high]
			
 
				-                #    found.append(found_row)
			
 
				-                #elif len(found_middle_high) >= segment_middle_needed:
			
 
				-                #    found_row = results[found_middle_high[0]]
			
 
				-                #    found.append(found_row)
			
 
				-                #found_high = None
			
 
				-                #found_middle_high = []
			
 
				 
			
 
				         else:
			
 
				             if result['confidence'] <= threshold:
			
 
				                 expect_space = False
			
 
				 
			
 
				-        if found_counter >= segments_needed:
			
 
				+        if found_counter >= segments_needed and last_found != found_index:
			
 
				             found_row = results[found_index]
			
 
				             found.append(found_row)
			
 
				+            last_found = found_index
			
 
				             found_counter = 0
			
 
				             expect_space = True
			
 
				-            #found_high = None
			
 
				-            #found_middle_high = []
			
 
				 
			
 
				     return found
			
 
				 
			
@@ -239,20 +204,40 @@ def limpiar_archivos():
 
				 
			
 
				 
			
 
				 def main():
			
 
				-    if st.button("Limpiar archivos"):
			
 
				-        limpiar_archivos()
			
 
				 
			
 
				-    anuncios = st.file_uploader("Elige los anuncios", accept_multiple_files=True, type="mp3")
			
 
				+    st.subheader('Subir archivos para comparar')
			
 
				+
			
 
				+    u1, u2 = st.beta_columns([3, 3])
			
 
				+    anuncios = u1.file_uploader("Anuncios", accept_multiple_files=True, type="mp3")
			
 
				     for i in range(0, len(anuncios)):
			
 
				-        with open("anuncios/audio-{}.mp3".format(i,), "wb") as audio:
			
 
				+        with open("anuncios/{}".format(anuncios[i].name,), "wb") as audio:
			
 
				             audio.write(anuncios[i].getvalue())
			
 
				 
			
 
				-    grabaciones = st.file_uploader("Elige la grabación", accept_multiple_files=True, type="mp3")
			
 
				+    grabaciones = u2.file_uploader("Grabaciones", accept_multiple_files=True, type="mp3")
			
 
				+    grabaciones.sort(key=lambda x: x.name)
			
 
				     for i in range(0, len(grabaciones)):
			
 
				-        with open("grabaciones/audio-{}.mp3".format(i,), "wb") as audio:
			
 
				+        with open("grabaciones/{}".format(grabaciones[i].name,), "wb") as audio:
			
 
				             audio.write(grabaciones[i].getvalue())
			
 
				 
			
 
				+    if st.button("Borrar archivos anteriores"):
			
 
				+        limpiar_archivos()
			
 
				+
			
 
				+    st.subheader('Parámetros de calibración')
			
 
				+
			
 
				+    col1, col2, col3, col4 = st.beta_columns([1,1,1,1])
			
 
				+    umbral = col1.text_input("Umbral", 12)
			
 
				+    segmentos_necesarios = col2.text_input("Sementos necesarios", 4)
			
 
				+    caida = col3.text_input("Tolerancia a caida", 1)
			
 
				+    segmento = col4.text_input("Tamaño del Segmento", 5)
			
 
				+
			
 
				+    calibracion = {
			
 
				+        "th": umbral,
			
 
				+        "tf": caida,
			
 
				+        "sn": segmentos_necesarios,
			
 
				+        "ss": segmento
			
 
				+    }
			
 
				+
			
 
				     if st.button("Comparar"):
			
 
				-        process_segment(anuncios, grabaciones)
			
 
				+        process_segment(anuncios, grabaciones, calibration=calibracion)
			
 
				 
			
 
				 main()