|
@@ -13,9 +13,12 @@ import mutagen.mp3
|
|
|
import math
|
|
|
import sys
|
|
|
import os
|
|
|
+import time
|
|
|
from datetime import datetime
|
|
|
import streamlit as st
|
|
|
|
|
|
+st.set_page_config(layout="wide")
|
|
|
+
|
|
|
if sys.version_info >= (3, 0):
|
|
|
from queue import Queue, Empty
|
|
|
else:
|
|
@@ -67,17 +70,14 @@ db_path = config.get('localDatabase', os.path.join(device_path, 'files.db'))
|
|
|
cloud_cache = {}
|
|
|
|
|
|
def process_segment(anuncios, grabaciones, audios=None, calibration=None):
|
|
|
- """ Procesa una hora de audio """
|
|
|
- # print(anuncio +" y "+ audio_busqueda)
|
|
|
- # date = dateutil.parser.parse(item['fecha'], ignoretz=True)
|
|
|
- segment_size = 5
|
|
|
+ segment_size = int(calibration['ss'])
|
|
|
audio_length = 0
|
|
|
|
|
|
- anuncios = ["anuncio"]
|
|
|
dejavu = Dejavu({"database_type": "mem"})
|
|
|
try:
|
|
|
for i in range(0, len(anuncios)):
|
|
|
- dejavu.fingerprint_file("anuncios/audio-{}.mp3".format(i,))
|
|
|
+ path = "anuncios/{}".format(anuncios[i].name,)
|
|
|
+ dejavu.fingerprint_file(path)
|
|
|
except Exception as ex:
|
|
|
log.error('[process_segment] cannot fingerprint: {}'.format(ex))
|
|
|
|
|
@@ -85,27 +85,34 @@ def process_segment(anuncios, grabaciones, audios=None, calibration=None):
|
|
|
results = []
|
|
|
v = []
|
|
|
|
|
|
+ st.subheader("Resultados de la comparación")
|
|
|
for i in range(0, len(grabaciones)):
|
|
|
- path = "grabaciones/audio-{}.mp3".format(i,)
|
|
|
+ path = "grabaciones/{}".format(grabaciones[i].name,)
|
|
|
values = []
|
|
|
try:
|
|
|
+ seconds = 0
|
|
|
for match in dejavu.recognize(recognizer, path, segment_size):
|
|
|
- name = path
|
|
|
+ name = ""
|
|
|
+ if "name" in match:
|
|
|
+ name = match["name"]
|
|
|
|
|
|
results.append({
|
|
|
- 'confidence': match['confidence'],
|
|
|
- 'offset': match['offset'],
|
|
|
- 'name': name
|
|
|
+ "path": path,
|
|
|
+ "name": name,
|
|
|
+ "confidence": match["confidence"],
|
|
|
+ "offset": match["offset"],
|
|
|
+ "offset_seconds": seconds
|
|
|
})
|
|
|
values.append(str(match['confidence']))
|
|
|
+ seconds += segment_size
|
|
|
|
|
|
v.append(','.join(values))
|
|
|
- log.info('[process_segment] {0} {1}'.format(
|
|
|
- os.path.split(path)[-1],
|
|
|
+ log.info('{0} {1}'.format(
|
|
|
+ grabaciones[i].name,
|
|
|
','.join(values),
|
|
|
))
|
|
|
- st.text('[process_segment] {0} {1}'.format(
|
|
|
- os.path.split(path)[-1],
|
|
|
+ st.text('{0} {1}'.format(
|
|
|
+ grabaciones[i].name,
|
|
|
','.join(values),
|
|
|
))
|
|
|
|
|
@@ -113,22 +120,9 @@ def process_segment(anuncios, grabaciones, audios=None, calibration=None):
|
|
|
log.error('[process_segment] {}'.format(ex))
|
|
|
|
|
|
try:
|
|
|
- encontrados = {}
|
|
|
- item_ids = []
|
|
|
- for i in item_ids:
|
|
|
- r = [result for result in results if result["name"] == i]
|
|
|
- encontrados[i] = find_repetitions(r, segments_needed=segments_needed, calibration=calibration,)
|
|
|
-
|
|
|
- #for id in encontrados:r
|
|
|
- # for e in encontrados[id]:
|
|
|
- # for i in item['elementos']:
|
|
|
- # if i['id'] == id and i['anuncio'] == e['ad']:
|
|
|
- # if 'encontrados' not in i:
|
|
|
- # i['encontrados'] = []
|
|
|
- # i['encontrados'].append(e)
|
|
|
- # break
|
|
|
-
|
|
|
- #item["archivos_perdidos"] = (12 - audios_counter) if audios_counter < 12 else 0
|
|
|
+ encontrados = find_repetitions(results, segments_needed=int(calibration['sn']), calibration=calibration)
|
|
|
+ st.subheader("Encontrados")
|
|
|
+ st.write(encontrados)
|
|
|
except ConnectionError as ex:
|
|
|
log.error('[process_segment] {}'.format(str(ex)))
|
|
|
except UserWarning as warn:
|
|
@@ -142,17 +136,12 @@ def find_repetitions(results, segments_needed=2, calibration=None):
|
|
|
expect_space = False
|
|
|
expect_recover = False
|
|
|
last_value_in_threshold_index = -1
|
|
|
- fall_tolerance = calibration['fallTolerance']
|
|
|
+ fall_tolerance = calibration['tf']
|
|
|
found = []
|
|
|
-
|
|
|
- high = 100 # Obtener este valor desde un parámetro
|
|
|
- middle_high = 50 # Obtener este valor desde un parámetro
|
|
|
- segment_middle_needed = 2 # Obtener este valor desde un parámetro
|
|
|
- found_high = None
|
|
|
- found_middle_high = []
|
|
|
+ last_found = None
|
|
|
|
|
|
if threshold_mode == THRESHOLD_FIXED:
|
|
|
- threshold = calibration['threshold']
|
|
|
+ threshold = int(calibration['th'])
|
|
|
elif threshold_mode == THRESHOLD_AVERAGE:
|
|
|
values = [x['confidence'] for x in results]
|
|
|
threshold = math.ceil(float(sum(values)) / float(len(values)))
|
|
@@ -161,14 +150,6 @@ def find_repetitions(results, segments_needed=2, calibration=None):
|
|
|
segments_needed = 1
|
|
|
|
|
|
for index, result in enumerate(results):
|
|
|
- #if result['confidence'] >= high:
|
|
|
- # if found_high is None:
|
|
|
- # found_high = index
|
|
|
- # elif result['confidence'] > results[found_high]['confidence']:
|
|
|
- # found_high = index
|
|
|
- #elif result['confidence'] >= middle_high:
|
|
|
- # found_middle_high.append(index)
|
|
|
-
|
|
|
if not expect_space:
|
|
|
if result['confidence'] >= threshold:
|
|
|
found_counter += 1
|
|
@@ -182,17 +163,11 @@ def find_repetitions(results, segments_needed=2, calibration=None):
|
|
|
elif fall_tolerance:
|
|
|
if not expect_recover:
|
|
|
if last_value_in_threshold_index != -1:
|
|
|
- """ Solo cuando ya haya entrado por lo menos
|
|
|
- un valor en el rango del threshold, es cuando
|
|
|
- se podrá esperar un valor bajo """
|
|
|
expect_recover = True
|
|
|
found_down_counter += 1
|
|
|
else:
|
|
|
pass
|
|
|
else:
|
|
|
- """ Si después de haber pasado tolerado 1 elemento
|
|
|
- vuelve a salir otro fuera del threshold continuo,
|
|
|
- entonces ya se da por perdido """
|
|
|
found_counter = 0
|
|
|
found_down_counter = 0
|
|
|
found_index = None
|
|
@@ -203,27 +178,17 @@ def find_repetitions(results, segments_needed=2, calibration=None):
|
|
|
found_down_counter = 0
|
|
|
found_index = None
|
|
|
expect_recover = False
|
|
|
- # Aquí veremos si hay un valor alto
|
|
|
- #if found_high is not None:
|
|
|
- # found_row = results[found_high]
|
|
|
- # found.append(found_row)
|
|
|
- #elif len(found_middle_high) >= segment_middle_needed:
|
|
|
- # found_row = results[found_middle_high[0]]
|
|
|
- # found.append(found_row)
|
|
|
- #found_high = None
|
|
|
- #found_middle_high = []
|
|
|
|
|
|
else:
|
|
|
if result['confidence'] <= threshold:
|
|
|
expect_space = False
|
|
|
|
|
|
- if found_counter >= segments_needed:
|
|
|
+ if found_counter >= segments_needed and last_found != found_index:
|
|
|
found_row = results[found_index]
|
|
|
found.append(found_row)
|
|
|
+ last_found = found_index
|
|
|
found_counter = 0
|
|
|
expect_space = True
|
|
|
- #found_high = None
|
|
|
- #found_middle_high = []
|
|
|
|
|
|
return found
|
|
|
|
|
@@ -239,20 +204,40 @@ def limpiar_archivos():
|
|
|
|
|
|
|
|
|
def main():
|
|
|
- if st.button("Limpiar archivos"):
|
|
|
- limpiar_archivos()
|
|
|
|
|
|
- anuncios = st.file_uploader("Elige los anuncios", accept_multiple_files=True, type="mp3")
|
|
|
+ st.subheader('Subir archivos para comparar')
|
|
|
+
|
|
|
+ u1, u2 = st.beta_columns([3, 3])
|
|
|
+ anuncios = u1.file_uploader("Anuncios", accept_multiple_files=True, type="mp3")
|
|
|
for i in range(0, len(anuncios)):
|
|
|
- with open("anuncios/audio-{}.mp3".format(i,), "wb") as audio:
|
|
|
+ with open("anuncios/{}".format(anuncios[i].name,), "wb") as audio:
|
|
|
audio.write(anuncios[i].getvalue())
|
|
|
|
|
|
- grabaciones = st.file_uploader("Elige la grabación", accept_multiple_files=True, type="mp3")
|
|
|
+ grabaciones = u2.file_uploader("Grabaciones", accept_multiple_files=True, type="mp3")
|
|
|
+ grabaciones.sort(key=lambda x: x.name)
|
|
|
for i in range(0, len(grabaciones)):
|
|
|
- with open("grabaciones/audio-{}.mp3".format(i,), "wb") as audio:
|
|
|
+ with open("grabaciones/{}".format(grabaciones[i].name,), "wb") as audio:
|
|
|
audio.write(grabaciones[i].getvalue())
|
|
|
|
|
|
+ if st.button("Borrar archivos anteriores"):
|
|
|
+ limpiar_archivos()
|
|
|
+
|
|
|
+ st.subheader('Parámetros de calibración')
|
|
|
+
|
|
|
+ col1, col2, col3, col4 = st.beta_columns([1,1,1,1])
|
|
|
+ umbral = col1.text_input("Umbral", 12)
|
|
|
+ segmentos_necesarios = col2.text_input("Sementos necesarios", 4)
|
|
|
+ caida = col3.text_input("Tolerancia a caida", 1)
|
|
|
+ segmento = col4.text_input("Tamaño del Segmento", 5)
|
|
|
+
|
|
|
+ calibracion = {
|
|
|
+ "th": umbral,
|
|
|
+ "tf": caida,
|
|
|
+ "sn": segmentos_necesarios,
|
|
|
+ "ss": segmento
|
|
|
+ }
|
|
|
+
|
|
|
if st.button("Comparar"):
|
|
|
- process_segment(anuncios, grabaciones)
|
|
|
+ process_segment(anuncios, grabaciones, calibration=calibracion)
|
|
|
|
|
|
main()
|