import { parse } from 'csv-parse/sync'; import * as fs from 'fs'; // Function to truncate floats to a fixed decimal count function toFixed(num, fixed) { var re = new RegExp('^-?\\d+(?:\.\\d{0,' + (fixed || -1) + '})?'); return num.toString().match(re)[0]; } // This is an external output const totalUsersCount2024 = 120; // Parse raw data const fileContents = fs.readFileSync('./banc_de_temps_2024.csv').toString(); let records = parse(fileContents, { columns: true, skip_empty_lines: true }); records = records.map(r=>{ // Transform hours:minutes format into minutes for easier calculation let firstTime = true; let consumedMinutes = r.cantidad.split(":").map(t=>parseInt(t)).reduce((acc,val)=>{ if(firstTime) { firstTime = false; return acc+val*60; } else { return acc+val; } },0); r.cantidad=consumedMinutes; // Add the date to the activity name so that equally named activities done in different dates count as different for aggregation purposes let finalActivityName = r.anuncio + "_" + r.fecha r.anuncio = finalActivityName return r; }); // Aggregate activities per person (a person is considered to have participated in an activity when it was either the giver or the taker) let numberOfActivitiesCount = {}; records.forEach(r=>{ let person = r.dadora in numberOfActivitiesCount ? numberOfActivitiesCount[r.dadora] : { activities_count: 0, total_time: 0 }; person.activities_count++; person.total_time += r.cantidad; numberOfActivitiesCount[r.dadora] = person; person = r.recibidora in numberOfActivitiesCount ? numberOfActivitiesCount[r.recibidora] : { activities_count: 0, total_time: 0 }; person.activities_count++; person.total_time += r.cantidad; numberOfActivitiesCount[r.recibidora] = person; }); // Filter out the user Banc del temps as it is a fake entity and would be an absolute outlier that skews metrics delete numberOfActivitiesCount["Banc de Temps de Sants"]; // Build the final data array for queries let activeUsersCount = Object.keys(numberOfActivitiesCount).length; let activitiesAggregation = Object.keys(numberOfActivitiesCount).map(k=>{ return { user: k, activities_count: numberOfActivitiesCount[k].activities_count, time_spent: { total: numberOfActivitiesCount[k].total_time, hours: Math.floor(numberOfActivitiesCount[k].total_time/60), minutes: numberOfActivitiesCount[k].total_time%60 } }; }); // Aggregate per category (we need to handle separately activities that are groupal because they skew metrics otherwise) let groupalCategoriesNames = ["Actividades comunitarias" ] let nonGroupalCategoriesAggregation = {}; let groupalCategoriesAggregation = {}; records.forEach(r=>{ if(groupalCategoriesNames.includes(r.categoria)) { // We will subgroup by comment value for the count of a given category. This is tricky because the data itself is neither really well formatted nor clean, so this is an estimation at best // Attending a group event will count different from leading the event, so each event generates at least between 1 and 2 counts. // After this first pass, we will need to sum all subcategories into a single count let groupalCategoryCount = r.categoria in groupalCategoriesAggregation ? groupalCategoriesAggregation[r.categoria] : {}; let groupalSubcategoryCount = r.anuncio in groupalCategoryCount ? groupalCategoryCount[r.anuncio] : 0; groupalCategoryCount[r.anuncio] = ++groupalSubcategoryCount; groupalCategoriesAggregation[r.categoria] = groupalCategoryCount; } else { let categoryCount = r.categoria in nonGroupalCategoriesAggregation ? nonGroupalCategoriesAggregation[r.categoria] : 0; nonGroupalCategoriesAggregation[r.categoria] = ++categoryCount; } }); groupalCategoriesNames.forEach(categoryName=>{ if(categoryName in groupalCategoriesAggregation) { let groupalCategoryAggregation = groupalCategoriesAggregation[categoryName]; let categoryCount = Object.keys(groupalCategoryAggregation).length; // We reduce the count for a given groupal category to the count of subcategories. A very coarse calculation, but all other ways are also fake. groupalCategoriesAggregation[categoryName] = categoryCount; // note: Here we're transforming the value from an object to an int } }); // Now we join both groupal and non groupal activities into a single data structure let categoriesAggregation = {...groupalCategoriesAggregation, ...nonGroupalCategoriesAggregation}; // Here comes another estimation on the number of activities, because we can't really automatically count the real number of activities, since they're not well reported let totalActivities = Object.keys(categoriesAggregation).map(k=>categoriesAggregation[k]).reduce((acc,value)=>acc+value,0); let categoriesClassification = Object.keys(categoriesAggregation) .map(categoryName=>{ return {name: categoryName, count: categoriesAggregation[categoryName], percentage: categoriesAggregation[categoryName]/totalActivities }; }) .sort((cat1,cat2)=>cat2.count - cat1.count); // Report results //// Calculate the 10 most active persons (both in number of activities they participated in, and also total time they spent) let mostActivePersonsPerCount = activitiesAggregation.sort((p1,p2)=>p2.activities_count - p1.activities_count).slice(0,10); let mostActivePersonsPerTimeSpent = activitiesAggregation.sort((p1,p2)=>p2.time_spent.total - p1.time_spent.total).slice(0,10); //// Calculate amount of users that have done more than 3 activities and its percentage let usersWithAtLeast3ActivitiesCount = activitiesAggregation.filter(p=>p.activities_count >= 3).length; let usersWithAtLeast3Activities = { count: usersWithAtLeast3ActivitiesCount, percentage: usersWithAtLeast3ActivitiesCount / totalUsersCount2024 } //// Calculate amount of users that have done between 1 and 2 activities and its percentage let usersWithAtLeast2ActivitiesCount = activitiesAggregation.filter(p=>p.activities_count >= 1 && p.activities_count <=2).length; let usersWithAtLeast2Activities = { count: usersWithAtLeast2ActivitiesCount, percentage: usersWithAtLeast2ActivitiesCount / totalUsersCount2024 } //// Calculate amount of users that have done no activity and its percentage let usersWithNoActivitiesCount = totalUsersCount2024 - activeUsersCount; let usersWithNoActivities = { count: usersWithNoActivitiesCount, percentage: usersWithNoActivitiesCount / totalUsersCount2024 } // Output results console.log(`Numero de eventos total aproximado: ${totalActivities}`); console.log("\n\n"); console.log("Las 10 personas mas activas - Cantidad de actividades:"); console.log(mostActivePersonsPerCount.map(p=>`${p.user} - ${p.activities_count} actividades`).reduce((acc,val)=>`${acc}\n${val}`)); console.log("\n\n"); console.log("Las 10 personas mas activas - Cantidad de horas:"); console.log(mostActivePersonsPerTimeSpent.map(p=>`${p.user} - ${p.time_spent.hours}:${p.time_spent.minutes} horas`).reduce((acc,val)=>`${acc}\n${val}`)); console.log("\n\n"); console.log(`Numero de personas con 3 o más actividades realizadas: ${usersWithAtLeast3Activities.count} - ${toFixed(usersWithAtLeast3Activities.percentage*100,2)}%`); console.log(`Numero de personas con entre 1 y 2 actividades realizadas: ${usersWithAtLeast2Activities.count} - ${toFixed(usersWithAtLeast2Activities.percentage*100,2)}%`); console.log(`Numero de personas sin ninguna actividad realizada: ${usersWithNoActivities.count} - ${toFixed(usersWithNoActivities.percentage*100,2)}%`); console.log("\n\n"); console.log("Listado de eventos por categorías: "); console.log(categoriesClassification.map(c => `${c.name}: ${c.count} - ${toFixed(c.percentage*100,2)}%`).reduce((acc,val)=>`${acc}\n${val}`));