annual-stats/main.js
2025-03-02 09:21:28 +01:00

142 lines
7.6 KiB
JavaScript

import { parse } from 'csv-parse/sync';
import * as fs from 'fs';
// Function to truncate floats to a fixed decimal count
function toFixed(num, fixed) {
var re = new RegExp('^-?\\d+(?:\.\\d{0,' + (fixed || -1) + '})?');
return num.toString().match(re)[0];
}
// This is an external output
const totalUsersCount2024 = 120;
// Parse raw data
const fileContents = fs.readFileSync('./banc_de_temps_2024.csv').toString();
let records = parse(fileContents, {
columns: true,
skip_empty_lines: true
});
records = records.map(r=>{
// Transform hours:minutes format into minutes for easier calculation
let firstTime = true;
let consumedMinutes = r.cantidad.split(":").map(t=>parseInt(t)).reduce((acc,val)=>{
if(firstTime) {
firstTime = false;
return acc+val*60;
} else {
return acc+val;
}
},0);
r.cantidad=consumedMinutes;
// Add the date to the activity name so that equally named activities done in different dates count as different for aggregation purposes
let finalActivityName = r.anuncio + "_" + r.fecha
r.anuncio = finalActivityName
return r;
});
// Aggregate activities per person (a person is considered to have participated in an activity when it was either the giver or the taker)
let numberOfActivitiesCount = {};
records.forEach(r=>{
let person = r.dadora in numberOfActivitiesCount ? numberOfActivitiesCount[r.dadora] : { activities_count: 0, total_time: 0 };
person.activities_count++;
person.total_time += r.cantidad;
numberOfActivitiesCount[r.dadora] = person;
person = r.recibidora in numberOfActivitiesCount ? numberOfActivitiesCount[r.recibidora] : { activities_count: 0, total_time: 0 };
person.activities_count++;
person.total_time += r.cantidad;
numberOfActivitiesCount[r.recibidora] = person;
});
// Filter out the user Banc del temps as it is a fake entity and would be an absolute outlier that skews metrics
delete numberOfActivitiesCount["Banc de Temps de Sants"];
// Build the final data array for queries
let activeUsersCount = Object.keys(numberOfActivitiesCount).length;
let activitiesAggregation = Object.keys(numberOfActivitiesCount).map(k=>{
return {
user: k,
activities_count: numberOfActivitiesCount[k].activities_count,
time_spent: {
total: numberOfActivitiesCount[k].total_time,
hours: Math.floor(numberOfActivitiesCount[k].total_time/60),
minutes: numberOfActivitiesCount[k].total_time%60
}
};
});
// Aggregate per category (we need to handle separately activities that are groupal because they skew metrics otherwise)
let groupalCategoriesNames = ["Actividades comunitarias" ]
let nonGroupalCategoriesAggregation = {};
let groupalCategoriesAggregation = {};
records.forEach(r=>{
if(groupalCategoriesNames.includes(r.categoria)) {
// We will subgroup by comment value for the count of a given category. This is tricky because the data itself is neither really well formatted nor clean, so this is an estimation at best
// Attending a group event will count different from leading the event, so each event generates at least between 1 and 2 counts.
// After this first pass, we will need to sum all subcategories into a single count
let groupalCategoryCount = r.categoria in groupalCategoriesAggregation ? groupalCategoriesAggregation[r.categoria] : {};
let groupalSubcategoryCount = r.anuncio in groupalCategoryCount ? groupalCategoryCount[r.anuncio] : 0;
groupalCategoryCount[r.anuncio] = ++groupalSubcategoryCount;
groupalCategoriesAggregation[r.categoria] = groupalCategoryCount;
} else {
let categoryCount = r.categoria in nonGroupalCategoriesAggregation ? nonGroupalCategoriesAggregation[r.categoria] : 0;
nonGroupalCategoriesAggregation[r.categoria] = ++categoryCount;
}
});
groupalCategoriesNames.forEach(categoryName=>{
if(categoryName in groupalCategoriesAggregation) {
let groupalCategoryAggregation = groupalCategoriesAggregation[categoryName];
let categoryCount = Object.keys(groupalCategoryAggregation).length; // We reduce the count for a given groupal category to the count of subcategories. A very coarse calculation, but all other ways are also fake.
groupalCategoriesAggregation[categoryName] = categoryCount; // note: Here we're transforming the value from an object to an int
}
});
// Now we join both groupal and non groupal activities into a single data structure
let categoriesAggregation = {...groupalCategoriesAggregation, ...nonGroupalCategoriesAggregation};
// Here comes another estimation on the number of activities, because we can't really automatically count the real number of activities, since they're not well reported
let totalActivities = Object.keys(categoriesAggregation).map(k=>categoriesAggregation[k]).reduce((acc,value)=>acc+value,0);
let categoriesClassification = Object.keys(categoriesAggregation)
.map(categoryName=>{
return {name: categoryName, count: categoriesAggregation[categoryName], percentage: categoriesAggregation[categoryName]/totalActivities };
})
.sort((cat1,cat2)=>cat2.count - cat1.count);
// Report results
//// Calculate the 10 most active persons (both in number of activities they participated in, and also total time they spent)
let mostActivePersonsPerCount = activitiesAggregation.sort((p1,p2)=>p2.activities_count - p1.activities_count).slice(0,10);
let mostActivePersonsPerTimeSpent = activitiesAggregation.sort((p1,p2)=>p2.time_spent.total - p1.time_spent.total).slice(0,10);
//// Calculate amount of users that have done more than 3 activities and its percentage
let usersWithAtLeast3ActivitiesCount = activitiesAggregation.filter(p=>p.activities_count >= 3).length;
let usersWithAtLeast3Activities = {
count: usersWithAtLeast3ActivitiesCount,
percentage: usersWithAtLeast3ActivitiesCount / totalUsersCount2024
}
//// Calculate amount of users that have done between 1 and 2 activities and its percentage
let usersWithAtLeast2ActivitiesCount = activitiesAggregation.filter(p=>p.activities_count >= 1 && p.activities_count <=2).length;
let usersWithAtLeast2Activities = {
count: usersWithAtLeast2ActivitiesCount,
percentage: usersWithAtLeast2ActivitiesCount / totalUsersCount2024
}
//// Calculate amount of users that have done no activity and its percentage
let usersWithNoActivitiesCount = totalUsersCount2024 - activeUsersCount;
let usersWithNoActivities = {
count: usersWithNoActivitiesCount,
percentage: usersWithNoActivitiesCount / totalUsersCount2024
}
// Output results
console.log(`Numero de eventos total aproximado: ${totalActivities}`);
console.log("\n\n");
console.log("Las 10 personas mas activas - Cantidad de actividades:");
console.log(mostActivePersonsPerCount.map(p=>`${p.user} - ${p.activities_count} actividades`).reduce((acc,val)=>`${acc}\n${val}`));
console.log("\n\n");
console.log("Las 10 personas mas activas - Cantidad de horas:");
console.log(mostActivePersonsPerTimeSpent.map(p=>`${p.user} - ${p.time_spent.hours}:${p.time_spent.minutes} horas`).reduce((acc,val)=>`${acc}\n${val}`));
console.log("\n\n");
console.log(`Numero de personas con 3 o más actividades realizadas: ${usersWithAtLeast3Activities.count} - ${toFixed(usersWithAtLeast3Activities.percentage*100,2)}%`);
console.log(`Numero de personas con entre 1 y 2 actividades realizadas: ${usersWithAtLeast2Activities.count} - ${toFixed(usersWithAtLeast2Activities.percentage*100,2)}%`);
console.log(`Numero de personas sin ninguna actividad realizada: ${usersWithNoActivities.count} - ${toFixed(usersWithNoActivities.percentage*100,2)}%`);
console.log("\n\n");
console.log("Listado de eventos por categorías: ");
console.log(categoriesClassification.map(c => `${c.name}: ${c.count} - ${toFixed(c.percentage*100,2)}%`).reduce((acc,val)=>`${acc}\n${val}`));