@conference {18091, title = {DRuiD: Designing Reconfigurable Architectures with Decision-making Support}, booktitle = {19th Asia and South Pacific Design Automation Conference (ASP-DAC)}, year = {2014}, month = {01/2014}, address = {Singapore}, url = {http://home.deib.polimi.it/gpalermo/papers/ASPDAC14DRUID.pdf}, author = {Mariani, Giovanni and Meeuws, Roel and Palermo, Gianluca and Sima, Vlad-Mihai and Silvano, Cristina and Bertels, Koen} } @article {17735, title = {ARTE: an Application-specific Run-Time Management Framework for Multi-cores based on Queuing Models}, journal = {Parallel Computing}, year = {2013}, author = {Mariani, Giovanni and Palermo, Gianluca and Zaccaria, Vittorio and Silvano, Cristina} } @article {18048, title = {A Configurable Monitoring Infrastructure for NoC-Based Architectures}, journal = {IEEE Transactions on Very Large Scale Integration (VLSI) Systems}, volume = {PP}, issue = {99}, year = {2013}, abstract = {In this brief, we propose a monitoring architecture for networks-on-chip that provides system information useful for designers to efficiently exploit, at design time and run-time, the system resources available in multiprocessor system-on-chip platforms. We focus on the analysis of the architectural details and design challenges of such a system, by describing powerful tools for monitoring information that can be used both at run-time for detecting dynamic changes in system behavior and at post-execution time for debugging and profiling of applications. This brief describes the design of the monitoring probes, together with the events detectable by them, and discusses an architecture for collecting, storing, and analyzing the information gathered during an application execution.}, keywords = {hardware counters, networks-on-chip (NoCs), performance monitoring, systems-on-chip (SoCs).}, issn = {1063-8210}, doi = {10.1109/TVLSI.2013.2290102}, author = {Fiorin, Leandro and Palermo, Gianluca and Silvano, Cristina} } @article {18093, title = {Design-space Exploration and Runtime Resource Management for Multicores}, journal = {ACM Transactions on Embedded Computing Systems (TECS) - Special issue on application-specific processors}, volume = {13}, issue = {2}, year = {2013}, month = {09/2013}, pages = {20:1{\textendash}20:27}, abstract = {Application-specific multicore architectures are usually designed by using a configurable platform in which a set of parameters can be tuned to find the best trade-off in terms of the selected figures of merit (such as energy, delay, and area). This multi-objective optimization phase is called Design-Space Exploration (DSE). Among the design-time (hardware) configurable parameters we can find the memory subsystem configuration (such as cache size and associativity) and other architectural parameters such as the instruction-level parallelism of the system processors. Among the runtime (software) configurable parameters we can find the degree of task-level parallelism associated with each application running on the platform. The contribution of this article is twofold; first, we introduce an evolutionary (NSGA-II-based) methodology for identifying a hardware configuration which is robust with respect to applications and corresponding datasets. Second, we introduce a novel runtime heuristic that exploits design-time identified operating points to provide guaranteed throughput to each application. Experimental results show that the design-time/runtime combined approach improves the runtime performance of the system with respect to existing reference techniques, while meeting the overall power budget.}, keywords = {application-specific platforms, design-space exploration, genetic algorithms, Multicore architectures, operating systems, resource reservation, runtime resource management, throughput maximization}, issn = {1539-9087}, doi = {10.1145/2514641.2514647}, author = {Mariani, Giovanni and Palermo, Gianluca and Zaccaria, Vittorio and Silvano, Cristina} } @conference {18090, title = {Run-time Optimization of a Dynamically Reconfigurable Embedded System Through Performance Prediction}, booktitle = {23rd International Conference on Field Programmable Logic and Applications (FPL)}, year = {2013}, month = {09/2013}, publisher = {IEEE}, organization = {IEEE}, address = {Porto, Portugal}, doi = {10.1109/FPL.2013.6645523}, author = {Mariani, Giovanni and Sima, Vlad-Mihai and Palermo, Gianluca and Zaccaria, Vittorio and Marchiori, Giacomo and Silvano, Cristina and Bertels, Koen} } @conference {157.mariani2012parma, title = {Evaluating Run-time Resource Management Policies for Multi-core Embedded Platforms with the EMME Evaluation Framework}, booktitle = {Workshop on Parallel Programming and Run-time Management Techniques for Many-core Architectures}, year = {2012}, month = {February}, keywords = {EMME, multi-core, run-time resource management, simulation}, author = {Mariani, Giovanni and Palermo, Gianluca and Zaccaria, Vittorio and Silvano, Cristina} } @article {156.MaPaSiZa12.TCAD, title = {OSCAR: an Optimization Methodology Exploiting Spatial Correlation in Multi-core Design Space}, journal = {IEEE Transactions on Computer-Aided Design}, volume = {21}, number = {-}, issue = {5}, year = {2012}, month = {05/2012}, pages = {740-753}, publisher = {IEEE}, abstract = {This paper presents OSCAR, an Optimization methodology exploiting Spatial CorrelAtion of multi-coRe design space. The paper builds upon the observation that power consumption and performance metrics of spatially close design configurations (or points) are statistically correlated. We propose to exploit the correlation by using a Response Surface Model (RSM), i.e., a closed-form expression suitable for predicting the quality of non-simulated design points. This model is useful during the design space exploration (DSE) phase to quickly converge to the Pareto set of the multi-objective problem without executing lengthy simulations. We compare the proposed heuristic with state-of-the-art approaches (conventional, RSM-based and structured DOEs). Experimental results show that OSCAR is a faster heuristic with respect to state of the art techniques such as Response-Surface Pareto Iterative Refinement - ReSPIR and Nondominated Sorting Genetic Algorithm - NSGA-II. Reported results also show that OSCAR can significantly improve structured DOE approaches by slightly increasing the number of experiments.}, keywords = {chip multi processor, correlation based design, design space exploration, multi-core, multi-objective optimization, OSCAR}, author = {Mariani, Giovanni and Palermo, Gianluca and Silvano, Cristina and Zaccaria, Vittorio} } @conference {158.mariani2012date, title = {Using Multi-objective Design Space Exploration to Enable Run-time Resource Management for Reconfigurable Architectures}, booktitle = {Proc. Design, Automation Test in Europe Conf. Exhibition (DATE)}, year = {2012}, month = {March}, author = {Mariani, Giovanni and Sima, Vlad-Mihai and Palermo, Gianluca and Zaccaria, Vittorio and Silvano, Cristina and Bertels, Koen} } @conference {149.MaPaSiZa.SASP11, title = {ARTE: an Application-specific Run-Time Management Framework for Multi-core Systems}, booktitle = {Proceedings IEEE SASP{\textquoteright}11 - Symposium on Application Specific Processors}, year = {2011}, month = {June}, address = {San Diego, CA, USA}, abstract = {Programmable multi-core and many-core platforms increase exponentially the challenge of task mapping and scheduling, provided that enough task-parallelism does exist for each application. This problem worsens when dealing with small ecosystems such as embedded systems-on-chip. In fact, in this case, the assumption of exploiting a traditional operating system is out of context given the memory available to satisfy the run-time footprint of such a configuration. An efficient Run-time Resource Management (RRM) becomes of paramount importance to dispatch tasks to the cores by taking into account the task-parallelization options that each application provides. State-of-the-art approaches to RRM try to allocate re- sources to maximize the instantaneous throughput while meeting a power budget constraint. In this paper, we will show that queuing theory can be an alternative yet effective way of solving resource allocation by presenting ARTE, an Application-specific Run-Time managEment framework. The framework exploits few assumptions about the target many-core computing fabric such as the availability of performance (throughput) information about the platform applications. We will show that this information can be combined, at run-time, with queuing models to enhance the response time of the applications by pounding the actual effect on the system power consumption better than previous approaches. Experimental results show that, compared to reference state-of-the-art RRM techniques, ARTE is able to efficiently improve system performance by pro-actively reducing the response time while meeting the same power consumption requirements. Besides, we will show that the run-time overhead of ARTE does not signicantly impact neither the system performance nor the on-chip-memory occupation.}, doi = {http://dx.doi.org/10.1109/SASP.2011.5941085}, author = {Mariani, Giovanni and Palermo, Gianluca and Silvano, Cristina and Zaccaria, Vittorio} } @inbook {18092, title = {Design Space Exploration for Run-Time Management of a Reconfigurable System for Video Streaming}, booktitle = {Multi-objective design space exploration of multiprocessor SoC architectures: the MULTICUBE approach}, year = {2011}, pages = {189-204}, publisher = {Springer}, organization = {Springer}, edition = {1}, isbn = {978-1-4419-8836-2}, author = {Mariani, Giovanni and Avasare, Prabhat and Ykman-Couvreur, Chantal and Vanmeerbeeck, Geert and Palermo, Gianluca and Silvano, Cristina and Zaccaria, Vittorio}, editor = {Silvano, Cristina and Fornaciari, William and Villar, Eugenio} } @inbook {138.MaAvYkVaPaSiZa.2011, title = {Design Space Exploration of a Reconfigurable System for Supporting Video Streaming Run-time Management}, booktitle = {Multi-objective design space exploration of multiprocessor SoC architectures: the MULTICUBE approach}, year = {2011}, publisher = {Springer}, organization = {Springer}, address = {New York, USA}, abstract = {This paper reports a case study of Design Space Exploration for supporting Run-time Resource Management (RRM). In particular the management of system resources for an MPSoC dedicated to multiple MPEG4 encoding is addressed in the context of an Automotive Cognitive Safety System (ACSS). The runtime management problem is defined as the minimization of the platform power consumption under resource and Quality of Service (QoS) constraints. The paper provides an insight of both, design-time and run-time aspects of the problem. During the prelimiary design-time Design Space Exploration (DSE) phase, the best configurations of run-time tunable parameters are statically identified for providing the best trade-offs in terms of run-time costs and application QoS. To speed up the optimization process without reducing the quality of final results, a multi-simulator framework is used for modeling platform performance. At run-time, the RRM exploits the design-time DSE results for deciding an operating configuration to be loaded for each MPEG4 encoder. This operation is carried out dynamically, by following the QoS requirements of the specific use-case.}, author = {Mariani, Giovanni and Avasare, Prabhat and Ykman-Couvreur, Chantal and Vanmeerbeeck, Geert and Palermo, Gianluca and Silvano, Cristina and Zaccaria, Vittorio} } @inbook {140.KaTuPaSiZaMaBoDo.2011, title = {Design Space Exploration of Parallel Architectures}, booktitle = {Multi-objective design space exploration of multiprocessor SoC architectures: the MULTICUBE approach}, year = {2011}, publisher = {Springer}, organization = {Springer}, address = {New York, USA}, abstract = {This chapter will present two significant applications of the MULTICUBE design space exploration framework. The first part will present the design space exploration of a low power processor developed by STMicroelectronics by using the modeFRONTIER tool to demonstrate the benefits DSE not only in terms of objective quality, but also in terms of impact on the design process within the corporate environment. The second part will describe the application of RSM models developed within MULTICUBE to a tiled, multiple-instruction, many-core architecture developed by ICT China. Overall, the results have showed that different models can present a trade-off of accuracy versus computational effort. In fact, throughout the evaluation, we observed that high accuracy models require high computational time (for both model construction time and prediction time); vice-versa low model construction and prediction time has led to low accuracy.}, author = {Kavka, Carlos and Turco, Alessandro and Palermo, Gianluca and Silvano, Cristina and Zaccaria, Vittorio and Mariani, Giovanni and Bocchio, Sara and Dongrui, Fan} } @inbook {144.AvYkVaMaPaSiZa.2011, title = {Design Space Exploration Supporting Run-time Resource Management}, booktitle = {Multi-objective design space exploration of multiprocessor SoC architectures: the MULTICUBE approach}, year = {2011}, publisher = {Springer}, organization = {Springer}, address = {New York, USA}, abstract = {Running multiple applications optimally in terms of Quality of Service (e.g., performance and power consumption) on embedded multi-core platforms is a huge challenge.Moreover, current applications exhibit unpredictable changes of the environment and workload conditions which makes the task of running them optimally even more difficult. This dynamic trend in application runs will grow even more in future applications. This paper presents an automated tool flow which tackles this challenge by a two-step approach: first at design-time, a Design Space Exploration (DSE) tool is coupled with a platform simulator(s) to get optimum operating points for the set of target applications. Secondly, at run-time, a lightweight Run-time Resource Manager (RRM) leverages the design-time DSE results for deciding an operating configuration to be loaded at run-time for each application. This decision is performed dynamically, by taking into consideration available platform resources and the QoS requirements of the specific use-case. To keep RRM execution and resource overhead at minimum, a very fast optimisation heuristic is integrated. Application of this tool-flow on a real-life multimedia use case (described in Chapter 9 of the book of this paper) will demonstrate a significant speedup in optimisation process while maintaining desired Quality of Service.}, author = {Avasare, Prabhat and Ykman-Couvreur, Chantal and Vanmeerbeeck, Geert and Mariani, Giovanni and Palermo, Gianluca and Silvano, Cristina and Zaccaria, Vittorio} } @article {145.YkAvMaZaPaSi11, title = {Linking run-time resource management of embedded multi-core platforms with automated design-time exploration}, journal = {IET Computers and Digital Techniques}, volume = {5}, number = {-}, year = {2011}, pages = {123{\textendash}135}, abstract = {Nowadays, owing to unpredictable changes of the environment and workload variation, optimally running multiple applications in terms of quality, performance and power consumption on embedded multi-core platforms is a huge challenge. A lightweight run-time manager, linked with an automated design-time exploration and incorporated in the host processor of the platform, is required to dynamically and efficiently configure the applications according to the available platform resources (e.g. processing elements, memories, communication bandwidth), for minimising the cost (e.g. power consumption), while satisfying the constraints (e.g. deadlines). This study presents a flow linking a design-time design space explorer, coupled with platform simulators at two abstraction levels, with a fast and lightweight priority-based heuristic integrated in the run-time manager to select near-optimal application configurations. To illustrate its feasibility and the very low complexity of the run-time selection, the proposed flow is used to manage the processors and clock frequencies of a multiple-stream MPEG4 encoder chip dedicated to automotive cognitive safety applications.}, doi = {http://dx.doi.org/10.1049/iet-cdt.2010.0030}, author = {Ykman-Couvreur, Chantal and Avasare, Prabhat and Mariani, Giovanni and Zaccaria, Vittorio and Palermo, Gianluca and Silvano, Cristina} } @inbook {17734, title = {The MULTICUBE Design Flow}, booktitle = {Multi-objective Design Space Exploration of Multiprocessor SoC Architectures}, year = {2011}, pages = {3-17}, publisher = {Springer New York}, organization = {Springer New York}, isbn = {978-1-4419-8836-2}, doi = {10.1007/978-1-4419-8837-9_1}, url = {http://dx.doi.org/10.1007/978-1-4419-8837-9_1}, author = {Silvano, Cristina and Fornaciari, William and Palermo, Gianluca and Zaccaria, Vittorio and Castro, Fabrizio and Martinez, Marcos and Bocchio, Sara and Zafalon, Roberto and Avasare, Prabhat and Vanmeerbeeck, Geert and Ykman-Couvreur, Chantal and Wouters, Maryse and Kavka, Carlos and Onesti, Luka and Turco, Alessandro and Bondi, Umberto and Mariani, Giovanni and Posadas, Hector and Villar, Eugenio and Wu, Chris and Dongrui, Fan and Hao, Zhang}, editor = {Silvano, Cristina and Fornaciari, William and Villar, Eugenio} } @inbook {139.Sietal2.2011, title = {MULTICUBE: Multi-Objective Design Space Exploration of Multi-Core Architectures}, booktitle = {VLSI 2010 Annual Symposium}, volume = {105}, year = {2011}, pages = {47-63}, publisher = {Springer}, organization = {Springer}, address = {Netherlands}, abstract = {Given the increasing complexity of Chip Multi-Processors (CMPs), a wide range of architecture parameters must be explored at design time to find the best trade-off in terms of multiple competing objectives (such as energy, delay, bandwidth, area, etc.) The design space of the target architectures is huge because it should consider all possible combinations of each hardware parameter (e.g., number of processors, processor issue width, L1 and L2 cache sizes, etc.). In this complex scenario, intuition and past experience of design architects is no more a sufficient condition to converge to an optimal design of the system. Indeed, Automatic Design Space Exploration (DSE) is needed to systematically support the analysis and quantitative comparison of a large amount of design alternatives in terms of multiple competing objectives (by means of Pareto analysis). The main goal of the MULTICUBE project consists of the definition of an automatic Design Space Exploration framework to support the design of next generation many-core architectures.}, isbn = {978-94-007-1487-8}, url = {http://dx.doi.org/10.1007/978-94-007-1488-5_4}, author = {Silvano, Cristina and Fornaciari, William and Palermo, Gianluca and Zaccaria, Vittorio and Castro, Fabrizio and Martinez, Marcos and Bocchio, Sara and Zafalon, Roberto and Avasare, Prabhat and Vanmeerbeeck, Geert and Ykman-Couvreur, Chantal and Wouters, Maryse and Kavka, Carlos and Onesti, Luka and Turco, Alessandro and Bondi, Umberto and Mariani, Giovanni and Posadas, Hector and Villar, Eugenio and Wu, Chris and Dongrui, Fan and Hao, Zhang and Shibin, Tang} } @inbook {143.RiKaTuPaSiZaMa.2011, title = {Optimization Algorithms for Embedded System Design Space Exploration}, booktitle = {Multi-objective design space exploration of multiprocessor SoC architectures: the MULTICUBE approach}, year = {2011}, publisher = {Springer}, organization = {Springer}, address = {New York, USA}, abstract = {This paper is dedicated to the optimization algorithms developed in the MULTICUBE project and to their surrounding environment. Two software design space exploration (DSE) tools host the algorithms: Multicube Explorer and mode-FRONTIER. The description of the proposed algorithms is the central part of the paper. The focus will be on newly developed algorithms and on ad-hoc extensions of existing techniques in order to face with discrete and categorical design space parameters that are very common when working with embedded systems design. This paper will also provide some fundamental guidelines to build a strategy for testing the performance and accuracy of such algorithms. The aim is mainly to build confidence in optimization techniques, rather than to simply compare one algorithm versus another one. The no-free-lunch theorem for optimization has to be taken into consideration and therefore the analysis will look forward to robustness and industrial reliability of the results.}, author = {Rigoni, Enrico and Kavka, Carlos and Turco, Alessandro and Palermo, Gianluca and Silvano, Cristina and Zaccaria, Vittorio and Mariani, Giovanni} } @inbook {142.PaSiZaRiKaTuMa.2011, title = {Response Surface Modeling for Embedded System Design Space Exploration}, booktitle = {Multi-objective design space exploration of multiprocessor SoC architectures: the MULTICUBE approach}, year = {2011}, publisher = {Springer}, organization = {Springer}, address = {New York, USA}, abstract = {A typical design space exploration flow involves an event-based simulator in the loop, often leading to an actual evaluation time that can exceed practical limits for realistic applications. Chip multi-processor architectures further exacerbate this problem given that the actual simulation speed decreases by increasing the number of cores of the chip. Traditional design space exploration lacks of efficient techniques that reduce the number of architectural alternatives to be analyzed. In this chapter, we introduce a set of statistical and machine learning techniques that can be used to predict system level metrics by using closed-form analytical expressions instead of lengthy simulations; the latter are called Response Surface Models (RSM). The principle of RSM is to exploit a set of simulations generated by one or more Design of Experiments strategies to build a surrogate model to predict the system-level metrics. The response model has the same input and output features of the original simulation based model but offers significant speed-up by leveraging analytical, closed-form functions which are tuned during model training. The techniques presented in this chapter can be used to improve the performance of traditional design space exploration algorithms such as those presented in Chap. 3.}, author = {Palermo, Gianluca and Silvano, Cristina and Zaccaria, Vittorio and Rigoni, Enrico and Kavka, Carlos and Turco, Alessandro and Mariani, Giovanni} } @conference {126.MaPaZaBrJoSi11, title = {A Correlation-based Design Space Exploration Methodology for Multi-Processor Systems-on-Chip}, booktitle = {Proceedings of DAC 2010: Design Automation Conference}, year = {2010}, month = {June}, pages = {120{\textendash}125}, address = {Anheim, CA, USA}, abstract = {Given the increasing complexity of multi-processor systems-on-chip, a wide range of parameters must be tuned to find the best trade-offs in terms of the selected system figures of merit (such as energy, delay and area). This optimization phase is called Design Space Exploration (DSE) consisting of a Multi-Objective Optimization (MOO) problem. In this paper, we propose an iterative design space exploration methodology exploiting the statistical properties of known system configurations to infer, by means of a correlation-based analysis, the next design points to be analyzed with low-level simulations. In fact, the knowledge of few design points is used to predict the expected improvement of unknown configurations. We show that the correlation of the configurations within the multi-processor design space can be modeled successfully with analytical functions and, thus, speed up the overall exploration phase. This makes the proposed methodology a model-assisted heuristic that, for the first time, exploits the correlation about architectural configurations to converge to the solution of the multi-objective problem.}, keywords = {design space exploration, kriging, multiprocessor system-on-chip (MPSoC), response surface}, author = {Mariani, Giovanni and Palermo, Gianluca and Zaccaria, Vittorio and Brankovic, Aleksandar and Jovic, Jovana and Silvano, Cristina} } @conference {117.MaAvVaYkPaSiZa10, title = {An industrial design space exploration framework for supporting run-time resource management on multi-core systems}, booktitle = {Proceedings of Design, Automation and Test in Europe (DATE) Conference}, year = {2010}, month = {March}, address = {Dresden, Germany}, abstract = {Current multi-core design methodologies are facing increasing unpredictability in terms of quality due to the actual diversity of the workloads that characterize the deployment scenario. To this end, these systems expose a set of dynamic parameters which can be tuned at run-time to achieve a specified Quality of Service (QoS) in terms of performance. A run-time manager operating system module is in charge of matching the specified QoS with the available platform resources by manipulating the overall degree of task-level parallelism of each application as well as the frequency of operation of each of the system cores. In this paper, we introduce a design space exploration framework for enabling and supporting enhanced resource management through software re-configuration on an industrial multicore platform. From one side, the framework operates at design time to identify a set of promising operating points which represent the optimal trade-off in terms of the target power consumption and performance. The operating points are used after the system has been deployed to support an enhanced resource management policy. This is done by a light-weight resource management layer which filters and selects the optimal parallelism of each application and operating frequency of each core to achieve the QoS constraints imposed by the external world and/or the user. We show how the proposed design-time and run-time techniques can be used to optimally manage the resources of a multiple-stream MPEG4 encoding chip dedicated to automotive cognitive safety tasks.}, author = {Mariani, Giovanni and Avasare, Prabhat and Vanmeerbeeck, Geert and Ykman-Couvreur, Chantal and Palermo, Gianluca and Silvano, Cristina and Zaccaria, Vittorio} } @conference {119.AvVaYkMaPaZaSi10, title = {Linking run-time management with design space exploration at multiple abstraction levels}, booktitle = {Proceedings of the DATE{\textquoteright}10 workshop on Designing for Embedded Parallel Computing Platforms: Architectures, Design Tools, and Applications}, year = {2010}, month = {March}, address = {Dresden, Germany}, abstract = {In present era of Multi-Processor System-on-Chip (MPSoC) embedded devices, to run multiple applications optimally (in terms of execution time and power consumption) is an enormous challenge. Embedded designers usually tackle this challenge by dividing it in two parts : at design-time Design Space Explorations (DSE) are performed to derive Pareto set of optimum operating points for each application and at run-time embedded device is monitored continuously to operate at one of the points in the derived Pareto set. Obviously run-time management relies heavily on accuracy of DSE. With growing complexity of embedded devices and with time-to-market pressures, at design-time, it is not trivial to derive the operating point Pareto set. On the other hand, at run-time, overhead introduced by a run-time management scheme should also not be high so as to minimally affect embedded device performance . We have developed techniques to tackle these embedded design issues. At design time, we use DSE with multiple simulators running at multiple abstraction levels to converge quickly to Pareto set of operating points. At runtime, to keep run-time overhead to a minimum, a hierarchical Runtime Resource Manager (RRM) is used with well-defined interfaces (services) between global and local resource managers. We applied our methodology on an embedded device having eight processor cores running multiple MPEG4 encoders. With our DSE methodology, we could derive Pareto set much quickly (as compared to full-space explorations). With our run-time schemes, overhead introduced by run-time manager was negligible.}, author = {Avasare, Prabhat and Vanmeerbeeck, Geert and Ykman-Couvreur, Chantal and Mariani, Giovanni and Palermo, Gianluca and Zaccaria, Vittorio and Silvano, Cristina} } @conference {136.FiPaSi10, title = {A Monitoring System for NoCs}, booktitle = {Proceedings of the Third International Workshop on Network on Chip Architectures (NoCArc{\textquoteright}2010)}, year = {2010}, month = {December}, address = {Atlanta, Georgia, USA}, abstract = {In this paper, we propose and discuss a monitoring architecture for Networks-on-Chip (NoCs) that provides system information useful for helping designers in efficiently exploiting resources available in new complex Multiprocessor System-on-Chip (MPSoC) platforms, and in understanding their behavior. We focus on the analysis of the architectural details and design challenges of such systems, by describing power- ful tools for detecting information that can be used both at run-time for detecting dynamic changes in system behavior and at post-execution time for debugging and profiling of applications. We detail the design of the probes monitoring the events and discuss an architecture for collection, storage, and analysis of information generated by them. We evaluate cost of the implementation of the system in terms of area and traffic overhead, and we present results obtained when monitoring a use-case multimedia application.}, keywords = {hardware counters, network-on-chip (NoC), performance monitoring, system-on-chip (SoC)}, author = {Fiorin, Leandro and Palermo, Gianluca and Silvano, Cristina} } @conference {115.ZaPaCaSiMa10, title = {Multicube Explorer: An Open Source Framework for Design Space Exploration of Chip Multi-Processors}, booktitle = {2PARMA: Proceedings of the Workshop on Parallel Programming and Run-time Management Techniques for Many-core Architectures}, year = {2010}, month = {February}, address = {Hannover, Germany}, abstract = {Given the increasing complexity of Chip Multi-Processors (CMPs), a wide range of architecture parameters must be explored at design time to find the best trade-off in terms of multiple competing objectives (such as energy, delay, bandwidth, area, etc.) The design space of the target architecture is huge because of it should consider all possible combinations of each parameter (number of processors, processor issue width, L1 and L2 cache sizes, etc.). In this complex scenario, the multi-objective exploration of the huge design space of next generation CMPs cannot be anymore based on intuition and past experience of the design architects. An Automatic Design Space Exploration methodology is needed to support systematically the exploration and the quantitative comparison of the design alternatives in terms of multiple competing objectives (Pareto analysis). An overall design space exploration framework is needed to combine all optimizations into a global search space with a common interface to the simulation and evaluation tools. Our work1 focuses on the definition of an automatic multi-objective Design Space Exploration (DSE) framework for tuning Chip Multi-Processor architectures by evaluating a set of metrics (such as energy and delay) for the next generation embedded computing platforms. Multicube Explorer is an interactive open-source framework to enable the designer to automatically explore a design space of configurations for a parameterized architecture for which an executable model (use case simulator) exists. Multicube Explorer is an advanced multi-objective optimization framework which is entirely command-line/script driven and can be re-targeted to any configurable platform by writing a suitable XML design space definition file and providing a configurable simulator}, author = {Zaccaria, Vittorio and Palermo, Gianluca and Castro, Fabrizio and Silvano, Cristina and Mariani, Giovanni} } @conference {129.Sietal.ISVLSI11, title = {Multicube: Multi-objective design space exploration of multi-core architectures}, booktitle = {ISVLSI 2010: IEEE Annual Symposium on VLSI}, year = {2010}, month = {July}, pages = {488{\textendash}493}, address = {Lixouri, Kefalonia - Greece}, abstract = {Technology trends enable the integration of many processor cores in a System-on-Chip (SoC). In these complex architectures, several architectural parameters can be tuned to find the best trade-off in terms of multiple metrics such as energy and delay. The main goal of the MULTICUBE project consists of the definition of an automatic Design Space Exploration framework to support the design of next generation many-core architectures.}, doi = {http://dx.doi.org/10.1109/ISVLSI.2010.67}, author = {Silvano, Cristina and Fornaciari, William and Palermo, Gianluca and Zaccaria, Vittorio and Castro, Fabrizio and Martinez, Marcos and Bocchio, Sara and Zafalon, Roberto and Avasare, Prabhat and Vanmeerbeeck, Geert and Ykman-Couvreur, Chantal and Wouters, Maryse and Kavka, Carlos and Onesti, Luka and Turco, Alessandro and Bondi, Umberto and Mariani, Giovanni and Posadas, Hector and Villar, Eugenio and Wu, Chris and Dongrui, Fan and Hao, Zhang and Shibin, Tang} } @conference {98.MaPaSiZa309, title = {A Design Space Exploration Methodology Supporting Run-Time Resource Management for Multi-Processors System on-Chip}, booktitle = {Proceedings IEEE SASP{\textquoteright}09 - Symposium on Application Specific Processors}, year = {2009}, month = {July}, address = {San Francisco, CA, USA}, abstract = {Application Specific multi-processor Systems-on-chip are currently designed by using platform-based synthesis techniques. In this approach, a wide range of platform parameters are tuned either at design-time or at run-time, to provide the best trade-offs in terms of the selected system figures of merit (such as power and throughput) for a dynamic application-specific workload. Among the design-time (hardware) configurable parameters we can find the memory sub-system configuration (e.g. cache size and associativity) and other architectural parameters such as the instruction-level parallelism of the system processors. Among the run-time (software) configurable parameters we can find the overall degree of task-level parallelism associated with each application running on the chip. Typically, while the design-time exploration is performed in the early development stages for a set of static parameters, the tuning of the run-time parameters is performed dynamically by a run-time management software module after the system has been deployed. In this paper, we introduce a methodology for identifying a hardware configuration which is robust with respect to the variable workload scenario introduced by the run-time management. Moreover, the proposed methodology is aimed at providing useful information about the optimal software operating points of the applications in terms of task-level parallelism. The proposed methodology is based on the NSGA-II evolutionary heuristic algorithm assisted by an Artificial Neural Network (ANN). We then introduce a run-time management policy which is able to exploit the above information to maximize the performance of the system under power budget constraints. Experimental results show that the proposed technique is able to reduce the overall design space exploration time yet providing a near-optimal solution, in terms of hardware parameters, to enable an innovative and efficient run-time anagement policy.}, keywords = {artificial neural network, design space exploration, meta-model assisted optimization, multi-objective optimization, multiprocessor system-on-chip (MPSoC), run-time resource management}, doi = {http://dx.doi.org/10.1109/SASP.2009.5226331}, author = {Mariani, Giovanni and Palermo, Gianluca and Silvano, Cristina and Zaccaria, Vittorio} } @conference {101.MaPaSiZa209, title = {Meta-model Assisted Optimization for Design Space Exploration of Multi-Processor Systems-on-Chip}, booktitle = {Euromicro Proceedings of DSD{\textquoteright}09 - Conference on Digital System Design}, year = {2009}, month = {August}, address = {Patras, Greece}, abstract = {Multi-processor Systems-on-chip are currently designed by using platform-based synthesis techniques. In this approach, a wide range of platform parameters are tuned to find the best trade-offs in terms of the selected system figures of merit (such as energy, delay and area). This optimization phase is called Design Space Exploration (DSE) and it generally consists of a Multi-Objective Optimization (MOO) problem. The design space of a Multi-processor architecture is too large to e evaluated comprehensively. So far, several heuristic techniques have been proposed to address the MOO problem, but they are haracterized by low efficiency to identify the Pareto front. In this paper, we address the MPSoC DSE problem by using an NSGA-II modified to be assisted by an Artificial Neural Network (ANN). In particular we exploit statistical methods to compute the prediction confidence intervals for the ANN approximations. These information are adopted in the evolution control strategy in order to carefully select which individuals should be simulated. Experimental results show that the proposed techniques is able to reduce the simulations needed for the optimization without decreasing the quality of the obtained Pareto Front. Results are compared with state of the art techniques to demonstrate that optimization time due to simulation can be speed up by adopting statistical methods during evolution control.}, keywords = {artificial neural network, meta-model assisted optimization, multi-objective optimization, multiprocessor system-on-chip (MPSoC)}, doi = {http://dx.doi.org/10.1109/DSD.2009.154}, author = {Mariani, Giovanni and Palermo, Gianluca and Silvano, Cristina and Zaccaria, Vittorio} } @conference {95.FiPaSi09, title = {MPSoCs Run-Time Monitoring through Networks-on-Chip}, booktitle = {The 2009 Conference on Design, Automation and Test In Europe (DATE{\textquoteright}09)}, year = {2009}, month = {April/2009}, address = {Nice, France}, abstract = {Networks-on-Chip (NoCs) have appeared as design strategy to overcome the limitations, in terms of scalability, efficiency, and power consumption of current buses. In this paper, we discuss the idea of using NoCs to monitor system behaviour at run-time by tracing activities at initiators and targets. Main goal of the monitoring system is to retrieve information useful for run-time optimization and resources allocation in adaptive systems. Information detected by probes embedded within NIs is sent to a central unit, in charge of collecting and elaborating the data. We detail the design of the basic blocks and analyse the overhead associated with the ASIC implementation of the monitoring system, as well as discussing implications in terms of the additional traffic generated in the NoC.}, keywords = {monitoring, network-on-chip (NoC)}, author = {Fiorin, Leandro and Palermo, Gianluca and Silvano, Cristina} } @conference {96.MaPaSiZa09.2, title = {Multicube Explorer - A Design Space Exploration Framework for Embedded Systems-on-Chip}, booktitle = {Proceedings of the DATE{\textquoteright}09 workshop on Designing for Embedded Parallel Computing Platforms: Architectures, Design Tools, and Applications}, year = {2009}, month = {April}, address = {Nice, France}, abstract = {Multicube Explorer is a design space exploration tool for supporting platform-based design. It allows a fast optimization of parameterized system architecture towards a set of objective functions (e.g., energy, delay and area), by interacting with a system-level simulator. Multicube Explorer provides a set of innovative sampling and optimization techniques to help finding the best objective function trade-offs. It also provides an open XML interface for supporting new platforms/architectures.}, keywords = {area, delay, energy, fast optimization, multicube explorer, optimization techniques, platform-based design, system-level simulator, XML}, author = {Mariani, Giovanni and Palermo, Gianluca and Silvano, Cristina and Zaccaria, Vittorio} } @conference {97.Silvanoetal09, title = {MULTICUBE: Multi-Objective Design Space Exploration of Multiprocessor Architectures for Embedded Multimedia Applications}, booktitle = {Proceedings of the DATE{\textquoteright}09 workshop on Designing for Embedded Parallel Computing Platforms: Architectures, Design Tools, and Applications}, year = {2009}, month = {April}, address = {Nice, France}, author = {Silvano, Cristina and Palermo, Gianluca and Zaccaria, Vittorio and Fornaciari, William and Zafalon, Roberto and Bocchio, Sara and Martinez, Marcos and Wouters, Maryse and Vanmeerbeeck, Geert and Avasare, Prabhat and Onesti, Luka and Kavka, Carlos and Bondi, Umberto and Mariani, Giovanni and Villar, Eugenio and Posadas, Hector and Wu, Chris and Dongrui, Fan and Hao, Zhang} } @conference {99.MaPaSiZa09, title = {Multiprocessor System-on-Chip Design Space Exploration based on Multi-level Modeling Techniques}, booktitle = {Proceedings of IEEE IC-SAMOS{\textquoteright}09 - International Conference on Embedded Computer Systems: Architectures, MOdeling, and Simulation}, year = {2009}, month = {July}, address = {Samos, Greece}, abstract = {Multi-processor Systems-on-chip are currently designed by using platform-based synthesis techniques. In this approach, a wide range of platform parameters are tuned to find the best trade-offs in terms of the selected system figures of erit (such as energy, delay and area). This optimization phase is called Design Space Exploration (DSE) and it generally consists of a Multi-Objective Optimization (MOO) problem. The design space of a Multi-processor architecture is too large to be evaluated comprehensively. So far, several heuristic techniques have been proposed to address the MOO problem, but they are haracterized by low efficiency to identify the Pareto set. In this paper we propose a methodology for heuristic platform based design based on evolutionary algorithms and multi-level simulation techniques. In particular, we extend the NSGA-II with an approximate neural network meta-model for multi-processor architectures in order to replace expensive platform simulations with fast meta-model evaluation. The model accuracy and efficiency is improved by exploiting high-level platform simulation techniques. High-level simulation allows us to reduce the overall complexity of the neural network and improving its prediction power. Experimental results show that the proposed techniques is able to reduce the number of simulations needed for the optimization without decreasing the quality of the obtained Pareto set. Results are compared with state of the art echniques to demonstrate that optimization time due to simulation can be sped up by adopting multi-level simulation techniques.}, keywords = {artificial neural network, design space exploration, genetic algorithm, multi level modelling, multiprocessor system-on-chip (MPSoC), platform-based design}, doi = {http://dx.doi.org/10.1109/ICSAMOS.2009.5289222}, author = {Mariani, Giovanni and Palermo, Gianluca and Silvano, Cristina and Zaccaria, Vittorio} } @Patent {87.pat07301411.0-2413PATENT, title = {Programmable data protection device, secure programming manager system and process for controlling access to an interconnect network for an integrated circuit}, number = {EP 20070301411}, year = {2009}, month = {04/2009}, type = {Application}, chapter = {EP 2043324 A1}, abstract = {A data protection device for an interconnect network on chip (NoC) includes a header encoder that receives input requests for generating network packets. The encoder routes the input requests to a destination address. An access control unit controls and allows access to the destination address. The access control unit uses a memory to store access rules for controlling access to the network as a function of the destination address and of a source of the input request.}, issn = {EP 2043324 A1}, author = {Fiorin, Leandro and Palermo, Gianluca and Silvano, Cristina and Catalano, Valerio and Locatelli, Riccardo and Coppola, Marcello} } @inbook {92.FiPaSi09.2, title = {Security in NoC}, booktitle = {Networks-on-Chips: Theory and Practice}, year = {2009}, pages = {157-194}, publisher = {Taylor and Francis Group, LLC - CRC Press}, organization = {Taylor and Francis Group, LLC - CRC Press}, abstract = {Future integrated systems will contain billion of transistors, composing tens to hundreds of IP cores. These IP cores, implementing emerging complex multimedia and network ap- plications, should be able to deliver rich multimedia and networking services. An efficient cooperation among these IP cores (e.g., efficient data transfers) can be achieved through utilization of the available resources. The design of such complex systems includes several challenges to be addressed. Among others one challenge is to design an on-chip interconnection network that should be able to efficiently connect the IP cores. Another challenge is to derive such an application mapping that will make efficient usage of the available hardware resources . An architecture that is able to accommodate such a high number of cores, satisfying the need for commu- nication and data transfers, is the Network-on-Chip (NoC) architecture. For these reasons Networks-on-Chip become a popular choice for designing the on-chip interconnect for Systems-on-Chip (MPSoCs), and are supported from the industry (such as the Ethereal NoC from Philips, the STNoC from STMicroelectronics and an 80-core NoC from Intel). As it is presented in , the key design challenges of emerging NoC design are a) the communication infrastructure, b) the communication paradigm selection and c) the application mapping optimization.}, keywords = {network-on-chip (NoC), security}, author = {Fiorin, Leandro and Palermo, Gianluca and Silvano, Cristina and Elmiligi, Haytham}, editor = {Gebali, Fayez and El-Kharashi, Watheq} } @conference {111.ChPaSiZa10, title = {Yield Enhancement by Robust Application-specific Mapping on Network-on-Chips}, booktitle = {NoCArc{\textquoteright}09: Proceedings of the Second International Workshop on Network on-Chip Architectures}, year = {2009}, month = {December}, pages = {37{\textendash}42}, address = {New York City, USA}, abstract = {The current technological defect densities and production yields are a motivating factor supporting the introduction of design-for-manufacturability techniques during the highlevel design of complex, embedded systems based on networkon- chips (NoCs). In this context, we tackle the problem of mapping the IPs of a multi-processing system to the NoC nodes, by taking into account the effective robustness of the system with respect to permanent faults in the interconnection network due to manufacturing defects. In particular, we introduce an application specific methodology for identifying optimal NoCs mappings which minimize the variance of the system power and latency and maximizes the probability that the actual system will work when deployed, even in presence of faulty NoC links. We provide experimental results by comparing the proposed methodology with conventional mapping approaches, by highlighting benefits and drawbacks of both techniques}, author = {Choudhury, Anirban Dutta and Palermo, Gianluca and Silvano, Cristina and Zaccaria, Vittorio} } @conference {90.MaPaZaSi08, title = {An Efficient Design Space Exploration Methodology for Multi-Cluster VLIW Architectures based on Artificial Neural Networks}, booktitle = {Proc. IFIP International Conference on Very Large Scale Integration VLSI - SoC 2008}, year = {2008}, month = {October 13-15}, address = {Rhodes Island, Greece}, abstract = {Multi-Cluster Very Long Instruction Word (VLIW) architectures are currently designed by using platform-based synthesis techniques. In these approaches, a wide range of platform parameters is tuned to find the best trade-offs in terms of the selected figures of merit (such as energy, delay and area). This optimization phase is called Design Space Exploration (DSE) and it generally consists of a Multi-Objective Optimization (MOO) problem. The design space for a Multi-Cluster architecture is too large to be evaluated comprehensively. So far, several heuristic techniques have been proposed to address the MOO problem, but they are characterized by low efficiency to identify the Pareto front. In this paper, we propose an efficient DSE methodology leveraging neural networks. In particular, an initial design-of-experiments phase is used for generating a coarse view of the target design space; neural networks are then trained and used to refine the exploration, by identifying efficiently the Pareto points of the design space. This process is iteratively repeated until the target criterion (convergence of the Pareto coverage) is satisfied. A set of experimental results are reported to trade-off accuracy and efficiency of the proposed techniques with actual workloads.}, keywords = {design space exploration, multi-objective optimization, neural networks, response surface, system-on-chip (SoC), very long instruction words (VLIW)}, author = {Mariani, Giovanni and Palermo, Gianluca and Zaccaria, Vittorio and Silvano, Cristina} } @article {84.AlGaSte2008, title = {Secure Memory Accesses on Networks-on-Chip}, journal = {IEEE Transactions on Computers}, volume = {57}, number = {9}, year = {2008}, month = {September}, pages = {1216-1229}, abstract = {Security is gaining relevance in the development of embedded devices. Towards a secure system at each level of design, this paper addresses security aspects related to Network on Chip (NoC) architectures, foreseen as the communication infrastructure of next-generation embedded devices. In the context of NoC-based multiprocessor systems, we focus on the topic, not yet thoroughly faced, of data protection. In this paper, we present a secure NoC architecture composed of a set of Data Protection Units (DPUs) implemented within the Network Interfaces (NIs)1. The run-time configuration of the programmable part of the DPUs is managed by a central unit, the Network Security Manager (NSM). The DPU, similar to a firewall, can check and limit the access rights (none, read, write, or both) of processors accessing data and instructions in a shared memory. In particular, the DPU can distinguish between the operating roles (supervisor/user and secure/non secure) of the processing elements.We explore alternative implementations of the DPU and demonstrate how this unit does not affect the network latency if the memory request has the appropriate rights. We also focus on the dynamic updating of the DPUs to support their utilization in dynamic environments, and on the utilization of authentication techniques to increase the level of security.}, keywords = {data protection, embedded systems, multiprocessor system-on-chip (MPSoC), network-on-chip (NoC), security}, doi = {http://dx.doi.org/10.1109/TC.2008.69}, author = {Fiorin, Leandro and Palermo, Gianluca and Lukovi{\'c}, Slobodan and Catalano, Valerio and Silvano, Cristina} } @conference {88.FiPaSi08, title = {A Security Monitoring Service for NoCs}, booktitle = {Sixth IEEE/ACM/IFIP International Conference on Hardware/Software Codesign and System Synthesis (CODES+ISSS{\textquoteright}08)}, year = {2008}, month = {10/2008}, address = {Atlanta, Georgia, USA.}, abstract = {As computing and communications increasingly pervade our lives, security and protection of sensitive data and systems are emerging as extremely important issues. Networks-on- Chip (NoCs) have appeared as design strategy to cope with the rapid increase in complexity of Multiprocessor Systems- on-Chip (MPSoCs), but only recently research community have addressed security on NoC-based architectures. In this paper, we present a monitoring system for NoC based architectures, whose goal is to help detect security violations carried out against the system.Information col- lected are sent to a central unit for efficiently counteracting actions performed by attackers.We detail the design of the basic blocks and analyse overhead associated with the ASIC implementation of the monitoring system, discussing type of security threats that it can help detect and counteract.}, keywords = {embedded systems, multiprocessor system-on-chip (MPSoC), network-on-chip (NoC), security}, doi = {http://dx.doi.org/10.1145/1450135.1450180}, author = {Fiorin, Leandro and Palermo, Gianluca and Silvano, Cristina} } @conference {61.PaMaSiLo07, title = {Application-Specific Topology Design Customization for STNoC}, booktitle = {DSD07, in proceedings of 10th EUROMICRO Conference on Digital System Design Architectures, Methods and Tools (DSD 07)}, year = {2007}, month = {August 29-31}, address = {L{\"u}beck, Germany}, abstract = {Customized network-oriented communication architectures have recently become a must to support high bandwidth SoCs. To this end, a corresponding communication design flow is necessary to support the design space exploration of complex SoCs with tight design constraints. In order to exploit the benefits introduced by the NoC approach for the on-chip communication, the paper presents a Pareto Simulated Annealing (PSA) approach for the customization of the network topology. The proposed PSA approach has been applied to STNoC, the Network on Chip developed by STMicroelectronics. Starting from the ring topology, the proposed application-specific design flow tries to find a set of customized topologies (optimized in terms of performance and area/energy overhead) by adding custom links up to the spidergon topology.}, keywords = {application specific design, mapping, network-on-chip (NoC), STNoC, topology customization}, doi = {http://dx.doi.org/10.1109/DSD.2007.4341522}, author = {Palermo, Gianluca and Mariani, Giovanni and Silvano, Cristina and Locatelli, Riccardo and Coppola, Marcello} } @conference {68.FiPaLuSi07, title = {A Data protection Unit for NoC-based Architecture}, booktitle = {CODES+ISSS 2007, in proceedings of the Fifth IEEE/ACM/FIP International Conference of Hardware/Software Codesign and System Synthesis (CODES+ISSS 2007)}, year = {2007}, month = {September 30}, address = {Salzburg, Austria}, abstract = {Security is gaining increasing relevance in the development of embedded devices. Towards a secure system at each level of design, this paper addresses the security aspects related to Network-on-Chip (NoC) architectures, foreseen as the communication infrastructure of next generation embedded devices. In the context of NoC-based Multiprocessor systems, we focus on the topic, not thoroughly faced yet, of data protection. We present the architecture of a Data Protection Unit (DPU) designed for implementation within the Network Interface (NI). The DPU supports the capability to check and limit the access rights(none, read, write or both) of processors requesting access to data locations in a shared memory - in particular distinguishing between the operating roles (supervisor or user) of processing elements. We explore different alternative implementations and demonstrate how the DPU unit does not affect the network latency if the memory request has the appropriate rights. In the experimental section we show synthesis results for different ASIC implementations of the Data Protection Unit.}, keywords = {data protection, embedded systems, multiprocessor system-on-chip (MPSoC), network-on-chip (NoC), security}, doi = {http://dx.doi.org/10.1145/1289816.1289858}, author = {Fiorin, Leandro and Palermo, Gianluca and Lukovi{\'c}, Slobodan and Silvano, Cristina} } @conference {57.PaMaSiLoCo07, title = {Mapping and Topology Customization Approaches for Application-Specific STNoC Designs}, booktitle = {IEEE Proceedings of ASAP{\textquoteright}07 - 18th International Conference on Application-specific Systems, Architectures and Processors}, year = {2007}, month = {July}, address = {Montr{\'e}al, Qu{\'e}bec, Canada}, abstract = {Application-specific network-oriented communication architectures have recently become an effective solution to support high bandwidth Systems on-Chip. The Network on-Chip architectures considered so far range from regular to fully customized topologies for application specific designs requiring high-level bandwidth. To this end, a networkcentric design flow is necessary to support the design space exploration of complex SoCs with tight design constraints. This paper introduces four different approaches based on the orthogonalization of core mapping and topology customization applied to STNoC, the Network on-Chip developed by STMicroelecronics. The four methods are derived from the combination of the initial mappings to two standard topologies (ring and spidergon) with two types of topology customization based on the insertion of cross-links to reduce the network distance of standard topologies}, keywords = {application specific design, mapping, network-on-chip (NoC), STNoC, topology customization}, doi = {http://dx.doi.org/10.1109/ASAP.2007.4429959}, author = {Palermo, Gianluca and Mariani, Giovanni and Silvano, Cristina and Locatelli, Riccardo and Coppola, Marcello} } @conference {60.FiSiSa07, title = {Security Aspects in Networks-on-Chips: Overview and Proposals for Secure Implementations}, booktitle = {DSD07, in proceedings of 10th EUROMICRO Conference on Digital System Design Architectures, Methods and Tools (DSD 07)}, year = {2007}, month = {August 29-31}, address = {L{\"u}beck, Germany}, abstract = {Security has gained increasing relevance in the development of embedded devices. Towards the aim of a secure system at each level of the design, in this paper we address security aspects related to Networks-on-Chips (NoCs) architectures. After presenting the attacks most likely to address NoCs, we survey existing academic and industrial secure architectures relevant to our case, focusing in particular on their communication infrastructure. We outline and propose possible solutions to contrast some of the attacks described and suggest the use of the NoC as a mean to monitor and detect unexpected system behaviors.}, keywords = {embedded systems, multiprocessor system-on-chip (MPSoC), network-on-chip (NoC), security}, doi = {http://dx.doi.org/10.1109/DSD.2007.4341520}, author = {Fiorin, Leandro and Silvano, Cristina and Sami, Mariagiovanna} } @conference {66.PaMaSi07, title = {A Topology Design Customization Approach for (STNoC)}, booktitle = {Nano-Nets 07, in proceedings of 2nd International Conference on Nano-Networks (Nano-Nets 2007).}, year = {2007}, month = {September 24-26}, address = {Catania, Italy}, abstract = {To support high bandwidth SoCs, a communication design flow is necessary for the design space exploration respecting tight design requirements. In order to exploit the benefits introduced by the NoC approach for the on-chip communication, the paper presents a design flow for the core mapping and customization of the network topology applied to STNoC, the Network on-Chip developed by STMicroelectronics. Starting from ring topology, the proposed application-specific flow tries to find a set of customized topologies, optimized in terms of performance and area/energy overhead, by adding links. The generated STNoC custom topologies provide a reduced cost with respect to the spidergon topology.}, keywords = {application specific design, mapping, network-on-chip (NoC), STNoC, topology customization}, doi = {http://dx.doi.org/10.1109/ASAP.2007.4429959}, author = {Palermo, Gianluca and Mariani, Giovanni and Silvano, Cristina and Locatelli, Riccardo and Coppola, Marcello} } @conference {9.SaSaSciSiZaZa2003, title = {A Methodology for efficient architectural exploration of energy-delay trade-offs for embedded systems}, booktitle = {SAC 2003}, year = {2003}, month = {March}, pages = {672-678}, address = {Melbourne}, abstract = {The main goal of this paper is to identify the best architecture of an embedded system by considering at the same time energy and delay, avoiding the comprehensive analysis of the architectural design space. We adopt the Energy-Delay Product (EDP) as the evaluation metric to compare the alternative architectures of the target system. The paper analyzes an extended adaptive random search algorithm (ADGREED) to efficiently explore the architectural design space. The ADGREED algorithm is a pseudo-random optimization algorithm that combines the best potentialities of the adaptive random search (ADRAS) and the Greedy deterministic algorithm. The analysis has been carried out through the architectural optimization of the memory subsystem of a real-word embedded system executing the set of Mediabench benchmarks for multimedia applications. The reported experimental results have shown a reduction up to one order of magnitude of the number of design alternatives analyzed during the exploration phase, while maintaining very high accuracy.}, keywords = {design space exploration, embedded systems}, doi = {http://dx.doi.org/10.1145/952532.952664}, author = {Salvemini, Lorenzo and Sami, Mariagiovanna and Sciuto, Donatella and Silvano, Cristina and Zaccaria, Vittorio and Zafalon, Roberto} } @conference {6.BoSaSciSiZaZa2002, title = {Energy Estimation and Optimization of Embedded VLIW Processors based on Instruction Clustering}, booktitle = {39th Design Automation Conference}, year = {2002}, month = {June 10-14}, pages = {886-891}, address = {New Orleans}, abstract = {Aim of this paper is to propose a methodology for the definition of an instruction-level energy estimation framework for VLIW (Very Long Instruction Word) processors. The power modeling methodology is the key issue to define an effective energy-aware software optimisation strategy for state-of-the-art ILP (Instruction Level Parallelism) processors. The methodology is based on an energy model for VLIW processors that exploits instruction clustering to achieve an efficient and fine grained energy estimation. The approach aims at reducing the complexity of the characterization problem for VLIW processors from exponential, with respect to the number of parallel operations in the same very long instruction, to quadratic, with respect to the number of instruction clusters. Furthermore, the paper proposes a spatial scheduling algorithm based on a low-power reordering of the parallel operations within the same long instruction. Experimental results have been carried out on the Lx processor, a 4-issue VLIW core jointly designed by HPLabs and STMicroelectronics. The results have shown an average error of 1:9\% between the cluster-based estimation model and the reference design, with a standard deviation of 5:8\%. For the Lx architecture, the spatial instruction scheduling algorithm provides an average energy saving of 12\%.}, keywords = {power estimation, VLIW architectures}, doi = {http://dx.doi.org/10.1109/DAC.2002.1012747}, author = {Bona, Andrea and Sami, Mariagiovanna and Sciuto, Donatella and Silvano, Cristina and Zaccaria, Vittorio and Zafalon, Roberto} } @conference {4.BonaSaSciSiZaZa2002, title = {An Instruction-Level Methodology for Power Estimation and Optimization of Embedded VLIW cores}, booktitle = {DATE 2002}, year = {2002}, month = {March 4-8}, pages = {1128}, address = {Paris}, abstract = {The overall goal of this work is to define an instruction-level power macro-modeling and characterization methodology for VLIW embedded processor cores. The approach presented in this paper is a major extension of the work previously proposed in [1-3], targeting an instruction-level energy model to evaluate the energy consumption associated with a program execution on a pipelined VLIW core. Our first goal is the reduction of the complexity of the processor{\textquoteright}s energy model, without reducing the accuracy of the results. The second goal is to show how the energy model can be further simplified by introducing a methodology to automatically cluster the whole Instruction Set with respect to their average energy cost, in order to con verge to an highly effective design of experiments for the actual characterization task. The paper describes also the application of the proposed model to a real industrial VLIW core (the Lx Architecture developed by HP Labs and STMicroelectronics), to validate the effectiveness and accuracy of the proposed methodology.}, doi = {http://dx.doi.org/10.1109/DATE.2002.998484}, author = {Bona, Andrea and Sami, Mariagiovanna and Sciuto, Donatella and Silvano, Cristina and Zaccaria, Vittorio and Zafalon, Roberto} }