Implementation

This commit is contained in:
Leopold Clement 2021-08-21 10:42:36 +02:00
parent 23617633bf
commit 7586d7e093
8 changed files with 357 additions and 95 deletions

View file

@ -65,8 +65,8 @@
\RequirePackage{subcaption} % Macros pour changer le style des sous-titres de figure
%% Draft %%
\RequirePackage{draftwatermark}
\SetWatermarkScale{3}
% \RequirePackage{draftwatermark}
% \SetWatermarkScale{2}
% ------------- Du dessin et des couleurs -------------
\RequirePackage{xcolor} % Changer la couleur

Binary file not shown.

After

Width:  |  Height:  |  Size: 34 KiB

View file

@ -25,7 +25,7 @@ participant SuperviseurLocal.reception_token <<methode>> order 43
end box
boundary reseau order 51
usr -> HelperClient.get ++: .get(path)
usr -> HelperClient.get ++: get(path)
HelperClient.get -> HelperClient.get : generate_random_token(2)
HelperClient.get -> HelperClient.send_request ++: send_request(request)
HelperClient.send_request -> CoapClient.send_message ++: send_message(request)

74
puml/classe.puml Normal file
View file

@ -0,0 +1,74 @@
@startuml
' Class diagram of the experiment: the CoAPthon client classes we build on,
' our supervision/RL additions, and the tf_agents environment interface.
package "CoAPthon" as coapthon {
class "CoapClient" as client_coap{
+float RTO
+send_message()
+send_datagram()
}
class "HelperClient" as helper_client_coap{
+get()
+send_request()
+generate_random_token()
}
}
package "Notre ajout" as us {
class "SuperviseurLocal" as superviseur_local{
+list[float] RTTs
+float RTT_L
+float RTT_S
+float taux_retransmission
+float min_RTT
+float avg_RTT
+envoie_token()
' fixed typo: was "receprion_token()", the sequence diagram names it reception_token
+reception_token()
+failed_request()
+reset()
}
class "SuperviseurGlobal" as superviseur_global{
+etat
+qualite
+application_action()
+reset()
+reset_rto()
}
class "Maquette" as maquette{
+action_spec
+observation_spec
+reset()
+step()
-_reset()
-_step()
}
class "RequettePeriodique" as requette {
+float periode
}
note left of maquette: interface GYM
}
package "tf_agent" as tf{
abstract "PyEnvironment" as tfenv{
+action_spec
+observation_spec
+reset()
+step()
{abstract}-_reset()
{abstract}-_step()
}
}
helper_client_coap "1" *-- "1" client_coap : > Simplifie l'usage
client_coap "1" *-- "1" superviseur_local : < Surveille et controle
superviseur_global "1" o-- "N" superviseur_local : > fait la synthèse
maquette "1" *-- "N" helper_client_coap
maquette "1" *-- "1" superviseur_global : < envoie les données\nrécoltées
maquette "1" *-- "N" requette : < permet le controle\nde la charge du réseau
requette "1" --> "1" helper_client_coap : > envoie des requettes\navec une période précise
tfenv <|-- maquette : < implémente
@enduml

16
puml/struc_base.puml Normal file
View file

@ -0,0 +1,16 @@
@startuml
' High-level architecture: the testbed (Maquette) and the RL agent (Agent IA)
' talk only through the GYM interface; each side uses its own library.
component "Maquette" as maquette
component "Agent IA" as ia
component "CoAPthon" as coap
component "TensorFlow" as tf
interface "GYM" as gym
' Lollipop notation: Maquette exposes GYM on its right, Agent IA consumes it on its left.
maquette -right( gym
ia -left( gym
maquette ..> coap : use
ia ..> tf : use
@enduml

7
puml/struc_final.puml Normal file
View file

@ -0,0 +1,7 @@
@startuml
' Deployment sketch: user, database, and the Maquette testbed.
' NOTE(review): only the actors are declared, no relations between them yet —
' presumably this diagram is still to be completed.
:Utilisateur: as usr
database "Base de\ndonnée" as db
package "Maquette" as maquette
@enduml

View file

@ -3,13 +3,13 @@
title = {Fairness {{Aware Group Proportional Frequency Domain Resource Allocation}} in {{L}}-{{SC}}-{{FDMA Based Uplink}}},
author = {Ahmed, Irfan and Mohamed, Amr},
year = {2011},
journal = {IJCNS},
volume = {04},
number = {08},
pages = {487--494},
issn = {1913-3715, 1913-3723},
doi = {10.4236/ijcns.2011.48060},
file = {/home/leopold/snap/zotero-snap/common/Zotero/storage/H8M4DAE6/Ahmed et Mohamed - 2011 - Fairness Aware Group Proportional Frequency Domain.pdf},
journal = {IJCNS},
number = {08}
file = {/home/leopold/snap/zotero-snap/common/Zotero/storage/H8M4DAE6/Ahmed et Mohamed - 2011 - Fairness Aware Group Proportional Frequency Domain.pdf}
}
@incollection{ancillottiRTTBasedCongestionControl2018,
@ -24,9 +24,9 @@
address = {{Cham}},
doi = {10.1007/978-3-030-02931-9_1},
abstract = {The design of scalable and reliable transport protocols for IoT environments is still an unsolved issue. A simple stop-and-wait congestion control method and a lightweight reliability mechanism are only implemented in CoAP, an application protocol that provides standardised RESTful services for IoT devices. Inspired by delay-based congestion control algorithms that have been proposed for the TCP, in this work we propose a rate control technique that leverages measurements of roundtrip times (RTTs) to infer network state and to determine the flow rate that would prevent network congestion. Our key idea is that the growth of RTT variance, coupled with thresholds on CoAP message losses, is an effective way to detect the onset of network congestion. To validate our approach, we conduct a comparative performance analysis with the two loss-based congestion control methods of standard CoAP under different application scenarios. Results show that our solution outperforms the alternative methods, with a significant improvement of fairness and robustness against unacknowledged traffic.},
file = {/home/leopold/snap/zotero-snap/common/Zotero/storage/V8MFYI2R/Ancillotti et al. - 2018 - RTT-Based Congestion Control for the Internet of T.pdf},
isbn = {978-3-030-02930-2 978-3-030-02931-9},
language = {en}
language = {en},
file = {/home/leopold/snap/zotero-snap/common/Zotero/storage/V8MFYI2R/Ancillotti et al. - 2018 - RTT-Based Congestion Control for the Internet of T.pdf}
}
@inproceedings{ancillottiRTTBasedCongestionControl2018a,
@ -40,17 +40,17 @@
publisher = {{Springer International Publishing}},
doi = {10.1007/978-3-030-02931-9_1},
abstract = {The design of scalable and reliable transport protocols for IoT environments is still an unsolved issue. A simple stop-and-wait congestion control method and a lightweight reliability mechanism are only implemented in CoAP, an application protocol that provides standardised RESTful services for IoT devices. Inspired by delay-based congestion control algorithms that have been proposed for the TCP, in this work we propose a rate control technique that leverages measurements of round-trip times (RTTs) to infer network state and to determine the flow rate that would prevent network congestion. Our key idea is that the growth of RTT variance, coupled with thresholds on CoAP message losses, is an effective way to detect the onset of network congestion. To validate our approach, we conduct a comparative performance analysis with the two loss-based congestion control methods of standard CoAP under different application scenarios. Results show that our solution outperforms the alternative methods, with a significant improvement of fairness and robustness against unacknowledged traffic.},
file = {/home/leopold/snap/zotero-snap/common/Zotero/storage/PEFCKHKA/Ancillotti et al. - 2018 - RTT-Based Congestion Control for the Internet of T.pdf;/home/leopold/snap/zotero-snap/common/Zotero/storage/FHBCKQBZ/hal-02269740.html},
language = {en}
language = {en},
file = {/home/leopold/snap/zotero-snap/common/Zotero/storage/PEFCKHKA/Ancillotti et al. - 2018 - RTT-Based Congestion Control for the Internet of T.pdf;/home/leopold/snap/zotero-snap/common/Zotero/storage/FHBCKQBZ/hal-02269740.html}
}
@article{bormannBlockWiseTransfersConstrained2016,
title = {Block-{{Wise Transfers}} in the {{Constrained Application Protocol}} ({{CoAP}})},
author = {Bormann, C. and Z. Shelby, Ed},
year = {2016},
number = {RFC 7959},
issn = {2070-1721},
file = {/home/leopold/snap/zotero-snap/common/Zotero/storage/HPKFQFKU/rfc7959.html},
number = {RFC 7959}
file = {/home/leopold/snap/zotero-snap/common/Zotero/storage/HPKFQFKU/rfc7959.html}
}
@techreport{bormannTerminologyConstrainedNodeNetworks2014,
@ -58,12 +58,12 @@
author = {Bormann, C. and Ersue, M. and Keranen, A.},
year = {2014},
month = may,
number = {RFC7228},
pages = {RFC7228},
institution = {{RFC Editor}},
doi = {10.17487/rfc7228},
file = {/home/leopold/snap/zotero-snap/common/Zotero/storage/KXAF4RDI/Bormann et al. - 2014 - Terminology for Constrained-Node Networks.pdf},
language = {en},
number = {RFC7228}
file = {/home/leopold/snap/zotero-snap/common/Zotero/storage/KXAF4RDI/Bormann et al. - 2014 - Terminology for Constrained-Node Networks.pdf}
}
@techreport{bradnerKeyWordsUse1997,
@ -71,24 +71,40 @@
author = {Bradner, S.},
year = {1997},
month = mar,
number = {RFC2119},
pages = {RFC2119},
institution = {{RFC Editor}},
doi = {10.17487/rfc2119},
file = {/home/leopold/snap/zotero-snap/common/Zotero/storage/CSVTPM96/Bradner - 1997 - Key words for use in RFCs to Indicate Requirement .pdf},
language = {en},
number = {RFC2119}
file = {/home/leopold/snap/zotero-snap/common/Zotero/storage/CSVTPM96/Bradner - 1997 - Key words for use in RFCs to Indicate Requirement .pdf}
}
@article{brockmanOpenAIGym2016,
title = {{{OpenAI Gym}}},
author = {Brockman, Greg and Cheung, Vicki and Pettersson, Ludwig and Schneider, Jonas and Schulman, John and Tang, Jie and Zaremba, Wojciech},
year = {2016},
month = jun,
journal = {arXiv:1606.01540 [cs]},
eprint = {1606.01540},
eprinttype = {arxiv},
primaryclass = {cs},
abstract = {OpenAI Gym1 is a toolkit for reinforcement learning research. It includes a growing collection of benchmark problems that expose a common interface, and a website where people can share their results and compare the performance of algorithms. This whitepaper discusses the components of OpenAI Gym and the design decisions that went into the software.},
archiveprefix = {arXiv},
language = {en},
keywords = {Computer Science - Artificial Intelligence,Computer Science - Machine Learning},
file = {/home/leopold/snap/zotero-snap/common/Zotero/storage/L6IXPMUV/Brockman et al. - 2016 - OpenAI Gym.pdf}
}
@article{FairnessMeasure2020,
title = {Fairness Measure},
year = {2020},
month = dec,
abstract = {Fairness measures or metrics are used in network engineering to determine whether users or applications are receiving a fair share of system resources. There are several mathematical and conceptual definitions of fairness.},
annotation = {Page Version ID: 993616223},
copyright = {Creative Commons Attribution-ShareAlike License},
file = {/home/leopold/snap/zotero-snap/common/Zotero/storage/P7E9KWUD/index.html},
journal = {Wikipedia},
language = {en}
abstract = {Fairness measures or metrics are used in network engineering to determine whether users or applications are receiving a fair share of system resources. There are several mathematical and conceptual definitions of fairness.},
copyright = {Creative Commons Attribution-ShareAlike License},
language = {en},
annotation = {Page Version ID: 993616223},
file = {/home/leopold/snap/zotero-snap/common/Zotero/storage/P7E9KWUD/index.html}
}
@article{haileEndtoendCongestionControl2021,
@ -96,15 +112,15 @@
author = {Haile, Habtegebreil and Grinnemo, Karl-Johan and Ferlin, Simone and Hurtig, Per and Brunstrom, Anna},
year = {2021},
month = feb,
journal = {Computer Networks},
volume = {186},
pages = {107692},
issn = {1389-1286},
doi = {10.1016/j.comnet.2020.107692},
abstract = {Cellular networks have evolved to support high peak bitrates with low loss rates as observed by the higher layers. However, applications and services running over cellular networks are now facing other difficult congestion-related challenges, most notably a highly variable link capacity and bufferbloat. To overcome these issues and improve performance of network traffic in 4G/5G cellular networks, a number of in-network and end-to-end solutions have been proposed. Fairness between interacting congestion control algorithms (CCAs) has played an important role in the type of CCAs considered for research and deployment. The placement of content closer to the user and the allocation of per-user queues in cellular networks has increased the likelihood of a cellular access bottleneck and reduced the extent of flow interaction between multiple users. This has resulted in renewed interest in end-to-end CCAs for cellular networks by opening up room for research and exploration. In this work, we present end-to-end CCAs that target a high throughput and a low latency over highly variable network links, and classify them according to the way they address the congestion control. The work also discusses the deployability of the algorithms. In addition, we provide insights into possible future research directions, such as coping with a higher degree of variability, interaction of CCAs in a shared bottleneck, and avenues for synergized research, such as CCAs assisted by software defined networking and network function virtualization. We hope that this work will serve as a starting point for systematically navigating through the expanding number of cellular CCAs.},
file = {/home/leopold/snap/zotero-snap/common/Zotero/storage/ZYVN97E6/Haile et al. - 2021 - End-to-end congestion control approaches for high .pdf;/home/leopold/snap/zotero-snap/common/Zotero/storage/ZBPCYB5Y/S1389128620312974.html},
journal = {Computer Networks},
language = {en},
keywords = {4G,5G,Congestion control,Mobile,QUIC,Survey,TCP,Wireless},
language = {en}
file = {/home/leopold/snap/zotero-snap/common/Zotero/storage/ZYVN97E6/Haile et al. - 2021 - End-to-end congestion control approaches for high .pdf;/home/leopold/snap/zotero-snap/common/Zotero/storage/ZBPCYB5Y/S1389128620312974.html}
}
@techreport{hartkeObservingResourcesConstrained2015,
@ -112,12 +128,12 @@
author = {Hartke, K.},
year = {2015},
month = sep,
number = {RFC7641},
pages = {RFC7641},
institution = {{RFC Editor}},
doi = {10.17487/RFC7641},
file = {/home/leopold/snap/zotero-snap/common/Zotero/storage/8VN6JIFQ/Hartke - 2015 - Observing Resources in the Constrained Application.pdf},
language = {en},
number = {RFC7641}
file = {/home/leopold/snap/zotero-snap/common/Zotero/storage/8VN6JIFQ/Hartke - 2015 - Observing Resources in the Constrained Application.pdf}
}
@article{hochreiterLongShortTermMemory1997,
@ -125,14 +141,14 @@
author = {Hochreiter, Sepp and Schmidhuber, J{\"u}rgen},
year = {1997},
month = nov,
journal = {Neural Computation},
volume = {9},
number = {8},
pages = {1735--1780},
issn = {0899-7667},
doi = {10.1162/neco.1997.9.8.1735},
abstract = {Learning to store information over extended time intervals by recurrent backpropagation takes a very long time, mostly because of insufficient, decaying error backflow. We briefly review Hochreiter's (1991) analysis of this problem, then address it by introducing a novel, efficient, gradient based method called long short-term memory (LSTM). Truncating the gradient where this does not do harm, LSTM can learn to bridge minimal time lags in excess of 1000 discrete-time steps by enforcing constant error flow through constant error carousels within special units. Multiplicative gate units learn to open and close access to the constant error flow. LSTM is local in space and time; its computational complexity per time step and weight is O. 1. Our experiments with artificial data involve local, distributed, real-valued, and noisy pattern representations. In comparisons with real-time recurrent learning, back propagation through time, recurrent cascade correlation, Elman nets, and neural sequence chunking, LSTM leads to many more successful runs, and learns much faster. LSTM also solves complex, artificial long-time-lag tasks that have never been solved by previous recurrent network algorithms.},
file = {/home/leopold/snap/zotero-snap/common/Zotero/storage/E29B3BIB/Hochreiter et Schmidhuber - 1997 - Long Short-Term Memory.pdf;/home/leopold/snap/zotero-snap/common/Zotero/storage/CZ2ERT63/Long-Short-Term-Memory.html},
journal = {Neural Computation},
number = {8}
file = {/home/leopold/snap/zotero-snap/common/Zotero/storage/E29B3BIB/Hochreiter et Schmidhuber - 1997 - Long Short-Term Memory.pdf;/home/leopold/snap/zotero-snap/common/Zotero/storage/CZ2ERT63/Long-Short-Term-Memory.html}
}
@article{jainQuantitativeMeasureFairness1998,
@ -140,13 +156,13 @@
author = {Jain, R. and Chiu, D. and Hawe, W.},
year = {1998},
month = sep,
abstract = {Fairness is an important performance criterion in all resource allocation schemes, including those in distributed computer systems. However, it is often specified only qualitatively. The quantitative measures proposed in the literature are either too specific to a particular application, or suffer from some undesirable characteristics. In this paper, we have introduced a quantitative measure called Indiex of FRairness. The index is applicable to any resource sharing or allocation problem. It is independent of the amount of the resource. The fairness index always lies between 0 and 1. This boundedness aids intuitive understanding of the fairness index. For example, a distribution algorithm with a fairness of 0.10 means that it is unfair to 90\% of the users. Also, the discrimination index can be defined as 1 - fairness index.},
archiveprefix = {arXiv},
journal = {arXiv:cs/9809099},
eprint = {cs/9809099},
eprinttype = {arxiv},
file = {/home/leopold/snap/zotero-snap/common/Zotero/storage/NZS4CZVM/Jain et al. - 1998 - A Quantitative Measure Of Fairness And Discriminat.pdf;/home/leopold/snap/zotero-snap/common/Zotero/storage/PP7ZG8W3/9809099.html},
journal = {arXiv:cs/9809099},
keywords = {C.2.1,Computer Science - Networking and Internet Architecture}
abstract = {Fairness is an important performance criterion in all resource allocation schemes, including those in distributed computer systems. However, it is often specified only qualitatively. The quantitative measures proposed in the literature are either too specific to a particular application, or suffer from some undesirable characteristics. In this paper, we have introduced a quantitative measure called Indiex of FRairness. The index is applicable to any resource sharing or allocation problem. It is independent of the amount of the resource. The fairness index always lies between 0 and 1. This boundedness aids intuitive understanding of the fairness index. For example, a distribution algorithm with a fairness of 0.10 means that it is unfair to 90\% of the users. Also, the discrimination index can be defined as 1 - fairness index.},
archiveprefix = {arXiv},
keywords = {C.2.1,Computer Science - Networking and Internet Architecture},
file = {/home/leopold/snap/zotero-snap/common/Zotero/storage/NZS4CZVM/Jain et al. - 1998 - A Quantitative Measure Of Fairness And Discriminat.pdf;/home/leopold/snap/zotero-snap/common/Zotero/storage/PP7ZG8W3/9809099.html}
}
@inproceedings{jayDeepReinforcementLearning2019,
@ -159,8 +175,8 @@
publisher = {{PMLR}},
issn = {2640-3498},
abstract = {We present and investigate a novel and timely application domain for deep reinforcement learning (RL): Internet congestion control. Congestion control is the core networking task of modulating traf...},
file = {/home/leopold/snap/zotero-snap/common/Zotero/storage/J3EGXP4X/Jay et al. - 2019 - A Deep Reinforcement Learning Perspective on Inter.pdf;/home/leopold/snap/zotero-snap/common/Zotero/storage/4LBJU9M4/jay19a.html},
language = {en}
language = {en},
file = {/home/leopold/snap/zotero-snap/common/Zotero/storage/J3EGXP4X/Jay et al. - 2019 - A Deep Reinforcement Learning Perspective on Inter.pdf;/home/leopold/snap/zotero-snap/common/Zotero/storage/4LBJU9M4/jay19a.html}
}
@techreport{kushalnagarIPv6LowPowerWireless2007,
@ -169,12 +185,12 @@
author = {Kushalnagar, N. and Montenegro, G. and Schumacher, C.},
year = {2007},
month = aug,
number = {RFC4919},
pages = {RFC4919},
institution = {{RFC Editor}},
doi = {10.17487/rfc4919},
file = {/home/leopold/snap/zotero-snap/common/Zotero/storage/EE2RHNFM/Kushalnagar et al. - 2007 - IPv6 over Low-Power Wireless Personal Area Network.pdf},
language = {en},
number = {RFC4919}
file = {/home/leopold/snap/zotero-snap/common/Zotero/storage/EE2RHNFM/Kushalnagar et al. - 2007 - IPv6 over Low-Power Wireless Personal Area Network.pdf}
}
@inproceedings{lanDeepReinforcementLearning2019,
@ -187,8 +203,8 @@
issn = {1938-1883},
doi = {10.1109/ICC.2019.8761737},
abstract = {Named Data Networking (NDN) is an emerging future network architecture that changes the network communication model from push mode to pull mode, which leads to the requirement of a new mechanism of congestion control. To fully exploit the capability of NDN, a suitable congestion control scheme must consider the characteristics of NDN, such as connectionless, in-network caching, content perceptibility, etc. In this paper, firstly, we redefine the congestion control objective for NDN, which considers requirements diversities for different contents. Then we design and develop an efficient congestion control mechanism based on deep reinforcement learning (DRL), namely DRL-based Congestion Control Protocol (DRL-CCP). DRL-CCP enables consumers to automatically learn the optimal congestion control policy from historical congestion control experience. Finally, a real-world test platform with some typical congestion control algorithms for NDN is implemented, and a series of comparative experiments are performed on this platform to verify the performance of DRL-CCP.},
file = {/home/leopold/snap/zotero-snap/common/Zotero/storage/JD3BUTCA/8761737.html},
keywords = {Data models,Deep learning,Neural networks,Protocols,Reinforcement learning,TCPIP,Training}
keywords = {Data models,Deep learning,Neural networks,Protocols,Reinforcement learning,TCPIP,Training},
file = {/home/leopold/snap/zotero-snap/common/Zotero/storage/JD3BUTCA/8761737.html}
}
@article{leeEnhancementCongestionControl2016,
@ -196,11 +212,11 @@
author = {Lee, Jung and Kim, Kyung and Youn, Hee},
year = {2016},
month = nov,
journal = {International Journal of Distributed Sensor Networks},
volume = {12},
doi = {10.1177/1550147716676274},
abstract = {With the wide spread of Internet of Things, efficient communication between the nodes is getting more important. Constrained Application Protocol was developed to accommodate the resource-constrained nodes and low-power communication links. Being an Internet protocol, Constrained Application Protocol must adhere to congestion control, primarily to keep the backbone network stable. An advanced congestion control mechanism for Constrained Application Protocol, called Congestion Control/Advanced, has also been recently developed. In this article, we propose a new round trip time\textendash based adaptive congestion control scheme, which further improves Congestion Control/Advanced by utilizing the retransmission count in estimating the retransmission timeout value and the lower bound in round trip time variation. An experiment is conducted based on Californium Constrained Application Protocol framework and real devices, and the performance is compared with Constrained Application Protocol, Congestion Control/Advanced, and an existing scheme. It reveals that the proposed scheme significantly increases the throughput and rate of successful transactions in comparison with the other schemes. The approach of utilizing the option field of Constrained Application Protocol enables the proposed scheme to be implemented without any conflict with the existing protocol and extra overhead.},
file = {/home/leopold/snap/zotero-snap/common/Zotero/storage/ESXYH32D/Lee et al. - 2016 - Enhancement of congestion control of Constrained A.pdf},
journal = {International Journal of Distributed Sensor Networks}
file = {/home/leopold/snap/zotero-snap/common/Zotero/storage/ESXYH32D/Lee et al. - 2016 - Enhancement of congestion control of Constrained A.pdf}
}
@article{liQTCPAdaptiveCongestion2019,
@ -209,15 +225,15 @@
author = {Li, Wei and Zhou, Fan and Chowdhury, Kaushik Roy and Meleis, Waleed},
year = {2019},
month = jul,
journal = {IEEE Trans. Netw. Sci. Eng.},
volume = {6},
number = {3},
pages = {445--458},
issn = {2327-4697, 2334-329X},
doi = {10.1109/TNSE.2018.2835758},
abstract = {Next generation network access technologies and Internet applications have increased the challenge of providing satisfactory quality of experience for users with traditional congestion control protocols. Efforts on optimizing the performance of TCP by modifying the core congestion control method depending on specific network architectures or apps do not generalize well under a wide range of network scenarios. This limitation arises from the rule-based design principle, where the performance is linked to a pre-decided mapping between the observed state of the network to the corresponding actions. Therefore, these protocols are unable to adapt their behavior in new environments or learn from experience for better performance. We address this problem by integrating a reinforcement-based Q-learning framework with TCP design in our approach called QTCP. QTCP enables senders to gradually learn the optimal congestion control policy in an on-line manner. QTCP does not need hard-coded rules, and can therefore generalize to a variety of different networking scenarios. Moreover, we develop a generalized Kanerva coding function approximation algorithm, which reduces the computation complexity of value functions and the searchable size of the state space. We show that QTCP outperforms the traditional rule-based TCP by providing 59.5\% higher throughput while maintaining low transmission latency.},
file = {/home/leopold/snap/zotero-snap/common/Zotero/storage/DBTMF6SI/Li et al. - 2019 - QTCP Adaptive Congestion Control with Reinforceme.pdf},
journal = {IEEE Trans. Netw. Sci. Eng.},
language = {en},
number = {3}
file = {/home/leopold/snap/zotero-snap/common/Zotero/storage/DBTMF6SI/Li et al. - 2019 - QTCP Adaptive Congestion Control with Reinforceme.pdf}
}
@article{mccallGeneticAlgorithmsModelling2005,
@ -225,17 +241,17 @@
author = {McCall, John},
year = {2005},
month = dec,
journal = {Journal of Computational and Applied Mathematics},
series = {Special {{Issue}} on {{Mathematics Applied}} to {{Immunology}}},
volume = {184},
number = {1},
pages = {205--222},
issn = {0377-0427},
doi = {10.1016/j.cam.2004.07.034},
abstract = {Genetic algorithms (GAs) are a heuristic search and optimisation technique inspired by natural evolution. They have been successfully applied to a wide range of real-world problems of significant complexity. This paper is intended as an introduction to GAs aimed at immunologists and mathematicians interested in immunology. We describe how to construct a GA and the main strands of GA theory before speculatively identifying possible applications of GAs to the study of immunology. An illustrative example of using a GA for a medical optimal control problem is provided. The paper also includes a brief account of the related area of artificial immune systems.},
file = {/home/leopold/snap/zotero-snap/common/Zotero/storage/ZWGD35YA/McCall - 2005 - Genetic algorithms for modelling and optimisation.pdf;/home/leopold/snap/zotero-snap/common/Zotero/storage/VAQQGXRW/S0377042705000774.html},
journal = {Journal of Computational and Applied Mathematics},
keywords = {Evolution,Genetic algorithms,Immunology,Optimisation},
language = {en},
number = {1},
series = {Special {{Issue}} on {{Mathematics Applied}} to {{Immunology}}}
keywords = {Evolution,Genetic algorithms,Immunology,Optimisation},
file = {/home/leopold/snap/zotero-snap/common/Zotero/storage/ZWGD35YA/McCall - 2005 - Genetic algorithms for modelling and optimisation.pdf;/home/leopold/snap/zotero-snap/common/Zotero/storage/VAQQGXRW/S0377042705000774.html}
}
@techreport{montenegroTransmissionIPv6Packets2007,
@ -243,12 +259,12 @@
author = {Montenegro, G. and Kushalnagar, N. and Hui, J. and Culler, D.},
year = {2007},
month = sep,
number = {RFC4944},
pages = {RFC4944},
institution = {{RFC Editor}},
doi = {10.17487/rfc4944},
file = {/home/leopold/snap/zotero-snap/common/Zotero/storage/8IBKXRLW/Montenegro et al. - 2007 - Transmission of IPv6 Packets over IEEE 802.15.4 Ne.pdf},
language = {en},
number = {RFC4944}
file = {/home/leopold/snap/zotero-snap/common/Zotero/storage/8IBKXRLW/Montenegro et al. - 2007 - Transmission of IPv6 Packets over IEEE 802.15.4 Ne.pdf}
}
@article{naDLTCPDeepLearningBased2019,
@ -256,14 +272,14 @@
shorttitle = {{{DL}}-{{TCP}}},
author = {Na, Woongsoo and Bae, Byungjun and Cho, Sukhee and Kim, Nayeon},
year = {2019},
journal = {IEEE Access},
volume = {7},
pages = {145134--145144},
issn = {2169-3536},
doi = {10.1109/ACCESS.2019.2945582},
abstract = {The 5G mobile communication system is attracting attention as one of the most suitable communication models for broadcasting and managing disaster situations, owing to its large capacity and low latency. High-quality videos taken by a drone, which is an embedded IoT device for shooting in a disaster environment, play an important role in managing the disaster. However, the 5G mmWave frequency band is susceptible to obstacles and has beam misalignment problems, severing the connection and greatly affecting the degradation of TCP performance. This problem becomes even more serious in high-mobility drones and disaster sites with many obstacles. To solve this problem, we propose a deep-learning-based TCP (DL-TCP) for a disaster 5G mmWave network. DL-TCP learns the node's mobility information and signal strength, and adjusts the TCP congestion window by predicting when the network is disconnected and reconnected. As a result of the experiment, DL-TCP provides better network stability and higher network throughput than the existing TCP NewReno, TCP Cubic, and TCP BBR.},
file = {/home/leopold/snap/zotero-snap/common/Zotero/storage/GCGKXV6U/Na et al. - 2019 - DL-TCP Deep Learning-Based Transmission Control P.pdf;/home/leopold/snap/zotero-snap/common/Zotero/storage/PV8BQ6LQ/8859212.html},
journal = {IEEE Access},
keywords = {5G,5G mobile communication,Bandwidth,Deep-learning,Machine learning,mmWave,Protocols,Signal to noise ratio,supervised-learning,TCP,Videos,Wireless communication}
keywords = {5G,5G mobile communication,Bandwidth,Deep-learning,Machine learning,mmWave,Protocols,Signal to noise ratio,supervised-learning,TCP,Videos,Wireless communication},
file = {/home/leopold/snap/zotero-snap/common/Zotero/storage/GCGKXV6U/Na et al. - 2019 - DL-TCP Deep Learning-Based Transmission Control P.pdf;/home/leopold/snap/zotero-snap/common/Zotero/storage/PV8BQ6LQ/8859212.html}
}
@article{oliveiraComputerNetworkTraffic2016,
@ -272,12 +288,12 @@
author = {Oliveira, Tiago and Barbar, Jamil and Soares, Alexsandro},
year = {2016},
month = jan,
journal = {International Journal of Big Data Intelligence},
volume = {3},
pages = {28},
doi = {10.1504/IJBDI.2016.073903},
abstract = {This paper compares four different artificial neural network approaches for computer network traffic forecast, such as: 1) multilayer perceptron (MLP) using the backpropagation as training algorithm; 2) MLP with resilient backpropagation (Rprop); (3) recurrent neural network (RNN); 4) deep learning stacked autoencoder (SAE). The computer network traffic is sampled from the traffic of the network devices that are connected to the internet. It is shown herein how a simpler neural network model, such as the RNN and MLP, can work even better than a more complex model, such as the SAE. Internet traffic prediction is an important task for many applications, such as adaptive applications, congestion control, admission control, anomaly detection and bandwidth allocation. In addition, efficient methods of resource management, such as the bandwidth, can be used to gain performance and reduce costs, improving the quality of service (QoS). The popularity of the newest deep learning methods have been increasing in several areas, but there is a lack of studies concerning time series prediction, such as internet traffic.},
file = {/home/leopold/snap/zotero-snap/common/Zotero/storage/PP28QWSB/Oliveira et al. - 2016 - Computer network traffic prediction A comparison .pdf},
journal = {International Journal of Big Data Intelligence}
file = {/home/leopold/snap/zotero-snap/common/Zotero/storage/PP28QWSB/Oliveira et al. - 2016 - Computer network traffic prediction A comparison .pdf}
}
@techreport{rheeCUBICFastLongDistance2018,
@ -285,11 +301,11 @@
author = {Rhee, I. and Xu, L. and Ha, S. and Zimmermann, A. and Eggert, L. and Scheffenegger, R.},
year = {2018},
month = feb,
number = {RFC8312},
pages = {RFC8312},
institution = {{RFC Editor}},
doi = {10.17487/RFC8312},
language = {en},
number = {RFC8312},
language = {en}
}
@techreport{shelbyConstrainedApplicationProtocol2014,
@ -297,11 +313,11 @@
author = {Shelby, Z. and Hartke, K. and Bormann, C.},
year = {2014},
month = jun,
number = {RFC7252},
pages = {RFC7252},
institution = {{RFC Editor}},
doi = {10.17487/rfc7252},
language = {en},
number = {RFC7252},
language = {en}
}
@techreport{shelbyConstrainedRESTfulEnvironments2012,
@ -309,12 +325,12 @@
author = {Shelby, Z.},
year = {2012},
month = aug,
number = {RFC6690},
pages = {RFC6690},
institution = {{RFC Editor}},
doi = {10.17487/rfc6690},
file = {/home/leopold/snap/zotero-snap/common/Zotero/storage/TJZ9ZDWD/Shelby - 2012 - Constrained RESTful Environments (CoRE) Link Forma.pdf},
language = {en},
number = {RFC6690},
file = {/home/leopold/snap/zotero-snap/common/Zotero/storage/TJZ9ZDWD/Shelby - 2012 - Constrained RESTful Environments (CoRE) Link Forma.pdf}
}
@article{spivakComparisonDigitalMaps2014,
@ -322,13 +338,13 @@
shorttitle = {Comparison of {{Digital Maps}}},
author = {Spivak, Lev and Spivak, Ivan and Sokolov, Alexey and Voinov, Sergey},
year = {2014},
journal = {JGIS},
volume = {06},
number = {05},
pages = {415--422},
issn = {2151-1950, 2151-1969},
doi = {10.4236/jgis.2014.65036},
file = {/home/leopold/snap/zotero-snap/common/Zotero/storage/39HVB5X5/Spivak et al. - 2014 - Comparison of Digital Maps Recognition and Quanti.pdf},
journal = {JGIS},
number = {05},
file = {/home/leopold/snap/zotero-snap/common/Zotero/storage/39HVB5X5/Spivak et al. - 2014 - Comparison of Digital Maps Recognition and Quanti.pdf}
}
@book{tesslerReinforcementLearningDatacenter2021,
@ -344,52 +360,52 @@
shorttitle = {{{TCP}}-{{Drinc}}},
author = {Xiao, Kefan and Mao, Shiwen and Tugnait, Jitendra K.},
year = {2019},
journal = {IEEE Access},
volume = {7},
pages = {11892--11904},
issn = {2169-3536},
doi = {10.1109/ACCESS.2019.2892046},
abstract = {As wired/wireless networks become more and more complex, the fundamental assumptions made by many existing TCP variants may not hold true anymore. In this paper, we develop a model-free, smart congestion control algorithm based on deep reinforcement learning, which has a high potential in dealing with the complex and dynamic network environment. We present TCP-Deep ReInforcement learNing-based Congestion control (Drinc) which learns from past experience in the form of a set of measured features to decide how to adjust the congestion window size. We present the TCP-Drinc design and validate its performance with extensive ns-3 simulations and comparison with five benchmark schemes.},
file = {/home/leopold/snap/zotero-snap/common/Zotero/storage/FTP7JBFP/Xiao et al. - 2019 - TCP-Drinc Smart Congestion Control Based on Deep .pdf;/home/leopold/snap/zotero-snap/common/Zotero/storage/SECVK22A/8610116.html},
journal = {IEEE Access},
keywords = {Congestion control,deep convolutional neural network (DCNN),deep reinforcement learning (DRL),Delays,long short term memory (LSTM),machine learning,Microsoft Windows,Protocols,Reinforcement learning,Wireless communication,Wireless sensor networks},
keywords = {Congestion control,deep convolutional neural network (DCNN),deep reinforcement learning (DRL),Delays,long short term memory (LSTM),machine learning,Microsoft Windows,Protocols,Reinforcement learning,Wireless communication,Wireless sensor networks},
file = {/home/leopold/snap/zotero-snap/common/Zotero/storage/FTP7JBFP/Xiao et al. - 2019 - TCP-Drinc Smart Congestion Control Based on Deep .pdf;/home/leopold/snap/zotero-snap/common/Zotero/storage/SECVK22A/8610116.html}
}
@article{yangResearchLaboratoryManagement2021,
title = {Research on the {{Laboratory Management Mode Based}} on the {{Optimal Allocation}} of {{Resources}}},
author = {Yang, Bo},
year = {2021},
journal = {OALib},
volume = {08},
number = {01},
pages = {1--8},
issn = {2333-9721, 2333-9705},
doi = {10.4236/oalib.1107119},
journal = {OALib},
number = {01},
doi = {10.4236/oalib.1107119}
}
@article{zhaoResourceAllocationOFDMAMIMO2013,
title = {Resource {{Allocation}} for {{OFDMA}}-{{MIMO Relay Systems}} with {{Proportional Fairness Constraints}}},
author = {Zhao, Cuiru and Li, Youming and Chen, Bin and Wang, Zhao and Wang, Jiongtao},
year = {2013},
journal = {CN},
volume = {05},
number = {03},
pages = {303--307},
issn = {1949-2421, 1947-3826},
doi = {10.4236/cn.2013.53B2056},
file = {/home/leopold/snap/zotero-snap/common/Zotero/storage/SHNR6SNE/Zhao et al. - 2013 - Resource Allocation for OFDMA-MIMO Relay Systems w.pdf},
journal = {CN},
number = {03},
file = {/home/leopold/snap/zotero-snap/common/Zotero/storage/SHNR6SNE/Zhao et al. - 2013 - Resource Allocation for OFDMA-MIMO Relay Systems w.pdf}
}
@article{zhengResearchAirportTaxi2020,
title = {Research on {{Airport Taxi Resource Allocation Based}} on {{Information Asymmetry}}},
author = {Zheng, Yansong},
year = {2020},
journal = {OJBM},
volume = {08},
number = {02},
pages = {763--769},
issn = {2329-3284, 2329-3292},
doi = {10.4236/ojbm.2020.82046},
file = {/home/leopold/snap/zotero-snap/common/Zotero/storage/KHZSTVS8/Zheng - 2020 - Research on Airport Taxi Resource Allocation Based.pdf},
journal = {OJBM},
number = {02},
file = {/home/leopold/snap/zotero-snap/common/Zotero/storage/KHZSTVS8/Zheng - 2020 - Research on Airport Taxi Resource Allocation Based.pdf}
}

View file

@ -19,6 +19,9 @@
% ------------- Packages spéciaux, nécessaires pour ce rapport, à insérer ici -------------
\usepackage{minted}
\usepackage{lscape}
\usepackage{wrapfig}
\newcommand{\code}[1]{\mintinline[breaklines, breakafter=.]{python}{#1}}
\usepackage[toc]{glossaries}
\makeglossaries
@ -111,6 +114,14 @@
\newcommand{\osi}{\gls{osi}}
\newglossaryentry{gym}
{
name=GYM,
description={Norme de compatibilité pour les environnements de machine learning \cite{brockmanOpenAIGym2016}. \url{https://gym.openai.com/}}
}
\newcommand{\gym}{\gls{gym}}
\newglossaryentry{cc}
{
name=CC,
@ -346,18 +357,29 @@ Notre problèmatique est donc de savoir si de tel solutions sont applicable à \
\subsection{Qu'est-ce que l'apprentissage par renforcement ?}
\begin{wrapfigure}{r}{0.40\textwidth} %this figure will be at the right
\centering
\includegraphics[width=0.40\textwidth]{png_img/Markov_Decision_Process_example.png}
\caption{Exemple de processus Markovien à trois états et deux actions.}
\end{wrapfigure}
L'apprentissage par renforcement est une solution d'apprentissage machine pour les systèmes markoviens.
Le système est constitué d'un environnement et d'un acteur.
On suppose que le temps est discret.
Ainsi à l'instant $t$, l'environnement est dans un état $e \in \mathbb{E}$.
L'acteur observe l'environnement au travers d'un interpréteur, qui lui renvoie un vecteur $s \in \mathbb{S}$, le vecteur d'observation.
L'acteur à la possibilité de réalisé une action $a in \mathbb{A}$ pour influancé l'environement.
L'acteur a la possibilité de réaliser une action $a \in \mathbb{A}$ pour influencer l'environnement.
Suite à cette action, l'environnement change d'état, et renvoie à l'acteur un nouveau vecteur d'observation et une récompense $r \in \R$.
Cette récompense permet de quantifier la qualité de l'action et du nouvel état.
Pour la suite, on notera $x ^ i$ la i-ème grandeur, et $x_t$ la grandeur à l'instant $t$.
Le but de l'acteur est de maximiser la récompense totale qu'il va recevoir au cours d'un épisode, c'est-à-dire une succession de pas de temps.
Pour cela, l'acteur doit créer une fonction $\pi : \mathbb{S} \rightarrow \mathbb{A}$ permettant de donner l'action optimale à réaliser pour chaque état.
La solution optimale est définie comme l'action permettant de maximiser le retour global : \begin{equation}
R = \sum_{i = 1}^{\infty} r_i \gamma^i
\label{eq:rl:R}
\end{equation}
La méthode de base pour construire cette fonction est le \qlearn{}.
C'est un processus itératif où l'on va construire un tableau (la Q-table), noté $\mathbb{Q} : \mathbb{S} \times \mathbb{A} \rightarrow \R$ associant à chaque couple observation/action une valeur.
On parcourt l'environnement et à chaque pas de temps, on met à jour la valeur de $\mathbb{Q}(s_t, a_t)$ grâce à la formule suivante : \begin{equation}
@ -368,7 +390,7 @@ Les deux paramètre sont : \begin{itemize}
\item $\alpha$ le taux d'apprentissage, c'est-à-dire à quel point les nouvelles informations écrasent les anciennes,
\item $\gamma$ l'horizon, qui permet de contrôler à quel point l'algorithme se préoccupe du futur.
\end{itemize}
La méthode pour choisir quelle action prendre à chaque pas de temps est juste de prendre $argmax_{a in \mathbb{A}} \mathbb{Q}\left(s_t, a \right) $.
La méthode pour choisir quelle action prendre à chaque pas de temps est juste de prendre $\mathop{\mathrm{argmax}}_{a \in \mathbb{A}} \mathbb{Q}\left(s_t, a \right)$.
En réalité, on fait des actions aléatoires avec une probabilité faible pour explorer l'environnement.
Deux des limites de cette solution sont la taille en mémoire d'un tel tableau quand les ensembles $\mathbb{S}$ et $\mathbb{A}$ sont grands, et le temps qu'il faut pour que la convergence des valeurs du tableau ait lieu.
Pour pallier cela, on peut approximer la fonction par un réseau de neurones, c'est le principe du \dqlearn{}.
@ -393,7 +415,7 @@ Ainsi on décide d'une fenetre de congestion, c'est à dire le nombre de paquet
Si un paquet n'est pas acquitté avant la fin d'un \rto{} fixe, on le réenvoie.
Les \CCA{} ont pour rôle de gérer la taille de la fenêtre, et le \rto{}, pour optimiser la connexion.
Les \CCA{} de \tcp{} sont nombreux (une classification partiel est lsible dans \cite{haileEndtoendCongestionControl2021}), je ne vais donc pas tous les décrire.
Les \CCA{} de \tcp{} sont nombreux (une classification partielle est lisible dans \cite{haileEndtoendCongestionControl2021}), ils ne seront donc pas tous décrits.
Le principe de base de la plupart des algorithmes est l'AIMD (additive-increase-multiplicative-decrease).
Par exemple \newreno{}, le prédécesseur de \cubic{}, augmente la taille de la fenêtre par pas de $1$ tous les \rtt{}, et multiplie la taille par $\frac{1}{2}$ en cas de détection de congestion.
L'évolution proposée par \cubic{} est de ne pas venir croître linéairement, mais de suivre une courbe cubique pour revenir en douceur à la taille de fenêtre qui avait causé la congestion.
@ -459,7 +481,7 @@ Les intégration de l'IA dans le \CC{} n'est encore qu'a l'état de test, de bal
\section{Modélisation choisie}
La première étape est de modéliser le système.
Pour la modélisation, je m'appuis sur la modélisation de \cite{xiaoTCPDrincSmartCongestion2019}.
Notre modélisation s'appuie sur la modélisation de \cite{xiaoTCPDrincSmartCongestion2019}.
\subsection{Quel cas d'utilisation nous est le plus favorable ?}
Pour notre travail, on se positionne dans une situation où un serveur central puissant voudrait récupérer des données sur un ensemble de capteurs.
@ -577,7 +599,7 @@ Une fois que chaque client a generé sont vecteur, il suffit de les concaténés
\label{eq:modele:mat}
\end{equation}
L'avantage d'une telle matrice est que l'on pourra utiliser une convolution dans le réseau de neurones.
Une autre possibilité, que je n'ais pas pu tester est de concaténer les matrices de plusieur echantillion de temps successif pour crée un tenseur à trois dimension, qui permetrait d'avoir plus d'informations sur l'évolution temporelle des valeurs.
Une autre possibilité, qui est de concaténer les matrices de plusieurs échantillons de temps successifs pour créer un tenseur à trois dimensions, ce qui permettrait d'avoir plus d'informations sur l'évolution temporelle des valeurs, n'a pas pu être testée.
\subsection{Comment l'acteur influance le client \coap{} ?}
La principale valeur permettant de contrôler le comportement du client est le \rto{}.
@ -604,7 +626,7 @@ D'autre choix sont possibles, par exemple $\mathbb{A} = \left\lbrace 0.1, 0.5, 0
\subsection{Comment quantifier la réussite de l'agent ?}
Une fois que l'on sait représenter l'état du réseau, il faut déterminer si cet état est favorable ou non.
Pour cela, il faut se demander ce que l'on veut comme caractéristiques.
Je choisit comme critaire : \begin{itemize}
Les critères choisis sont : \begin{itemize}
\item Le délai est le plus petit possible, pour avoir un système réactif,
\item il y a peu de retransmissions, pour ne pas encombrer le réseau et abuser des ressources énergétiques des capteurs,
\item il y a très peu d'échecs, c'est-à-dire de messages qui dépassent le nombre de retransmissions maximal,
@ -634,7 +656,7 @@ Où $n^e$ est le nombre de message envoyé, emition initial et retransmission, e
$n_\text{echec}$ est le nombre de transactions ayant échoué.
Les facteurs $\beta$ permettent de pondérer l'effet des différentes composantes.
\subsection{Quel type d'agent utilisé ?}
\subsection{Quel type d'agent utilisé ? \label{part:description_boucle}}
Maintenant que l'environnement est fixé, on peut déterminer comment l'acteur va s'intégrer.
On choisit le fonctionnement suivant : \begin{itemize}
\item L'acteur détermine $a_t$ en fonction de $s_t$,
@ -684,16 +706,16 @@ Le plus utilisé est \ns{}.
C'est un simulateur à événements discrets, destiné à la simulation de réseaux informatiques, permettant de travailler sur toutes les couches du modèle \osi{}.
Ainsi on peut entraîner le simulateur avec la topologie du réseau que l'on veut, et la qualité des connexions sans fil que l'on veut.
Malheureusement, c'est un simulateur complexe à utiliser : tous les scripts de simulation sont écrits en \cpp{}.
Par manque de temps et de compétence, je n'ais pas réalisé de simulation \ns{}, mais ca peut etre une piste à étudier pour le pré-entrainement.
Par manque de temps et de compétence, les simulations sur \ns{} n'ont pas pu être réalisées, mais cela reste une piste à étudier pour le pré-entrainement.
\subsubsection{Par création artificiel de transition}
Une autre méthode possible est de créer des transitions artificiellement.
Pour cela, je commence par fixer une charge sur le réseau $f$, c'est à dire la fréquence à laquelle chaque client intéroge le capteur associé.
Pour cela, l'algorithme commence par fixer une charge sur le réseau $f$, c'est-à-dire la fréquence à laquelle chaque client interroge le capteur associé.
Selon cette charge, le réseau est plus ou moins congestionné.
Ensuite je prend plusieurs consigne de \rto{} $c_n$, et pour chaque consigne, je l'applique à l'agent, et je mesure le vecteur d'observation associé $s_n$ ainsi que $r_n$.
Puis pour chaque couple $(s_n, s_m)$, je calcule l'action $a_{n , m}$ pour passer de $c_n$ à $c_m$.
Je peu ainsi construire une série de trnasition $s_n, a_{n, m}, r_m, s_m$.
Ensuite l'algorithme prend plusieurs consignes de \rto{} $c_n$ ; pour chaque consigne, il l'applique à l'agent et mesure le vecteur d'observation associé $s_n$ ainsi que $r_n$.
Puis pour chaque couple $(s_n, s_m)$, l'algorithme calcule l'action $a_{n, m}$ pour passer de $c_n$ à $c_m$.
L'algorithme peut ainsi construire une série de transitions $s_n, a_{n, m}, r_m, s_m$.
On recommence ensuite avec un autre $f$.
Ces transitions sont uniquement des approximations, car elles ne tiennent pas réellement compte des effets de $s_t$ sur $s_{t+1}$ liés au temps de propagation des messages dans le système.
@ -701,18 +723,143 @@ De plus elles supposent que la charge ne change pas sur le système.
Malgré cela, cette méthode permet d'aller plus vite dans la génération de transitions pour le pré-entrainement, car en $n$ intervalles de contrôle à $f$ fixe, on génère $n^2$ transitions.
\section{Implémentation de l'algorithme}
L'implémentation de l'algorithme est réalisée en Python.
Notre implémentation est une modification d'un module existant, \coapthon{}, qui comprend déjà tous les codes pour réaliser un client ou un serveur \coap{}.
De plus, pour la partie apprentissage, on utilise \TF{}, un module répandu pour l'apprentissage machine.
\subsection{Librairie uilisé}
\begin{figure}[htp]
\centering
\includegraphics[width = \textwidth]{puml/classe.png}
\caption[Diagramme de classe]{Diagramme de classe de notre système. La partie apprentissage n'est pas représentée. Seuls les attributs et méthodes d'intérêt sont représentés.}
\label{fig:imple:class}
\end{figure}
\subsubsection{\TF{} et \keras{}}
\subsection{Modules utilisés}
Les modules que nous utilisons ont leur propre interface et fonctionnement interne auxquels nous devons nous adapter.
\paragraph{\coapthon{}}, pour sa partie client, utilise deux objets (voir figure \ref{fig:imple:class}) : \begin{itemize}
\item \mintinline{python}{coapthon.client.helperclient.HelperClient}, qui permet d'avoir accès à des méthodes simples telles que \mintinline{python}{HelperClient.get(url)},
\item \mintinline{python}{coapthon.client.coap.CoAP}, qui implémente la gestion des retransmissions, que l'on doit modifier.
\end{itemize}
Il y a aussi une partie serveur, mais nous ne la modifions pas du tout.
C'est elle qui joue le role de serveur \coap{} sur les \rasp{}.
\paragraph{\TF{} et \keras{}} sont des modules \python{} fournissant les outils nécessaires à l'entrainement de modèles d'IA.
\TF{} s'occupe des fonctions hautes, avec les outils permettant de mettre en place la boucle d'apprentissage.
Pour cela, il a besoin que l'on mette l'environnement sous un format spécial : l'interface \gym{}.
Cette interface permet la standardisation des interfaces des environnements.
Pour cela, il faut définir une nouvelle classe, héritant de \code{tf_agents.environments.py_environment.PyEnvironment}, et surchargeant les attributs et méthodes suivants :
\begin{enumerate}
\item \code{.action_spec}, avec un objet de \code{tf_agents.specs.array_spec}, pour décrire l'espace d'action $\mathbb{A}$,
\item \code{.observation_spec}, avec un objet de \code{tf_agents.specs.array_spec}, pour décrire l'espace d'observation $\mathbb{S}$,
\item \code{._reset()}, qui doit renvoyer un objet \code{tf_agents.trajectories.time_step.transition},
\item \code{._step(action)}, qui doit renvoyer un objet \code{tf_agents.trajectories.time_step.transition} ou \code{tf_agents.trajectories.time_step.termination}.
\end{enumerate}
Les deux premiers permettent de signaler à l'agent quelles valeurs sont autorisées.
Le troisième permet de remettre l'environnement dans un état initial.
Le quatrième est le plus important, car il représente le passage d'un pas de temps.
Les transitions sont les objets $\left( s_t, a_t, r_t, s_{t+1} \right)$.
\keras{} quant à lui, permet l'utilisation et la manipulation des réseaux de neurones.
Puisque l'on n'utilise que des réseaux simples, les fonctionnalités du module ne seront donc pas abordées.
\subsubsection{\coapthon{}}
\subsection{Structure du code}
\subsection{Maquette utilisée\label{part_maquette}}
\begin{wrapfigure}{r}{0.40\textwidth} %this figure will be at the right
\centering
\includegraphics[width=0.40\textwidth]{puml/struc_base.png}
\caption{Composants simplifiés de notre implémentation.}
\end{wrapfigure}
\subsection{Astuce pour simuler une congestion}
La figure \ref{fig:call_stack} résume les appels de fonction au cours d'une requête \coap{}.
La modification du client \coapthon{} consiste en l'ajout d'un composant, le \code{SuperviseurLocal}.
Ce composant permet de changer la manière dont le \rto{} est choisi, et d'enregistrer les \rto{}.
Il y a ensuite un \code{SuperviseurGlobal} qui supervise tous les \code{SuperviseurLocal} du réseau pour générer les matrices d'état.
C'est aussi ce \code{SuperviseurGlobal} qui distribue les consignes à tous les \code{SuperviseurLocal}.
La procédure lors de l'envoi d'un message est visible dans la figure \ref{fig:call_stack}.
Les modifications réalisées ne sont que l'ajout d'appels de fonctions courtes pour enregistrer les timestamps de départ de transmission et retransmission, et la réception des acquittements/réponses.
\subsubsection{Modification de \coapthon{}}
On rajoute à la classe \code{client.CoAP} un membre, le \code{SuperviseurLocal}.
On modifie \code{client.CoAP.send_datagram} pour enregistrer l'envoi d'un datagramme, transmission ou retransmission, en fonction du token utilisé.
On modifie aussi \code{client.CoAP.send_datagram._start_retransmission} pour que ce soit le superviseur qui donne le \rto{}.
On modifie finalement \code{client.CoAP.receive_datagram} pour enregistrer les réceptions de messages en fonction de leurs tokens.
Nous définissons plusieurs types de \code{SuperviseurLocal} : \begin{itemize}
\item \code{SuperviseurLocalPlaceHolder}, qui est utilisé par défaut, n'a aucune fonctionnalité, et qui réutilise la génération de \rto{} de base dans \coapthon{},
\item \code{SuperviseurLocal}, qui prend en charge l'enregistrement des \rto{}, la moyenne et le minimum, et qui est capable d'utiliser un \rto{} déterminé par l'IA,
\item \code{SuperviseurLocalFiltre}, qui prend aussi en charge le filtrage exponentiel pour générer $RTT_L$ et $RTT_S$.
\end{itemize}
C'est la classe \code{SuperviseurGlobal} qui fournit les statistiques permettant de générer les matrices d'état, ainsi que la distribution des consignes.
Elle réalise aussi le calcul des récompenses.
\subsubsection{Boucle d'apprentissage}
L'objet \code{Maquette} est l'objet qui interagit avec l'agent IA.
La méthode la plus importante est \code{step(action)}, qui prend l'action déterminée par l'IA comme entrée et l'applique.
C'est donc cette méthode qui joue un rôle prédominant dans l'algorithme de la partie \ref{part:description_boucle}.
\begin{listing}[htp]
\begin{minted}[frame=lines,
framesep=2mm,
baselinestretch=1.2,
fontsize=\footnotesize,
linenos]{python}
def collecteur(maquette: MaquetteCoapEnv, policy):
time_step = maquette.step(np.array(n_capteur*[0], dtype=np.float32))
while True:
action_step = policy.action(time_step)
next_time_step = maquette.step(action_step.action)
traj = trajectory.from_transition(
time_step, action_step, next_time_step)
with buffer_lock:
replay_buffer.add_batch(traj)
time_step = next_time_step
if traj.is_boundary():
maquette.reset()
\end{minted}
\caption{Code du collecteur d'expérience.}
\label{lst:collecteur}
\end{listing}
\begin{listing}[htp]
\begin{minted}[frame=lines,
framesep=2mm,
baselinestretch=1.2,
fontsize=\footnotesize,
linenos]{python}
while True:
time.sleep(60)
with buffer_lock:
if replay_buffer.num_frames() >= 1000:
experience = replay_buffer.gather_all()
train_loss = tf_agent.train(experience)
replay_buffer.clear()
tf_policy_saver.save(policy_dir)
n_epoch += 1
\end{minted}
\caption{Code de la boucle du thread principale.}
\label{lst:boucle_principale}
\end{listing}
Le code \ref{lst:collecteur} implémente le collecteur d'expérience.
Le collecteur est exécuté dans un thread à part.
On lui donne comme paramètres une maquette sur laquelle travailler, et la politique \code{policy}.
Cette politique est déterminée par un autre thread.
Les transitions sont stockées dans le \code{replay_buffer}.
De plus, pour aller plus vite dans la collecte de transitions, et pour saturer plus rapidement le réseau, on utilise plusieurs maquettes en parallèle, chacune dans un thread.
\subsection{Maquette physique utilisée\label{part_maquette}}
La maquette physique que nous utilisons est constituée de deux machines : \begin{itemize}
\item Un PC de bureau équipé d'un Intel i5 de 10e génération, de 32 Go de RAM et d'un adaptateur WiFi ax,
\item un \rasp 3B+.
\end{itemize}
Les deux machines sont connectées par un réseau WiFi $2.4\,GHz$ géré par le PC.
La connexion est bien trop bonne pour pouvoir être saturée par un système CoAP classique seul, d'où l'utilisation de plusieurs maquettes numériques en même temps sur le modèle physique.
\section{Expérience et résultats}
@ -734,6 +881,7 @@ Malgrès cela, elle permet d'aller plus vite dans la génération de transition
\listoftables
\listoflistings
% --- Biblio par .bib
\bibliography{rapport.bib}
@ -762,6 +910,7 @@ Malgrès cela, elle permet d'aller plus vite dans la génération de transition
\centering
\includegraphics[height=\vsize, width=\hsize, keepaspectratio]{puml/call_stack_envoie.png}
\caption[Appel de fonction pour une transaction]{Appels de fonction pour une transaction avec \coapthon{} et nos modifications. Chaque couleur représente un thread d'exécution.}
\label{fig:call_stack}
\end{figure}
\end{landscape}